Merge branch 'feature-normal-science-backport' into legacy

2017-11-18 18:44:24 -07:00
parent 1b84b22e3f 4388bede7d
commit be6d1fa495
6 changed files with 182 additions and 137 deletions
--- a/.distributions
+++ b/.distributions
--- a/copycat/__init__.py
+++ b/copycat/__init__.py
@ -1 +1,2 @@
 from .copycat import Copycat, Reporter  # noqa
 from .problem import Problem
--- a/copycat/problem.py
+++ b/copycat/problem.py
@ -0,0 +1,62 @@
 from .copycat import Copycat
 from pprint import pprint
 class Problem:
    """One letter-string analogy problem plus its answer distributions.

    The problem reads "initial : modified :: target : _". Distributions are
    stored as a dict keyed by temperature-adjustment formula (or None when
    no formula could be read), each value being whatever Copycat.run returns
    for that formula.
    """

    def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
        """Record the problem and, unless supplied, compute its distributions.

        Args:
            initial, modified, target: the three strings of the analogy.
            iterations: number of runs passed through to Copycat.run.
            distributions: precomputed result of solve(); when None the
                problem is solved immediately.
            formulas: optional iterable of formulas to solve under; when
                None a single run with the current formula is made.

        Raises:
            AssertionError: if formulas are given but a fresh Copycat's
                workspace has no 'temperature' attribute.
        """
        self.formulas = formulas
        self.initial = initial
        self.modified = modified
        self.target = target
        self.iterations = iterations
        if distributions is None:
            self.distributions = self.solve()
        else:
            self.distributions = distributions
        if formulas is not None:
            assert hasattr(Copycat().workspace, 'temperature')

    def _print_banner(self):
        # Header shared by test() and solve(); the text is the same in both.
        print('-' * 120)
        print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                  self.modified,
                                                                  self.target))

    def test(self, comparison, expected=None):
        """Solve the problem afresh and hand the result to a comparison.

        Args:
            comparison: callable invoked as comparison(actual, expected).
            expected: distributions to compare against; defaults to the
                distributions stored on this problem.
        """
        self._print_banner()
        print('expected:')
        if expected is None:
            expected = self.distributions
        pprint(expected)
        actual = self.solve()
        print('actual:')
        pprint(actual)
        comparison(actual, expected)
        print('-' * 120)

    def solve(self):
        """Run Copycat on this problem and return {formula: run result}.

        Without formulas a single run is made and keyed by the workspace
        temperature's current adjustment formula, or by None when the
        workspace has no 'temperature' attribute. With formulas, one run is
        made per formula on the same Copycat instance.
        """
        self._print_banner()
        copycat = Copycat()
        answers = dict()
        if self.formulas is None:
            if hasattr(copycat.workspace, 'temperature'):
                formula = copycat.workspace.temperature.getAdj()
            else:
                formula = None
            answers[formula] = copycat.run(self.initial,
                                           self.modified,
                                           self.target,
                                           self.iterations)
        else:
            for formula in self.formulas:
                # NOTE(review): the branch above reads the formula from
                # copycat.workspace.temperature, while this writes it through
                # copycat.temperature - confirm both refer to the same object.
                copycat.temperature.useAdj(formula)
                # Key by the loop variable; the previous `formulas` was an
                # undefined name here and raised NameError.
                answers[formula] = copycat.run(self.initial,
                                               self.modified,
                                               self.target,
                                               self.iterations)
        return answers

    def generate(self):
        """Re-solve the problem and overwrite the stored distributions."""
        self.distributions = self.solve()
--- a/copycat/statistics.py
+++ b/copycat/statistics.py
@ -0,0 +1,57 @@
 # Chi-squared threshold values, keyed by degrees of freedom (1 through 10).
 # NOTE(review): the figures match the standard 0.05 significance column of a
 # chi-squared critical-value table - confirm that is the intended level.
 _chiSquared_table = {
    1: 3.841,
    2: 5.991,
    3: 7.815,
    4: 9.488,
    5: 11.071,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919,
    10: 18.307,
 }
 class ChiSquaredException(Exception):
    """Exception type declared for the chi-squared comparisons.

    Nothing in the visible code raises it yet.
    """
 def chi_squared(actual, expected):
    """Return the chi-squared statistic between two answer distributions.

    Both arguments are dicts mapping an answer to a dict with a 'count'
    entry; an answer absent from one side counts as 0 there. Answers whose
    expected count is 0 add nothing to the sum and only produce a printed
    warning.
    """
    def count_of(answer, distribution):
        # Missing answers are treated as never having been produced.
        if answer in distribution:
            return distribution[answer]['count']
        return 0

    statistic = 0
    for answer in set(list(actual) + list(expected)):
        expected_count = count_of(answer, expected)
        observed_count = count_of(answer, actual)
        if expected_count != 0:
            statistic += (observed_count - expected_count) ** 2 / expected_count
        else:
            # Dividing by a zero expectation is undefined, so just report it.
            print('Warning! Expected 0 counts of {}, but got {}'.format(answer, observed_count))
    return statistic
 def cross_formula_chi_squared(actualDict, expectedDict):
    """Compare every actual distribution with every expected distribution.

    Both arguments map a formula key to an answer distribution (a dict of
    answer -> {'count': ...}). For each pairing the chi-squared statistic is
    computed and a message is printed when it reaches the tabulated
    threshold for the pairing's degrees of freedom. Returns None.
    """
    for ka, actual in actualDict.items():
        for ke, expected in expectedDict.items():
            print('Comparing {} with {}'.format(ka, ke))
            chiSquared = chi_squared(actual, expected)
            # chi_squared keeps its degrees-of-freedom count to itself, so it
            # is recomputed here the same way: the number of distinct answers.
            # NOTE(review): goodness-of-fit tests conventionally use
            # (categories - 1) - confirm which is intended.
            degreesFreedom = len(set(actual) | set(expected))
            threshold = _chiSquared_table.get(degreesFreedom)
            if threshold is None:
                # The table only covers a fixed range; report rather than
                # crash with a KeyError.
                print('No chi2 threshold tabulated for {} degrees of freedom; '
                      'skipping significance check'.format(degreesFreedom))
                continue
            if chiSquared >= threshold:
                print('Significant difference between expected and actual answer distributions: \n' +
                    'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
 def cross_chi_squared(problemSets):
    """Cross-compare the distributions of every pair of distinct problem sets.

    Each problem set is an iterable of objects exposing a `distributions`
    attribute. Every problem of an earlier set is compared with every
    problem of each later set via cross_formula_chi_squared.
    """
    # Lazily enumerate each unordered pair of sets exactly once.
    set_pairs = (
        (earlier, later)
        for position, earlier in enumerate(problemSets)
        for later in problemSets[position + 1:]
    )
    for earlier, later in set_pairs:
        for problemA in earlier:
            for problemB in later:
                cross_formula_chi_squared(problemA.distributions,
                                          problemB.distributions)
 def iso_chi_squared(actualDict, expectedDict):
    """Check that every expected key was also tested.

    Raises AssertionError for the first key of expectedDict that is missing
    from actualDict.
    """
    for key in expectedDict:
        assert key in actualDict, 'The key {} was not tested'.format(key)
        # NOTE(review): the matching distributions are fetched but no
        # statistic is computed from them in the visible code.
        actual, expected = actualDict[key], expectedDict[key]
--- a/copycat/tests.py
+++ b/copycat/tests.py
@ -1,137 +0,0 @@
 import unittest
 from .copycat import Copycat
 def pnormaldist(p):
    """Return the tabulated value for the highest table level not above p.

    The table maps confidence levels (0.80 up to 0.999999999) to constants.
    NOTE(review): the constants look like two-sided standard-normal z-scores
    (e.g. 0.95 -> 1.9599) - confirm.

    Raises:
        ValueError: if p is below the smallest tabulated level, so no entry
            qualifies.
    """
    table = {
        0.80: 1.2815,
        0.90: 1.6448,
        0.95: 1.9599,
        0.98: 2.3263,
        0.99: 2.5758,
        0.995: 2.8070,
        0.998: 3.0902,
        0.999: 3.2905,
        0.9999: 3.8905,
        0.99999: 4.4171,
        0.999999: 4.8916,
        0.9999999: 5.3267,
        0.99999999: 5.7307,
        0.999999999: 6.1094,
    }
    candidates = [value for level, value in table.items() if level <= p]
    if not candidates:
        # max() on an empty sequence would raise the same exception type but
        # with a message that says nothing about the offending argument.
        raise ValueError('no tabulated value for p={!r}; smallest supported level is {}'.format(p, min(table)))
    return max(candidates)
 def lower_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the lower end of the Wilson score interval for hits/attempts.

    With zero attempts there is no evidence, and 0 is returned. The critical
    value comes from pnormaldist(confidence).
    """
    if attempts == 0:
        return 0
    z_score = pnormaldist(confidence)
    z_squared = z_score * z_score
    observed_rate = 1.0 * hits / attempts
    variance_term = (observed_rate * (1 - observed_rate) + z_squared / (4 * attempts)) / attempts
    center = observed_rate + z_squared / (2 * attempts)
    spread = z_score * (variance_term ** 0.5)
    return (center - spread) / (1 + z_squared / attempts)
 def upper_bound_on_probability(hits, attempts, confidence=0.95):
    """Return an upper bound on the hit probability.

    Computed as one minus the lower bound on the miss probability, where the
    misses are attempts - hits.
    """
    lower_for_misses = lower_bound_on_probability(attempts - hits, attempts, confidence)
    return 1.0 - lower_for_misses
 class TestCopycat(unittest.TestCase):
    """Statistical regression tests comparing Copycat runs with stored answers."""

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def assertProbabilitiesLookRoughlyLike(self, actual, expected):
        """Fail unless the actual answer distribution resembles the expected one.

        Both arguments map an answer to a dict with 'count' and 'avgtemp'.
        Each expected probability must lie within the bounds computed from
        the actual counts, and for answers that were produced the average
        temperatures must be close. Any answer produced but not expected is
        a failure.
        """
        total_actual = 0.0 + sum(entry['count'] for entry in actual.values())
        total_expected = 0.0 + sum(entry['count'] for entry in expected.values())
        self.assertGreater(total_actual, 1)
        self.assertGreater(total_expected, 1)
        for answer in set(list(actual) + list(expected)):
            if answer not in expected:
                self.fail('Key %s was produced but not expected! %r != %r' % (answer, actual, expected))
            wanted_probability = expected[answer]['count'] / total_expected
            if answer not in actual:
                # Never produced: acceptable only if zero hits is still
                # compatible with the expected probability.
                ceiling = upper_bound_on_probability(0, total_actual)
                if not (0 <= wanted_probability <= ceiling):
                    self.fail('No instances of expected key %s were produced! %r != %r' % (answer, actual, expected))
                continue
            produced = actual[answer]
            floor = lower_bound_on_probability(produced['count'], total_actual)
            ceiling = upper_bound_on_probability(produced['count'], total_actual)
            if not (floor <= wanted_probability <= ceiling):
                print('Failed (%s <= %s <= %s)' % (floor, wanted_probability, ceiling))
                self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))
            temperature_gap = abs(produced['avgtemp'] - expected[answer]['avgtemp'])
            # The tolerance widens when only a few samples back the average.
            tolerance = 10.0 + (10.0 / produced['count'])
            if temperature_gap >= tolerance:
                print('Failed (%s - %s >= %s)' % (produced['avgtemp'], expected[answer]['avgtemp'], tolerance))
                self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))

    def run_testcase(self, initial, modified, target, iterations, expected):
        """Run Copycat on one problem and compare the result with `expected`."""
        actual = Copycat().run(initial, modified, target, iterations)
        produced_total = sum(entry['count'] for entry in actual.values())
        self.assertEqual(produced_total, iterations)
        self.assertProbabilitiesLookRoughlyLike(actual, expected)

    def test_simple_cases(self):
        expected_efg = {
            'efd': {'count': 1, 'avgtemp': 16},
            'efh': {'count': 99, 'avgtemp': 19},
        }
        self.run_testcase('abc', 'abd', 'efg', 50, expected_efg)
        expected_ijk = {
            'ijd': {'count': 4, 'avgtemp': 24},
            'ijl': {'count': 96, 'avgtemp': 20},
        }
        self.run_testcase('abc', 'abd', 'ijk', 50, expected_ijk)

    def test_abc_xyz(self):
        expected = {
            'xyd': {'count': 100, 'avgtemp': 19},
        }
        self.run_testcase('abc', 'abd', 'xyz', 20, expected)

    def test_ambiguous_case(self):
        expected = {
            'ijkkk': {'count': 7, 'avgtemp': 21},
            'ijll': {'count': 47, 'avgtemp': 28},
            'ijkl': {'count': 44, 'avgtemp': 32},
            'ijkd': {'count': 2, 'avgtemp': 65},
        }
        self.run_testcase('abc', 'abd', 'ijkk', 50, expected)

    def test_mrrjjj(self):
        expected = {
            'mrrjjjj': {'count': 4, 'avgtemp': 16},
            'mrrkkk': {'count': 31, 'avgtemp': 47},
            'mrrjjk': {'count': 64, 'avgtemp': 51},
            'mrrjkk': {'count': 1, 'avgtemp': 52},
            'mrrjjd': {'count': 1, 'avgtemp': 54},
        }
        self.run_testcase('abc', 'abd', 'mrrjjj', 50, expected)

    def test_elongation(self):
        # This isn't remotely what a human would say.
        expected = {
            'milj': {'count': 85, 'avgtemp': 55},
            'mikj': {'count': 10, 'avgtemp': 56},
            'milk': {'count': 1, 'avgtemp': 56},
            'lilk': {'count': 1, 'avgtemp': 57},
            'milb': {'count': 3, 'avgtemp': 57},
        }
        self.run_testcase('abc', 'aabbcc', 'milk', 50, expected)

    def test_repairing_successor_sequence(self):
        # This isn't remotely what a human would say.
        expected = {
            'xc': {'count': 9, 'avgtemp': 57},
            'xyc': {'count': 82, 'avgtemp': 59},
            'cyx': {'count': 7, 'avgtemp': 68},
            'xyx': {'count': 2, 'avgtemp': 69},
        }
        self.run_testcase('aba', 'abc', 'xyx', 50, expected)

    def test_nonsense(self):
        expected_cake = {
            'cakg': {'count': 99, 'avgtemp': 70},
            'gake': {'count': 1, 'avgtemp': 59},
        }
        self.run_testcase('cat', 'dog', 'cake', 10, expected_cake)
        expected_kitten = {
            'kitteg': {'count': 96, 'avgtemp': 66},
            'kitten': {'count': 4, 'avgtemp': 68},
        }
        self.run_testcase('cat', 'dog', 'kitten', 10, expected_kitten)
 # Run the test suite when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/tests.py
+++ b/tests.py
@ -0,0 +1,62 @@
 import unittest
 import os.path
 import pickle
 import argparse
 import sys
 from pprint  import pprint
 from copycat import Problem
 from copycat.statistics import iso_chi_squared
 # TODO: update test cases to use entropy
 def generate():
    """Solve the reference problems, pickle them, and return them.

    Each Problem is constructed without distributions, so it is solved on
    creation. The resulting list is written to TestCopycat.Filename.
    """
    print('Generating distributions for new file')
    iterations = 30
    targets = ('efg', 'ijk', 'xyz', 'ijkk', 'mrrjjj')
    problems = [Problem('abc', 'abd', target, iterations) for target in targets]
    with open(TestCopycat.Filename, 'wb') as outfile:
        pickle.dump(problems, outfile)
    return problems
 class TestCopycat(unittest.TestCase):
    """Re-run the stored problems and compare against their distributions."""

    # Path of the pickled problem list. Defaults to the same file name the
    # command-line entry point uses, so runs started through a test runner
    # (which never executes the __main__ block) still have a usable path
    # instead of failing on open(None).
    Filename = '.distributions'

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def test(self):
        """Load the stored problems (regenerating on failure) and test each."""
        print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
        try:
            with open(TestCopycat.Filename, 'rb') as infile:
                # NOTE: unpickling can execute arbitrary code; only point
                # Filename at files produced by generate().
                problems = pickle.load(infile)
        except Exception as e:
            # Deliberately broad: any failure to read the stored problems
            # falls back to generating them from scratch.
            print('Generating due to error:')
            print(e)
            problems = generate()
        for problem in problems:
            problem.test(iso_chi_squared)
 if __name__ == '__main__':
    # Split the command line between this script (--generate, filename) and
    # unittest (everything that follows).
    parser = argparse.ArgumentParser()
    parser.add_argument('--generate', action='store_true')
    parser.add_argument('filename', default='.distributions', nargs='?')
    # nargs='*' gathers the remaining positionals into a list. With '?' the
    # value was a single string, and assigning a string to sys.argv[1:]
    # below would split it into one argument per character.
    parser.add_argument('unittest_args', default=[], nargs='*')
    args = parser.parse_args()
    TestCopycat.Filename = args.filename
    if args.generate:
        generate()
    # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone)
    sys.argv[1:] = args.unittest_args
    unittest.main()
`@ -1 +1,2 @@`
	`from .copycat import Copycat, Reporter # noqa`	`from .copycat import Copycat, Reporter # noqa`
		`from .problem import Problem`