diff --git a/copycat/tests.py b/copycat/tests.py
new file mode 100644
index 0000000..ae2e343
--- /dev/null
+++ b/copycat/tests.py
@@ -0,0 +1,135 @@
+"""Run the copycat program"""
+
+import unittest
+
+import copycat
+
+def pnormaldist(p):
+    table = {
+        0.80: 1.2815,
+        0.90: 1.6448,
+        0.95: 1.9599,
+        0.98: 2.3263,
+        0.99: 2.5758,
+        0.995: 2.8070,
+        0.998: 3.0902,
+        0.999: 3.2905,
+        0.9999: 3.8905,
+        0.99999: 4.4171,
+        0.999999: 4.8916,
+        0.9999999: 5.3267,
+        0.99999999: 5.7307,
+        0.999999999: 6.1094,
+    }
+    return max(v for k,v in table.iteritems() if k <= p)
+
+def lower_bound_on_probability(hits, attempts, confidence=0.95):
+    if attempts == 0:
+        return 0
+    z = pnormaldist(confidence)
+    zsqr = z*z
+    phat = 1.0 * hits / attempts
+    under_sqrt = (phat * (1 - phat) + zsqr / (4*attempts)) / attempts
+    denominator = (1 + zsqr / attempts)
+    return (phat + zsqr / (2*attempts) - z * (under_sqrt ** 0.5)) / denominator
+
+def upper_bound_on_probability(hits, attempts, confidence=0.95):
+    misses = attempts - hits
+    return 1.0 - lower_bound_on_probability(misses, attempts, confidence)
+
+
+class TestCopycat(unittest.TestCase):
+    def setUp(self):
+        self.longMessage = True # new in Python 2.7
+
+    def assertProbabilitiesLookRoughlyLike(self, actual, expected):
+        actual_count = 0.0 + sum(d['count'] for d in actual.values())
+        expected_count = 0.0 + sum(d['count'] for d in expected.values())
+        self.assertGreater(actual_count, 1)
+        self.assertGreater(expected_count, 1)
+        for k in set(actual.keys() + expected.keys()):
+            if k not in expected:
+                self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected))
+            expected_probability = expected[k]['count'] / expected_count
+            if k in actual:
+                actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count)
+                actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count)
+                if not (actual_lo <= expected_probability <= actual_hi):
+                    print 'Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi)
+                    self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))
+                if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= 10.0 + (10.0 / actual[k]['count']):
+                    print 'Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], 10.0 + (10.0 / actual[k]['count']))
+                    self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))
+            else:
+                actual_hi = upper_bound_on_probability(0, actual_count)
+                if not (0 <= expected_probability <= actual_hi):
+                    self.fail('No instances of expected key %s were produced! %r != %r' % (k, actual, expected))
+
+    def run_testcase(self, initial, modified, target, iterations, expected):
+        actual = copycat.run(initial, modified, target, iterations)
+        self.assertEqual(sum(a['count'] for a in actual.values()), iterations)
+        self.assertProbabilitiesLookRoughlyLike(actual, expected)
+
+    def test_simple_cases(self):
+        self.run_testcase('abc', 'abd', 'efg', 50, {
+            'efd': {'count': 1, 'avgtemp': 16},
+            'efh': {'count': 99, 'avgtemp': 19},
+        })
+        self.run_testcase('abc', 'abd', 'ijk', 50, {
+            'ijd': {'count': 4, 'avgtemp': 24},
+            'ijl': {'count': 96, 'avgtemp': 20},
+        })
+
+    def test_abc_xyz(self):
+        self.run_testcase('abc', 'abd', 'xyz', 20, {
+            'xyd': {'count': 100, 'avgtemp': 19},
+        })
+
+    def test_ambiguous_case(self):
+        self.run_testcase('abc', 'abd', 'ijkk', 50, {
+            'ijkkk': {'count': 7, 'avgtemp': 21},
+            'ijll': {'count': 47, 'avgtemp': 28},
+            'ijkl': {'count': 44, 'avgtemp': 32},
+            'ijkd': {'count': 2, 'avgtemp': 65},
+        })
+
+    def test_mrrjjj(self):
+        self.run_testcase('abc', 'abd', 'mrrjjj', 50, {
+            'mrrjjjj': {'count': 4, 'avgtemp': 16},
+            'mrrkkk': {'count': 31, 'avgtemp': 47},
+            'mrrjjk': {'count': 64, 'avgtemp': 51},
+            'mrrjkk': {'count': 1, 'avgtemp': 52},
+            'mrrjjd': {'count': 1, 'avgtemp': 54},
+        })
+
+    def test_elongation(self):
+        # This isn't remotely what a human would say.
+        self.run_testcase('abc', 'aabbcc', 'milk', 50, {
+            'milj': {'count': 85, 'avgtemp': 55},
+            'mikj': {'count': 10, 'avgtemp': 56},
+            'milk': {'count': 1, 'avgtemp': 56},
+            'lilk': {'count': 1, 'avgtemp': 57},
+            'milb': {'count': 3, 'avgtemp': 57},
+        })
+
+    def test_repairing_successor_sequence(self):
+        # This isn't remotely what a human would say.
+        self.run_testcase('aba', 'abc', 'xyx', 50, {
+            'xc': {'count': 9, 'avgtemp': 57},
+            'xyc': {'count': 82, 'avgtemp': 59},
+            'cyx': {'count': 7, 'avgtemp': 68},
+            'xyx': {'count': 2, 'avgtemp': 69},
+        })
+
+    def test_nonsense(self):
+        self.run_testcase('cat', 'dog', 'cake', 10, {
+            'cakg': {'count': 99, 'avgtemp': 70},
+            'gake': {'count': 1, 'avgtemp': 59},
+        })
+        self.run_testcase('cat', 'dog', 'kitten', 10, {
+            'kitteg': {'count': 96, 'avgtemp': 66},
+            'kitten': {'count': 4, 'avgtemp': 68},
+        })
+
+if __name__ == '__main__':
+    unittest.main()
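
For reference, lower_bound_on_probability and upper_bound_on_probability in this patch compute the two ends of a Wilson score confidence interval for a binomial proportion, which is what lets assertProbabilitiesLookRoughlyLike tolerate sampling noise in copycat's stochastic answers rather than demanding exact counts. The sketch below is not part of the patch: the name wilson_interval and the hard-coded z = 1.9599 (95% confidence) are illustrative choices made here to keep it self-contained.

# A minimal, standalone sketch of the Wilson score interval the helpers above
# implement. Everything here is illustrative and separate from the patch.

def wilson_interval(hits, attempts, z=1.9599):
    """Return (lower, upper) bounds on the true hit probability."""
    if attempts == 0:
        return 0.0, 1.0
    phat = float(hits) / attempts
    zsqr = z * z
    centre = phat + zsqr / (2.0 * attempts)
    spread = z * ((phat * (1 - phat) + zsqr / (4.0 * attempts)) / attempts) ** 0.5
    denominator = 1 + zsqr / attempts
    return (centre - spread) / denominator, (centre + spread) / denominator

if __name__ == '__main__':
    # 20 hits in 50 runs: the interval easily contains a true rate of 0.4 ...
    print(wilson_interval(20, 50))    # roughly (0.28, 0.54)
    # ... and it tightens as the number of runs grows.
    print(wilson_interval(200, 500))  # roughly (0.36, 0.44)

The wider the interval, the more slack the test gives a rare answer; that is why an expected answer with a tiny count (like 'efd' above) can still pass even if it never shows up in a particular 50-iteration run.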