Merge branch 'feature-normal-science-backport' into legacy
This commit is contained in:
BIN
.distributions
Normal file
BIN
.distributions
Normal file
Binary file not shown.
@ -1 +1,2 @@
|
|||||||
from .copycat import Copycat, Reporter # noqa
|
from .copycat import Copycat, Reporter # noqa
|
||||||
|
from .problem import Problem
|
||||||
|
|||||||
62
copycat/problem.py
Normal file
62
copycat/problem.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
from .copycat import Copycat
|
||||||
|
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
class Problem:
    """A letter-string analogy problem ``initial : modified :: target : _``.

    ``distributions`` is a dict keyed by temperature-adjustment formula (or by
    whatever ``getAdj()`` returns / ``None`` when no formulas were supplied);
    each value is the result of one ``Copycat.run`` call.
    """

    def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
        """Store the problem and, unless given, compute its distributions.

        Args:
            initial: first string of the analogy.
            modified: what ``initial`` was changed into.
            target: string to which the same change should be applied.
            iterations: passed straight through to ``Copycat.run``.
            distributions: previously computed answers; when ``None`` the
                problem is solved immediately to obtain them.
            formulas: optional iterable of temperature-adjustment formulas to
                run the problem under.
        """
        self.formulas = formulas
        self.initial = initial
        self.modified = modified
        self.target = target

        self.iterations = iterations
        if distributions is None:
            self.distributions = self.solve()
        else:
            self.distributions = distributions
        if formulas is not None:
            # Formulas only make sense when the workspace exposes a temperature.
            assert hasattr(Copycat().workspace, 'temperature')

    def test(self, comparison, expected=None):
        """Re-solve the problem and hand both distributions to ``comparison``.

        Args:
            comparison: callable invoked as ``comparison(actual, expected)``.
            expected: distributions to compare against; defaults to the ones
                stored on this problem.
        """
        print('-' * 120)
        print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                  self.modified,
                                                                  self.target))
        print('expected:')
        if expected is None:
            expected = self.distributions
        pprint(expected)

        actual = self.solve()
        print('actual:')
        pprint(actual)
        comparison(actual, expected)
        print('-' * 120)

    def solve(self):
        """Run Copycat on this problem and return ``{formula: run result}``."""
        print('-' * 120)
        print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                  self.modified,
                                                                  self.target))
        copycat = Copycat()
        answers = dict()
        if self.formulas is None:
            if hasattr(copycat.workspace, 'temperature'):
                formula = copycat.workspace.temperature.getAdj()
            else:
                formula = None
            answers[formula] = copycat.run(self.initial,
                                           self.modified,
                                           self.target,
                                           self.iterations)
        else:
            for formula in self.formulas:
                # NOTE(review): every other access in this class goes through
                # copycat.workspace.temperature -- confirm that
                # copycat.temperature is the intended object here.
                copycat.temperature.useAdj(formula)
                # Key each run by its own formula. The previous code indexed
                # with the undefined name `formulas`, which raised NameError.
                answers[formula] = copycat.run(self.initial,
                                               self.modified,
                                               self.target,
                                               self.iterations)
        return answers

    def generate(self):
        """Recompute and store the distributions for this problem."""
        self.distributions = self.solve()
|
||||||
57
copycat/statistics.py
Normal file
57
copycat/statistics.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# CHI2 values for n degrees freedom
|
||||||
|
_chiSquared_table = {
|
||||||
|
1:3.841,
|
||||||
|
2:5.991,
|
||||||
|
3:7.815,
|
||||||
|
4:9.488,
|
||||||
|
5:11.071,
|
||||||
|
6:12.592,
|
||||||
|
7:14.067,
|
||||||
|
8:15.507,
|
||||||
|
9:16.919,
|
||||||
|
10:18.307
|
||||||
|
}
|
||||||
|
|
||||||
|
class ChiSquaredException(Exception):
    """Exception type reserved for chi-squared comparison failures."""
|
||||||
|
|
||||||
|
def chi_squared(actual, expected):
    """Return the chi-squared statistic between two answer distributions.

    Both arguments map an answer to a dict holding at least a ``'count'``
    entry. An answer missing from one side counts as 0 there. Answers whose
    expected count is 0 cannot contribute (division by zero), so they are
    reported with a printed warning and skipped.

    Args:
        actual: observed distribution.
        expected: reference distribution.

    Returns:
        Sum over all answers of ``(observed - expected) ** 2 / expected``.
    """
    def count_of(answer, distribution):
        # Absent answers are treated as having been seen zero times.
        return distribution[answer]['count'] if answer in distribution else 0

    chiSquared = 0
    for answer in set(list(actual.keys()) + list(expected.keys())):
        expectedCount = count_of(answer, expected)
        observedCount = count_of(answer, actual)
        if expectedCount == 0:
            print('Warning! Expected 0 counts of {}, but got {}'.format(answer, observedCount))
        else:
            chiSquared += (observedCount - expectedCount) ** 2 / expectedCount
    return chiSquared
|
||||||
|
|
||||||
|
def cross_formula_chi_squared(actualDict, expectedDict):
    """Compare every distribution in ``actualDict`` with every one in ``expectedDict``.

    Both arguments map a formula key to an answer distribution. For each pair
    the chi-squared statistic is computed and a message is printed when it
    reaches the tabulated critical value for the pair's degrees of freedom.

    Args:
        actualDict: ``{formula: distribution}`` of observed results.
        expectedDict: ``{formula: distribution}`` of reference results.
    """
    for ka, actual in actualDict.items():
        for ke, expected in expectedDict.items():
            print('Comparing {} with {}'.format(ka, ke))
            chiSquared = chi_squared(actual, expected)

            # `degreesFreedom` was previously read here without ever being
            # defined in this scope. Derive it the same way chi_squared() does:
            # the number of distinct answers across both distributions.
            # NOTE(review): textbook goodness-of-fit uses categories - 1;
            # confirm which definition is intended.
            degreesFreedom = len(set(actual) | set(expected))
            critical = _chiSquared_table.get(degreesFreedom)
            if critical is None:
                # The table only covers a fixed range; skip rather than crash.
                print('No critical chi-squared value tabulated for {} degrees of freedom; '
                      'skipping significance check'.format(degreesFreedom))
                continue

            if chiSquared >= critical:
                print('Significant difference between expected and actual answer distributions: \n' +
                      'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
|
||||||
|
|
||||||
|
def cross_chi_squared(problemSets):
    """Cross-compare the distributions of every pair of distinct problem sets.

    Each problem set is paired once with every set that comes after it in
    ``problemSets``; within a pair, every problem of the first set is compared
    with every problem of the second via ``cross_formula_chi_squared``.
    """
    for nextIndex, firstSet in enumerate(problemSets, 1):
        laterSets = problemSets[nextIndex:]
        for secondSet in laterSets:
            for first in firstSet:
                for second in secondSet:
                    cross_formula_chi_squared(first.distributions,
                                              second.distributions)
|
||||||
|
|
||||||
|
def iso_chi_squared(actualDict, expectedDict):
    """Check that every formula key in ``expectedDict`` also exists in ``actualDict``.

    Raises:
        AssertionError: if an expected key is missing from ``actualDict``.
    """
    for formula in expectedDict:
        assert formula in actualDict, 'The key {} was not tested'.format(formula)
        # NOTE(review): the two distributions are looked up but never compared
        # in the code visible here -- presumably a chi-squared check is meant
        # to follow; confirm.
        actual, expected = actualDict[formula], expectedDict[formula]
|
||||||
137
copycat/tests.py
137
copycat/tests.py
@ -1,137 +0,0 @@
|
|||||||
import unittest
|
|
||||||
|
|
||||||
from .copycat import Copycat
|
|
||||||
|
|
||||||
|
|
||||||
def pnormaldist(p):
    """Return the tabulated z-score for confidence level ``p``.

    The lookup is a step function: the result is the largest z-score whose
    tabulated confidence level does not exceed ``p``.

    Args:
        p: confidence level; must be at least the smallest tabulated level.

    Returns:
        The z-score from the table.

    Raises:
        ValueError: if ``p`` is below every tabulated confidence level.
    """
    table = {
        0.80: 1.2815,
        0.90: 1.6448,
        0.95: 1.9599,
        0.98: 2.3263,
        0.99: 2.5758,
        0.995: 2.8070,
        0.998: 3.0902,
        0.999: 3.2905,
        0.9999: 3.8905,
        0.99999: 4.4171,
        0.999999: 4.8916,
        0.9999999: 5.3267,
        0.99999999: 5.7307,
        0.999999999: 6.1094,
    }
    candidates = [z for level, z in table.items() if level <= p]
    if not candidates:
        # max() on an empty sequence already raised ValueError here; keep the
        # exception type but say what actually went wrong.
        raise ValueError(
            'no z-score tabulated for confidence {!r}; smallest supported level is {}'.format(
                p, min(table)))
    return max(candidates)
|
|
||||||
|
|
||||||
|
|
||||||
def lower_bound_on_probability(hits, attempts, confidence=0.95):
    """Lower bound on the true success probability given ``hits`` out of ``attempts``.

    The arithmetic matches the lower limit of the Wilson score interval, with
    the z-score taken from ``pnormaldist(confidence)``. Returns 0 when there
    were no attempts.
    """
    if attempts == 0:
        return 0

    z = pnormaldist(confidence)
    z_squared = z * z
    observed_rate = 1.0 * hits / attempts

    variance_term = (observed_rate * (1 - observed_rate) + z_squared / (4 * attempts)) / attempts
    centre = observed_rate + z_squared / (2 * attempts)
    scale = (1 + z_squared / attempts)
    return (centre - z * (variance_term ** 0.5)) / scale
|
|
||||||
|
|
||||||
|
|
||||||
def upper_bound_on_probability(hits, attempts, confidence=0.95):
    """Upper bound on the true success probability given ``hits`` out of ``attempts``.

    Computed as one minus the lower bound on the probability of a miss.
    """
    return 1.0 - lower_bound_on_probability(attempts - hits, attempts, confidence)
|
|
||||||
|
|
||||||
|
|
||||||
class TestCopycat(unittest.TestCase):
    """Compare fresh Copycat runs against recorded answer distributions."""

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def assertProbabilitiesLookRoughlyLike(self, actual, expected):
        """Fail unless ``actual`` is statistically close to ``expected``.

        Both arguments map an answer string to a dict with ``'count'`` and
        ``'avgtemp'`` entries. Each expected probability must fall inside the
        confidence interval derived from the observed counts, and the average
        temperatures must agree within a count-dependent tolerance.
        """
        actual_count = 0.0 + sum(entry['count'] for entry in actual.values())
        expected_count = 0.0 + sum(entry['count'] for entry in expected.values())
        self.assertGreater(actual_count, 1)
        self.assertGreater(expected_count, 1)

        for answer in set(list(actual) + list(expected)):
            if answer not in expected:
                self.fail('Key %s was produced but not expected! %r != %r' % (answer, actual, expected))
            expected_probability = expected[answer]['count'] / expected_count

            if answer not in actual:
                # Never produced: only acceptable if zero hits is still
                # consistent with the expected probability.
                actual_hi = upper_bound_on_probability(0, actual_count)
                if not (0 <= expected_probability <= actual_hi):
                    self.fail('No instances of expected key %s were produced! %r != %r' % (answer, actual, expected))
                continue

            observed = actual[answer]
            wanted = expected[answer]
            actual_lo = lower_bound_on_probability(observed['count'], actual_count)
            actual_hi = upper_bound_on_probability(observed['count'], actual_count)
            if not (actual_lo <= expected_probability <= actual_hi):
                print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi))
                self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))

            temperature_gap = abs(observed['avgtemp'] - wanted['avgtemp'])
            tolerance = 10.0 + (10.0 / observed['count'])
            if temperature_gap >= tolerance:
                print('Failed (%s - %s >= %s)' % (observed['avgtemp'], wanted['avgtemp'], tolerance))
                self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))

    def run_testcase(self, initial, modified, target, iterations, expected):
        """Run Copycat once and check its answer distribution against ``expected``."""
        actual = Copycat().run(initial, modified, target, iterations)
        total_answers = sum(entry['count'] for entry in actual.values())
        self.assertEqual(total_answers, iterations)
        self.assertProbabilitiesLookRoughlyLike(actual, expected)

    def test_simple_cases(self):
        expected_efg = {
            'efd': dict(count=1, avgtemp=16),
            'efh': dict(count=99, avgtemp=19),
        }
        self.run_testcase('abc', 'abd', 'efg', 50, expected_efg)

        expected_ijk = {
            'ijd': dict(count=4, avgtemp=24),
            'ijl': dict(count=96, avgtemp=20),
        }
        self.run_testcase('abc', 'abd', 'ijk', 50, expected_ijk)

    def test_abc_xyz(self):
        expected = {
            'xyd': dict(count=100, avgtemp=19),
        }
        self.run_testcase('abc', 'abd', 'xyz', 20, expected)

    def test_ambiguous_case(self):
        expected = {
            'ijkkk': dict(count=7, avgtemp=21),
            'ijll': dict(count=47, avgtemp=28),
            'ijkl': dict(count=44, avgtemp=32),
            'ijkd': dict(count=2, avgtemp=65),
        }
        self.run_testcase('abc', 'abd', 'ijkk', 50, expected)

    def test_mrrjjj(self):
        expected = {
            'mrrjjjj': dict(count=4, avgtemp=16),
            'mrrkkk': dict(count=31, avgtemp=47),
            'mrrjjk': dict(count=64, avgtemp=51),
            'mrrjkk': dict(count=1, avgtemp=52),
            'mrrjjd': dict(count=1, avgtemp=54),
        }
        self.run_testcase('abc', 'abd', 'mrrjjj', 50, expected)

    def test_elongation(self):
        # This isn't remotely what a human would say.
        expected = {
            'milj': dict(count=85, avgtemp=55),
            'mikj': dict(count=10, avgtemp=56),
            'milk': dict(count=1, avgtemp=56),
            'lilk': dict(count=1, avgtemp=57),
            'milb': dict(count=3, avgtemp=57),
        }
        self.run_testcase('abc', 'aabbcc', 'milk', 50, expected)

    def test_repairing_successor_sequence(self):
        # This isn't remotely what a human would say.
        expected = {
            'xc': dict(count=9, avgtemp=57),
            'xyc': dict(count=82, avgtemp=59),
            'cyx': dict(count=7, avgtemp=68),
            'xyx': dict(count=2, avgtemp=69),
        }
        self.run_testcase('aba', 'abc', 'xyx', 50, expected)

    def test_nonsense(self):
        expected_cake = {
            'cakg': dict(count=99, avgtemp=70),
            'gake': dict(count=1, avgtemp=59),
        }
        self.run_testcase('cat', 'dog', 'cake', 10, expected_cake)

        expected_kitten = {
            'kitteg': dict(count=96, avgtemp=66),
            'kitten': dict(count=4, avgtemp=68),
        }
        self.run_testcase('cat', 'dog', 'kitten', 10, expected_kitten)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
|
||||||
62
tests.py
Normal file
62
tests.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
import unittest
|
||||||
|
import os.path
|
||||||
|
import pickle
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from pprint import pprint
|
||||||
|
from copycat import Problem
|
||||||
|
from copycat.statistics import iso_chi_squared
|
||||||
|
|
||||||
|
# TODO: update test cases to use entropy
|
||||||
|
|
||||||
|
def generate():
    """Solve the reference problems, pickle them to ``TestCopycat.Filename``, and return them.

    Every problem uses the ``abc -> abd`` change and 30 iterations; only the
    target string varies.
    """
    print('Generating distributions for new file')
    iterations = 30
    targets = ('efg', 'ijk', 'xyz', 'ijkk', 'mrrjjj')
    problems = [Problem('abc', 'abd', target, iterations) for target in targets]

    with open(TestCopycat.Filename, 'wb') as outfile:
        pickle.dump(problems, outfile)
    return problems
|
||||||
|
|
||||||
|
class TestCopycat(unittest.TestCase):
    """Re-run pickled problems and compare against their stored distributions."""

    # Path of the pickled problem list; assigned by the script entry point.
    Filename = None

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def _load_problems(self):
        """Return the pickled problems, regenerating them if loading fails."""
        try:
            with open(TestCopycat.Filename, 'rb') as infile:
                return pickle.load(infile)
        except Exception as e:
            print('Generating due to error:')
            print(e)
            return generate()

    def test(self):
        print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
        for problem in self._load_problems():
            problem.test(iso_chi_squared)
|
||||||
|
|
||||||
|
# Script entry point: parse our own options, then hand the rest to unittest.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--generate', action='store_true')
    parser.add_argument('filename', default='.distributions', nargs='?')
    # nargs='*' collects the remaining positionals into a list. With nargs='?'
    # the value was a single string, and assigning a string to sys.argv[1:]
    # below would have split it into one argument per character.
    parser.add_argument('unittest_args', default=[], nargs='*')

    args = parser.parse_args()

    # The test case reads its input file from this class attribute.
    TestCopycat.Filename = args.filename

    if args.generate:
        generate()

    # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone)
    sys.argv[1:] = args.unittest_args
    unittest.main()
|
||||||
Reference in New Issue
Block a user