WIP: Add cross-chi-2 tests

This commit is contained in:
LSaldyt
2017-11-16 08:45:11 -07:00
parent d16e347f04
commit bd8bec2d37
5 changed files with 81 additions and 410 deletions

View File

@ -6,69 +6,25 @@ import sys
from pprint import pprint
from copycat import Problem
from copycat.statistics import iso_chi_squared
# TODO: update test cases to use entropy
# CHI2 values for n degrees freedom
_chiSquared_table = {
1:3.841,
2:5.991,
3:7.815,
4:9.488,
5:11.071,
6:12.592,
7:14.067,
8:15.507,
9:16.919,
10:18.307
}
class ChiSquaredException(Exception):
pass
def chi_squared(actualDict, expectedDict):
for key in expectedDict.keys():
assert key in actualDict, 'The key {} was not tested'.format(key)
actual = actualDict[key]
expected = expectedDict[key]
answerKeys = set(list(actual.keys()) + list(expected.keys()))
degreesFreedom = len(answerKeys)
chiSquared = 0
get_count = lambda k, d : d[k]['count'] if k in d else 0
for k in answerKeys:
E = get_count(k, expected)
O = get_count(k, actual)
if E == 0:
print('Warning! Expected 0 counts of {}, but got {}'.format(k, O))
else:
chiSquared += (O - E) ** 2 / E
if chiSquared >= _chiSquared_table[degreesFreedom]:
raise ChiSquaredException('Significant difference between expected and actual answer distributions: \n' +
'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
def generate():
print('Generating distributions for new file')
iterations = 30
distributions = [
Problem('abc', 'abd', 'efg', iterations, None),
Problem('abc', 'abd', 'ijk', iterations, None),
Problem('abc', 'abd', 'xyz', iterations, None),
Problem('abc', 'abd', 'ijkk', iterations, None),
Problem('abc', 'abd', 'mrrjjj', iterations, None)]
for distribution in distributions:
distribution.generate()
problems = [
Problem('abc', 'abd', 'efg', iterations),
Problem('abc', 'abd', 'ijk', iterations),
Problem('abc', 'abd', 'xyz', iterations),
Problem('abc', 'abd', 'ijkk', iterations),
Problem('abc', 'abd', 'mrrjjj', iterations)]
with open(TestCopycat.Filename, 'wb') as outfile:
pickle.dump(distributions, outfile)
return distributions
class TestCopycat(unittest.TestCase):
pickle.dump(problems, outfile)
return problems
class TestCopycat(unittest.TestCase):
Filename = None
def setUp(self):
@ -78,14 +34,14 @@ class TestCopycat(unittest.TestCase):
print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
try:
with open(TestCopycat.Filename, 'rb') as infile:
distributions = pickle.load(infile)
problems = pickle.load(infile)
except Exception as e:
print('Generating due to error:')
print(e)
distributions = generate()
problems = generate()
for distribution in distributions:
distribution.test(chi_squared)
for problem in problems:
problem.test(iso_chi_squared)
if __name__ == '__main__':
parser = argparse.ArgumentParser()