WIP: Add cross-chi-2 tests

This commit is contained in:
LSaldyt
2017-11-16 08:45:11 -07:00
parent d16e347f04
commit bd8bec2d37
5 changed files with 81 additions and 410 deletions

View File

@ -3,29 +3,34 @@ from .copycat import Copycat
from pprint import pprint
class Problem:
def __init__(self, initial, modified, target, iterations, distributions, formulas=None):
def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
self.initial = initial
self.modified = modified
self.target = target
self.iterations = iterations
self.distributions = distributions
if distributions is None:
self.distributions = self.solve()
else:
self.distributions = distributions
self.formulas = formulas
if formulas is not None:
assert hasattr(Copycat().workspace, 'temperature')
def test(self, comparison):
def test(self, comparison, expected=None):
print('-' * 120)
print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
self.modified,
self.target))
print('expected:')
pprint(self.distributions)
if expected is None:
expected = self.distributions
pprint(expected)
actual = self.solve()
print('actual:')
pprint(actual)
comparison(actual, self.distributions)
comparison(actual, expected)
print('-' * 120)
def solve(self):

57
copycat/statistics.py Normal file
View File

@ -0,0 +1,57 @@
# Chi-squared critical values, keyed by degrees of freedom (1 through 10).
# The figures agree with the standard upper-tail table at the 0.05
# significance level.
_chiSquared_table = dict(zip(
    range(1, 11),
    (3.841, 5.991, 7.815, 9.488, 11.071,
     12.592, 14.067, 15.507, 16.919, 18.307),
))
class ChiSquaredException(Exception):
    """Error type for reporting a significant chi-squared difference."""
def chi_squared(actual, expected):
    """Return the chi-squared statistic of observed versus expected counts.

    Args:
        actual: mapping of answer -> record with a 'count' entry (observed).
        expected: mapping of answer -> record with a 'count' entry (expected).

    Returns:
        The sum of (O - E) ** 2 / E over every answer present in either
        mapping, where a missing answer counts as 0.  Answers whose expected
        count is 0 cannot contribute (division by zero); a warning is printed
        for each of them instead.  The result is the int 0 when no answer
        contributes.
    """
    def count_of(answer, distribution):
        # An answer absent from a distribution was simply never produced.
        return distribution[answer]['count'] if answer in distribution else 0

    # Expected answers first, then answers that only showed up in `actual`.
    # Going through a dict (rather than a set) de-duplicates while keeping a
    # stable order, so warnings and float rounding do not vary between runs.
    answers = list(dict.fromkeys(list(expected) + list(actual)))

    statistic = 0
    for answer in answers:
        expected_count = count_of(answer, expected)
        observed_count = count_of(answer, actual)
        if expected_count == 0:
            print('Warning! Expected 0 counts of {}, but got {}'.format(answer, observed_count))
        else:
            statistic += (observed_count - expected_count) ** 2 / expected_count
    return statistic
def cross_formula_chi_squared(actualDict, expectedDict):
    """Print a chi-squared comparison for every actual/expected pairing.

    Each value of `actualDict` is compared against each value of
    `expectedDict` with `chi_squared`; a message is printed for every pair
    and a second one when the statistic reaches the tabulated critical value.
    Nothing is returned and nothing is raised for a significant difference.

    Args:
        actualDict: mapping of key -> answer distribution (answer -> record
            with a 'count' entry).
        expectedDict: mapping of key -> answer distribution, same shape.
    """
    for ka, actual in actualDict.items():
        for ke, expected in expectedDict.items():
            print('Comparing {} with {}'.format(ka, ke))
            chiSquared = chi_squared(actual, expected)
            # chi_squared returns only the statistic, so the degrees of
            # freedom are derived here: one per distinct answer seen in
            # either distribution.
            # NOTE(review): the textbook goodness-of-fit test uses
            # (categories - 1); confirm which convention is intended.
            degreesFreedom = len(set(actual) | set(expected))
            critical = _chiSquared_table.get(degreesFreedom)
            if critical is None:
                # The table only covers a fixed range; report instead of
                # crashing on a lookup miss.
                print('No chi-squared critical value tabulated for {} degrees of freedom; '
                      'skipping comparison'.format(degreesFreedom))
                continue
            if chiSquared >= critical:
                print('Significant difference between expected and actual answer distributions: \n' +
                      'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
def cross_chi_squared(problemSets):
    """Cross-compare the distributions of problems from different sets.

    Every problem of each set is paired with every problem of each set that
    comes later in `problemSets`, and the two `distributions` attributes are
    handed to `cross_formula_chi_squared`.  Problems within the same set are
    never compared with each other.
    """
    for later_start, first_set in enumerate(problemSets, start=1):
        for second_set in problemSets[later_start:]:
            pairs = ((a, b) for a in first_set for b in second_set)
            for first_problem, second_problem in pairs:
                cross_formula_chi_squared(first_problem.distributions,
                                          second_problem.distributions)
def iso_chi_squared(actualDict, expectedDict):
    """Check each expected answer distribution against its actual counterpart.

    Args:
        actualDict: mapping of key -> answer distribution (answer -> record
            with a 'count' entry) that was just produced.
        expectedDict: mapping of key -> answer distribution to compare
            against; every key here must also appear in `actualDict`.

    Raises:
        AssertionError: a key of `expectedDict` is missing from `actualDict`.
        ChiSquaredException: the chi-squared statistic for a key reaches the
            tabulated critical value, or no critical value is tabulated for
            that number of degrees of freedom.
    """
    for key, expected in expectedDict.items():
        # Raised explicitly (not via `assert`) so the check survives `-O`.
        if key not in actualDict:
            raise AssertionError('The key {} was not tested'.format(key))
        actual = actualDict[key]

        # One degree of freedom per distinct answer seen in either
        # distribution.
        # NOTE(review): the textbook goodness-of-fit test uses
        # (categories - 1); confirm which convention is intended.
        degreesFreedom = len(set(actual) | set(expected))
        if degreesFreedom == 0:
            # Two empty distributions are trivially identical.
            continue

        chiSquared = chi_squared(actual, expected)
        if degreesFreedom not in _chiSquared_table:
            raise ChiSquaredException(
                'No chi-squared critical value tabulated for {} degrees of freedom'.format(degreesFreedom))
        if chiSquared >= _chiSquared_table[degreesFreedom]:
            raise ChiSquaredException('Significant difference between expected and actual answer distributions: \n' +
                                      'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))

View File

@ -71,6 +71,5 @@ def main():
plot_answers(answers, show=not options.noshow)
save_answers(answers, 'output/answers.csv')
if __name__ == '__main__':
main()

View File

@ -1,346 +0,0 @@
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj
Answered mrrjjd (time 90)
{'mrrjjd': {'avgtime': 90.0, 'count': 1}}
mrrjjd: 1 (avg time 90.0)
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj --iterations 100
Answered mrrjjk (time 112)
Answered mrrjjk (time 112)
Answered mrrjjk (time 105)
Answered mrrjjk (time 80)
Answered mrrjjk (time 97)
Answered mrrjjk (time 122)
Answered mrrjjk (time 94)
Answered mrrjjk (time 82)
Answered mrrjjk (time 97)
Answered mrrjjk (time 65)
Answered mrrjjk (time 94)
Answered mrrjjk (time 101)
Answered mrrjjk (time 79)
Answered mrrjjk (time 90)
Answered mrrjjk (time 72)
Answered mrrjjk (time 87)
Answered mrrjjk (time 100)
Answered mrrjjk (time 118)
Answered mrrjjk (time 100)
Answered mrrjjk (time 76)
Answered mrrjjk (time 109)
Answered mrrjjk (time 137)
Answered mrrjjk (time 125)
Answered mrrjjd (time 77)
Answered mrrjjk (time 92)
Answered mrrjjk (time 155)
Answered mrrjjk (time 75)
Answered mrrjjd (time 139)
Answered mrrjjk (time 115)
Answered mrrjjk (time 82)
Answered mrrjjk (time 125)
Answered mrrjjk (time 97)
Answered mrrjjk (time 81)
Answered mrrjjk (time 105)
Answered mrrjjk (time 105)
Answered mrrjjk (time 130)
Answered mrrjjk (time 110)
Answered mrrjjk (time 156)
Answered mrrjjk (time 57)
Answered mrrjjk (time 158)
Answered mrrjjk (time 90)
Answered mrrjjk (time 92)
Answered mrrjjk (time 92)
Answered mrrjjk (time 106)
Answered mrrjjk (time 98)
Answered mrrjjk (time 69)
Answered mrrjjk (time 65)
Answered mrrjjk (time 63)
Answered mrrjjk (time 91)
Answered mrrjjk (time 111)
Answered mrrjjk (time 116)
Answered mrrjjk (time 96)
Answered mrrjjk (time 114)
Answered mrrjjk (time 113)
Answered mrrjjk (time 253)
Answered mrrjjk (time 97)
Answered mrrjjk (time 138)
Answered mrrjjk (time 121)
Answered mrrjjk (time 119)
Answered mrrjjk (time 208)
Answered mrrjjk (time 88)
Answered mrrjjk (time 139)
Answered mrrjjk (time 104)
Answered mrrjjk (time 127)
Answered mrrjjk (time 92)
Answered mrrjjk (time 81)
Answered mrrjjk (time 79)
Answered mrrjjk (time 124)
Answered mrrjjk (time 103)
Answered mrrjjk (time 93)
Answered mrrjjk (time 95)
Answered mrrjjk (time 136)
Answered mrrjjk (time 124)
Answered mrrjjk (time 181)
Answered mrrjjk (time 80)
Answered mrrjjk (time 71)
Answered mrrjjk (time 107)
Answered mrrjjk (time 79)
Answered mrrjjd (time 62)
Answered mrrjjk (time 92)
Answered mrrjjk (time 99)
Answered mrrjjk (time 65)
Answered mrrjjk (time 134)
Answered mrrjjd (time 116)
Answered mrrjjk (time 85)
Answered mrrjjk (time 107)
Answered mrrjjk (time 102)
Answered mrrjjk (time 132)
Answered mrrjjk (time 127)
Answered mrrjjk (time 92)
Answered mrrjjk (time 41)
Answered mrrjjk (time 78)
Answered mrrjjk (time 77)
Answered mrrjjk (time 140)
Answered mrrjjk (time 126)
Answered mrrjjk (time 110)
Answered mrrjjk (time 95)
Answered mrrjjk (time 127)
Answered mrrjjd (time 92)
Answered mrrjjk (time 121)
{'mrrjjd': {'avgtime': 97.2, 'count': 5},
'mrrjjk': {'avgtime': 105.28421052631579, 'count': 95}}
mrrjjd: 5 (avg time 97.2)
mrrjjk: 95 (avg time 105.3)
lsaldyt@shiva:~/projects/farg/copycat$ git checkout feature-temperature-effect-analysis
Switched to branch 'feature-temperature-effect-analysis'
Your branch is ahead of 'origin/feature-temperature-effect-analysis' by 1 commit.
(use "git push" to publish your local commits)
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj --iterations 100
Changing to adjustment formula original
Answered mrrkkk (time 559, final temperature 47.2)
Answered mrrkkk (time 1115, final temperature 39.7)
Answered mrrkkk (time 1540, final temperature 39.8)
Answered mrrjjk (time 1591, final temperature 40.2)
Answered mrrjjk (time 303, final temperature 54.8)
Answered mrrkkk (time 1418, final temperature 42.4)
Answered mrrkkk (time 610, final temperature 40.3)
Answered mrrkkk (time 2585, final temperature 41.8)
Answered mrrkkk (time 795, final temperature 38.8)
Answered mrrkkk (time 2609, final temperature 38.8)
Answered mrrkkk (time 1569, final temperature 39.3)
Answered mrrkkk (time 771, final temperature 38.5)
Answered mrrkkk (time 3046, final temperature 39.4)
Answered mrrkkk (time 1551, final temperature 41.7)
Answered mrrkkk (time 1120, final temperature 39.7)
Answered mrrkkk (time 1089, final temperature 40.0)
Answered mrrjjk (time 2029, final temperature 41.2)
Answered mrrkkk (time 1729, final temperature 39.5)
Answered mrrjjk (time 1641, final temperature 51.6)
Answered mrrkkk (time 1194, final temperature 39.0)
Answered mrrkkk (time 950, final temperature 39.0)
Answered mrrkkk (time 2388, final temperature 40.0)
Answered mrrjjk (time 547, final temperature 47.0)
Answered mrrjjk (time 3121, final temperature 39.6)
Answered mrrkkk (time 1611, final temperature 48.0)
Answered mrrkkk (time 2819, final temperature 41.7)
Answered mrrkkk (time 1249, final temperature 55.9)
Answered mrrjjk (time 9285, final temperature 40.7)
Answered mrrjjk (time 341, final temperature 45.2)
Answered mrrkkk (time 1193, final temperature 40.4)
Answered mrrjjk (time 2199, final temperature 40.4)
Answered mrrkkk (time 2958, final temperature 39.8)
Answered mrrkkk (time 1463, final temperature 39.3)
Answered mrrjjd (time 1278, final temperature 54.7)
Answered mrrkkk (time 1217, final temperature 39.5)
Answered mrrkkk (time 534, final temperature 45.9)
Answered mrrkkk (time 1032, final temperature 39.7)
Answered mrrkkk (time 1749, final temperature 40.1)
Answered mrrkkk (time 549, final temperature 39.8)
Answered mrrkkk (time 2385, final temperature 41.9)
Answered mrrkkk (time 890, final temperature 38.8)
Answered mrrkkk (time 1997, final temperature 39.6)
Answered mrrkkk (time 1369, final temperature 49.3)
Answered mrrkkk (time 1567, final temperature 42.6)
Answered mrrkkk (time 966, final temperature 39.6)
Answered mrrkkk (time 472, final temperature 44.2)
Answered mrrd (time 627, final temperature 47.4)
Answered mrrkkk (time 526, final temperature 39.9)
Answered mrrkkk (time 2873, final temperature 38.9)
Answered mrrkkk (time 2136, final temperature 41.8)
Answered mrrkkk (time 2479, final temperature 41.1)
Answered mrrkkk (time 943, final temperature 41.8)
Answered mrrjjk (time 115, final temperature 59.4)
Answered mrrkkk (time 2702, final temperature 39.0)
Answered mrrkkk (time 779, final temperature 40.1)
Answered mrrjjk (time 410, final temperature 47.7)
Answered mrrkkk (time 1402, final temperature 39.9)
Answered mrrkkk (time 986, final temperature 39.5)
Answered mrrkkk (time 929, final temperature 39.9)
Answered mrrkkk (time 2139, final temperature 39.6)
Answered mrrjjk (time 580, final temperature 51.4)
Answered mrrkkk (time 685, final temperature 41.2)
Answered mrrkkk (time 1822, final temperature 38.1)
Answered mrrkkk (time 1424, final temperature 39.9)
Answered mrrkkk (time 2572, final temperature 40.1)
Answered mrrkkk (time 1763, final temperature 39.1)
Answered mrrkkk (time 1628, final temperature 47.5)
Answered mrrjjk (time 550, final temperature 48.5)
Answered mrrkkk (time 2478, final temperature 39.8)
Answered mrrkkk (time 3211, final temperature 38.8)
Answered mrrkkk (time 465, final temperature 46.4)
Answered mrrjjk (time 2658, final temperature 41.1)
Answered mrrkkk (time 8031, final temperature 39.1)
Answered mrrjjk (time 656, final temperature 45.8)
Answered mrrkkk (time 544, final temperature 38.9)
Answered mrrkkk (time 1584, final temperature 39.7)
Answered mrrkkk (time 654, final temperature 40.5)
Answered mrrkkk (time 6083, final temperature 39.1)
Answered mrrkkk (time 4492, final temperature 39.8)
Answered mrrkkk (time 902, final temperature 39.8)
Answered mrrkkk (time 758, final temperature 39.1)
Answered mrrkkk (time 1630, final temperature 39.8)
Answered mrrjjk (time 915, final temperature 44.2)
Answered mrrjjk (time 2037, final temperature 46.5)
Answered mrrkkk (time 2473, final temperature 43.2)
Answered mrrkkk (time 1707, final temperature 43.7)
Answered mrrjjk (time 5846, final temperature 41.1)
Answered mrrkkk (time 1140, final temperature 41.1)
Answered mrrkkk (time 1978, final temperature 39.1)
Answered mrrkkk (time 1494, final temperature 39.1)
Answered mrrkkk (time 1488, final temperature 40.5)
Answered mrrjjk (time 2378, final temperature 43.9)
Answered mrrkkk (time 6334, final temperature 40.9)
Answered mrrkkk (time 984, final temperature 39.6)
Answered mrrkkk (time 3887, final temperature 40.5)
Answered mrrkkk (time 1772, final temperature 42.7)
Answered mrrjjk (time 999, final temperature 45.5)
Answered mrrkkk (time 2352, final temperature 39.9)
Answered mrrkkk (time 1065, final temperature 39.3)
Answered mrrjjk (time 905, final temperature 55.2)
The formula original provided:
Average difference: 0.028295162318835276
{'mrrd': {'avgtemp': 47.37583217746938, 'avgtime': 627.0, 'count': 1},
'mrrjjd': {'avgtemp': 54.66020964652063, 'avgtime': 1278.0, 'count': 1},
'mrrjjk': {'avgtemp': 46.23473786693079,
'avgtime': 1862.1904761904761,
'count': 21},
'mrrkkk': {'avgtemp': 40.89543617652126,
'avgtime': 1791.5844155844156,
'count': 77}}
Changing to adjustment formula best
Answered mrrjjk (time 1591, final temperature 42.2)
Answered mrrkkk (time 2596, final temperature 53.7)
Answered mrrjjk (time 2621, final temperature 47.2)
Answered mrrjjk (time 2280, final temperature 50.7)
Answered mrrjjk (time 1044, final temperature 54.1)
Answered mrrkkk (time 2718, final temperature 40.9)
Answered mrrkkk (time 1638, final temperature 43.6)
Answered mrrjjjj (time 2555, final temperature 19.8)
Answered mrrjjk (time 565, final temperature 54.4)
Answered mrrkkk (time 1374, final temperature 39.2)
Answered mrrkkk (time 836, final temperature 49.1)
Answered mrrjjjj (time 3500, final temperature 17.9)
Answered mrrkkk (time 971, final temperature 39.6)
Answered mrrjjjj (time 845, final temperature 12.2)
Answered mrrjjjj (time 856, final temperature 18.3)
Answered nrrjjj (time 1108, final temperature 46.4)
Answered mrrjjk (time 711, final temperature 59.7)
Answered mrrjjjj (time 1769, final temperature 18.1)
Answered mrrkkk (time 986, final temperature 44.0)
Answered mrrjjk (time 2288, final temperature 47.5)
Answered mrrkkk (time 1300, final temperature 40.9)
Answered mrrjjk (time 3549, final temperature 52.5)
Answered mrrkkk (time 1725, final temperature 36.6)
Answered mrrjjk (time 528, final temperature 56.5)
Answered mrrkkk (time 4370, final temperature 45.5)
Answered mrrjjk (time 4833, final temperature 48.0)
Answered mrrjjjj (time 1586, final temperature 16.2)
Answered mrrkkk (time 3567, final temperature 42.3)
Answered mrrkkk (time 2323, final temperature 42.0)
Answered mrrjjjj (time 729, final temperature 19.3)
Answered mrrkkk (time 2556, final temperature 42.7)
Answered mrrkkk (time 2177, final temperature 42.7)
Answered mrrkkk (time 3675, final temperature 43.1)
Answered mrrkkk (time 980, final temperature 41.5)
Answered mrrkkk (time 714, final temperature 51.4)
Answered mrrjjjj (time 4852, final temperature 19.7)
Answered mrrjjk (time 555, final temperature 49.5)
Answered mrrjjk (time 1966, final temperature 40.5)
Answered mrrkkk (time 2063, final temperature 39.0)
Answered mrrkkk (time 3212, final temperature 38.8)
Answered mrrkkk (time 997, final temperature 39.2)
Answered mrrkkk (time 2353, final temperature 43.8)
Answered mrrkkk (time 3841, final temperature 39.5)
Answered mrrjjjj (time 779, final temperature 19.6)
Answered mrrjjk (time 939, final temperature 43.9)
Answered mrrjjd (time 1472, final temperature 45.0)
Answered mrrjjjj (time 790, final temperature 17.7)
Answered mrrjjk (time 3372, final temperature 44.2)
Answered mrrjjjj (time 4746, final temperature 18.3)
Answered mrrkkk (time 2040, final temperature 46.4)
Answered mrrkkk (time 4106, final temperature 42.8)
Answered mrrjjjj (time 960, final temperature 14.2)
Answered mrrkkk (time 1979, final temperature 39.9)
Answered mrrjjk (time 1085, final temperature 48.1)
Answered mrrjjjj (time 693, final temperature 18.5)
Answered mrrkkk (time 1490, final temperature 41.5)
Answered mrrkkk (time 2587, final temperature 41.1)
Answered mrrjjk (time 574, final temperature 47.4)
Answered mrrjjk (time 889, final temperature 50.9)
Answered mrrjjk (time 1170, final temperature 38.0)
Answered mrrjjk (time 603, final temperature 47.9)
Answered mrrkkk (time 595, final temperature 42.8)
Answered mrrkkk (time 3329, final temperature 38.3)
Answered mrrjjjj (time 1589, final temperature 13.3)
Answered mrrjjjj (time 1146, final temperature 16.1)
Answered mrrjjk (time 1300, final temperature 51.3)
Answered mrrjjk (time 699, final temperature 41.6)
Answered mrrjjjj (time 2570, final temperature 18.7)
Answered mrrjjk (time 1412, final temperature 46.2)
Answered mrrjjk (time 1841, final temperature 44.9)
Answered mrrjjjj (time 1354, final temperature 19.0)
Answered mrrjjk (time 956, final temperature 34.7)
Answered mrrjjk (time 5125, final temperature 40.7)
Answered mrrkkk (time 2663, final temperature 49.6)
Answered mrrjjk (time 486, final temperature 47.3)
Answered mrrjjd (time 3135, final temperature 49.7)
Answered mrrjjjj (time 1960, final temperature 16.9)
Answered mrrjjk (time 384, final temperature 49.0)
Answered mrrkkk (time 12938, final temperature 41.4)
Answered mrrjjjj (time 1067, final temperature 18.4)
Answered mrrkkk (time 2759, final temperature 39.2)
Answered mrrjkk (time 573, final temperature 46.8)
Answered mrrkkk (time 799, final temperature 41.2)
Answered mrrjjk (time 671, final temperature 53.8)
Answered mrrkkk (time 851, final temperature 44.0)
Answered mrrkkk (time 3239, final temperature 44.6)
Answered mrrjjk (time 988, final temperature 53.0)
Answered mrrjjjj (time 1114, final temperature 18.8)
Answered mrrjjjj (time 1053, final temperature 20.6)
Answered mrrkkk (time 1292, final temperature 44.9)
Answered mrrjjjj (time 1969, final temperature 18.6)
Answered mrrkkk (time 1831, final temperature 40.4)
Answered mrrkkk (time 2394, final temperature 46.4)
Answered mrrkkk (time 3041, final temperature 47.4)
Answered mrrjjjj (time 1233, final temperature 18.4)
Answered mrrjjjj (time 983, final temperature 19.2)
Answered mrrkkk (time 980, final temperature 49.0)
Answered mrrkkk (time 1325, final temperature 39.2)
Answered mrrjjk (time 585, final temperature 69.1)
Answered mrrkkk (time 562, final temperature 48.7)
The formula best provided:
Average difference: 0.12629281972431122
{'mrrjjd': {'avgtemp': 47.36375177458433, 'avgtime': 2303.5, 'count': 2},
'mrrjjjj': {'avgtemp': 17.82623805422265, 'avgtime': 1695.75, 'count': 24},
'mrrjjk': {'avgtemp': 48.48478317064579,
'avgtime': 1520.3333333333333,
'count': 30},
'mrrjkk': {'avgtemp': 46.8418022350217, 'avgtime': 573.0, 'count': 1},
'mrrkkk': {'avgtemp': 43.04275727525823,
'avgtime': 2327.904761904762,
'count': 42},
'nrrjjj': {'avgtemp': 46.370799897149105, 'avgtime': 1108.0, 'count': 1}}
mrrjjjj: 24 (avg time 1695.8, avg temp 17.8)
mrrkkk: 42 (avg time 2327.9, avg temp 43.0)
nrrjjj: 1 (avg time 1108.0, avg temp 46.4)
mrrjkk: 1 (avg time 573.0, avg temp 46.8)
mrrjjd: 2 (avg time 2303.5, avg temp 47.4)
mrrjjk: 30 (avg time 1520.3, avg temp 48.5)
lsaldyt@shiva:~/projects/farg/copycat$

View File

@ -6,69 +6,25 @@ import sys
from pprint import pprint
from copycat import Problem
from copycat.statistics import iso_chi_squared
# TODO: update test cases to use entropy
# CHI2 values for n degrees freedom
_chiSquared_table = {
1:3.841,
2:5.991,
3:7.815,
4:9.488,
5:11.071,
6:12.592,
7:14.067,
8:15.507,
9:16.919,
10:18.307
}
class ChiSquaredException(Exception):
pass
def chi_squared(actualDict, expectedDict):
for key in expectedDict.keys():
assert key in actualDict, 'The key {} was not tested'.format(key)
actual = actualDict[key]
expected = expectedDict[key]
answerKeys = set(list(actual.keys()) + list(expected.keys()))
degreesFreedom = len(answerKeys)
chiSquared = 0
get_count = lambda k, d : d[k]['count'] if k in d else 0
for k in answerKeys:
E = get_count(k, expected)
O = get_count(k, actual)
if E == 0:
print('Warning! Expected 0 counts of {}, but got {}'.format(k, O))
else:
chiSquared += (O - E) ** 2 / E
if chiSquared >= _chiSquared_table[degreesFreedom]:
raise ChiSquaredException('Significant difference between expected and actual answer distributions: \n' +
'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
def generate():
print('Generating distributions for new file')
iterations = 30
distributions = [
Problem('abc', 'abd', 'efg', iterations, None),
Problem('abc', 'abd', 'ijk', iterations, None),
Problem('abc', 'abd', 'xyz', iterations, None),
Problem('abc', 'abd', 'ijkk', iterations, None),
Problem('abc', 'abd', 'mrrjjj', iterations, None)]
for distribution in distributions:
distribution.generate()
problems = [
Problem('abc', 'abd', 'efg', iterations),
Problem('abc', 'abd', 'ijk', iterations),
Problem('abc', 'abd', 'xyz', iterations),
Problem('abc', 'abd', 'ijkk', iterations),
Problem('abc', 'abd', 'mrrjjj', iterations)]
with open(TestCopycat.Filename, 'wb') as outfile:
pickle.dump(distributions, outfile)
return distributions
class TestCopycat(unittest.TestCase):
pickle.dump(problems, outfile)
return problems
class TestCopycat(unittest.TestCase):
Filename = None
def setUp(self):
@ -78,14 +34,14 @@ class TestCopycat(unittest.TestCase):
print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
try:
with open(TestCopycat.Filename, 'rb') as infile:
distributions = pickle.load(infile)
problems = pickle.load(infile)
except Exception as e:
print('Generating due to error:')
print(e)
distributions = generate()
problems = generate()
for distribution in distributions:
distribution.test(chi_squared)
for problem in problems:
problem.test(iso_chi_squared)
if __name__ == '__main__':
parser = argparse.ArgumentParser()