From 8203cebb1579bccbd7160d047da9b852fcd38db9 Mon Sep 17 00:00:00 2001 From: LSaldyt Date: Wed, 4 Oct 2017 15:37:22 -0600 Subject: [PATCH] Calculate Chi^2 values for getAdj- formulas --- copycat/copycat.py | 8 ++++++-- multi-run.py | 28 +++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/copycat/copycat.py b/copycat/copycat.py index 64adbef..e5f41e1 100644 --- a/copycat/copycat.py +++ b/copycat/copycat.py @@ -67,6 +67,10 @@ class Copycat(object): def run(self, initial, modified, target, iterations, testAdjFormulas=False): self.workspace.resetWithStrings(initial, modified, target) + + # I (LSaldyt) am very sorry for writing code like this. + # It will soon be deleted. I promise. + if testAdjFormulas: formulas = self.temperature.adj_formulas() else: @@ -90,9 +94,9 @@ class Copycat(object): for answer, d in answers.items(): d['avgtemp'] = d.pop('sumtemp') / d['count'] d['avgtime'] = d.pop('sumtime') / d['count'] - formulaList.append(answers) + formulaList.append((formula, answers)) if not testAdjFormulas: - return formulaList[0] + return formulaList[0][1] else: return formulaList diff --git a/multi-run.py b/multi-run.py index fc3cbfc..a5b58c6 100755 --- a/multi-run.py +++ b/multi-run.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import argparse, logging from copycat import Copycat, Reporter, plot_answers, save_answers +from collections import defaultdict class SimpleReporter(Reporter): """Reports results from a single run.""" @@ -22,9 +23,30 @@ def main(): line = line.replace('\n', '') a, b, c = line.split(',') answerList = copycat.run(a, b, c, options.iterations, True) - for answers in answerList: - for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']): - print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp'])) + results = dict() + for formula, answers in answerList: + answers = sorted(answers.items(), key=lambda kv : kv[1]['count']) + keys = [k for k, v in answers] 
+ counts = [v['count'] for k, v in answers] + results[formula] = (keys, counts) + + originalCounts = defaultdict(lambda : 0) + originalCounts.update(dict(zip(*results['original']))) + + for formula, (keys, counts) in results.items(): + if formula != 'original': + chi2 = 0 + for answer, count in zip(keys, counts): + originalCount = originalCounts[answer] + if originalCount != 0: + chi2 += (count - originalCount) ** 2 / originalCount + print((formula, chi2)) + + + + #for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']): + #print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp'])) + #filename = 'output/{}-{}-{}.csv'.format(a, b, c) #save_answers(answers, filename)