diff --git a/.distributions b/.distributions new file mode 100644 index 0000000..5f4912f Binary files /dev/null and b/.distributions differ diff --git a/.gitignore b/.gitignore index fc96e13..308d314 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ pip-log.txt # Unit test / coverage reports .coverage .tox +.log # Other filesystems .svn diff --git a/.ipynb_checkpoints/Copycat-checkpoint.ipynb b/.ipynb_checkpoints/Copycat-checkpoint.ipynb new file mode 100644 index 0000000..2fd6442 --- /dev/null +++ b/.ipynb_checkpoints/Copycat-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Copycat.ipynb b/Copycat.ipynb new file mode 100644 index 0000000..2225b63 --- /dev/null +++ b/Copycat.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Copycat \n", + "\n", + "Just type your copycat example, and the number of iterations." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Answered iijjkl (time 1374, final temperature 13.5)\n", + "Answered iijjll (time 665, final temperature 19.6)\n", + "Answered iijjll (time 406, final temperature 16.6)\n", + "Answered iijjkl (time 379, final temperature 47.9)\n", + "Answered iijjll (time 556, final temperature 19.2)\n", + "Answered iijjkl (time 813, final temperature 42.8)\n", + "Answered iijjll (time 934, final temperature 15.5)\n", + "Answered iijjkl (time 1050, final temperature 49.5)\n", + "Answered iijjkl (time 700, final temperature 44.0)\n", + "Answered iijjkl (time 510, final temperature 34.8)\n", + "Answered iijjkl (time 673, final temperature 18.1)\n", + "Answered iijjkl (time 1128, final temperature 19.8)\n", + "Answered iijjll (time 961, final temperature 19.9)\n", + "Answered iijjll (time 780, final temperature 16.5)\n", + "Answered iijjll (time 607, final temperature 17.8)\n", + 
"Answered iijjll (time 594, final temperature 39.7)\n", + "Answered iijjll (time 736, final temperature 18.4)\n", + "Answered iijjll (time 903, final temperature 18.6)\n", + "Answered iijjll (time 601, final temperature 20.6)\n", + "Answered iijjll (time 949, final temperature 42.4)\n", + "iijjll: 12 (avg time 724.3, avg temp 22.1)\n", + "iijjkl: 8 (avg time 828.4, avg temp 33.8)\n" + ] + } + ], + "source": [ + "%run main.py abc abd iijjkk --iterations 20" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index 9a18937..0fe9ab9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ co.py.cat ![GUI](https://i.imgur.com/7pb20g0.png) An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm. -The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. +The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. See also [Farglexandria](https://github.com/Alex-Linhares/Farglexandria). This implementation is a copycat of Scott Boland's [Java implementation](https://archive.org/details/JavaCopycat). The original Java-to-Python translation work was done by J Alan Brogan (@jalanb on GitHub). 
@@ -75,3 +75,10 @@ $ python ``` The result of `run` is a dict containing the same information as was printed by `main.py` above. + + + +Questions +--------- + +1. Why are codelets **NOT** implemented through lambda? diff --git a/copycat/__init__.py b/copycat/__init__.py index 67e5cc9..292c4b6 100644 --- a/copycat/__init__.py +++ b/copycat/__init__.py @@ -1 +1,2 @@ from .copycat import Copycat, Reporter # noqa +from .problem import Problem diff --git a/copycat/codeletMethods.py b/copycat/codeletMethods.py index d8484d8..e4ae446 100644 --- a/copycat/codeletMethods.py +++ b/copycat/codeletMethods.py @@ -74,8 +74,10 @@ def __structureVsStructure(structure1, weight1, structure2, weight2): temperature = ctx.temperature structure1.updateStrength() structure2.updateStrength() + # TODO: use entropy weightedStrength1 = temperature.getAdjustedValue( structure1.totalStrength * weight1) + # TODO: use entropy weightedStrength2 = temperature.getAdjustedValue( structure2.totalStrength * weight2) return random.weighted_greater_than(weightedStrength1, weightedStrength2) @@ -111,6 +113,7 @@ def __slippability(ctx, conceptMappings): temperature = ctx.temperature for mapping in conceptMappings: slippiness = mapping.slippability() / 100.0 + # TODO: use entropy probabilityOfSlippage = temperature.getAdjustedProbability(slippiness) if random.coinFlip(probabilityOfSlippage): return True @@ -122,6 +125,7 @@ def breaker(ctx, codelet): random = ctx.random temperature = ctx.temperature workspace = ctx.workspace + # TODO: use entropy probabilityOfFizzle = (100.0 - temperature.value()) / 100.0 if random.coinFlip(probabilityOfFizzle): return @@ -138,6 +142,7 @@ def breaker(ctx, codelet): breakObjects += [structure.source.group] # Break all the objects or none of them; this matches the Java for structure in breakObjects: + # TODO: use entropy breakProbability = temperature.getAdjustedProbability( structure.totalStrength / 100.0) if random.coinFlip(breakProbability): @@ -149,8 +154,7 @@ def 
breaker(ctx, codelet): def chooseRelevantDescriptionByActivation(ctx, workspaceObject): random = ctx.random descriptions = workspaceObject.relevantDescriptions() - weights = [description.descriptor.activation - for description in descriptions] + weights = [description.descriptor.activation for description in descriptions] return random.weighted_choice(descriptions, weights) @@ -160,6 +164,7 @@ def similarPropertyLinks(ctx, slip_node): result = [] for slip_link in slip_node.propertyLinks: association = slip_link.degreeOfAssociation() / 100.0 + # TODO:use entropy probability = temperature.getAdjustedProbability(association) if random.coinFlip(probability): result += [slip_link] @@ -182,7 +187,7 @@ def bottom_up_description_scout(ctx, codelet): sliplinks = similarPropertyLinks(ctx, description.descriptor) assert sliplinks weights = [sliplink.degreeOfAssociation() * sliplink.destination.activation - for sliplink in sliplinks] + for sliplink in sliplinks] chosen = random.weighted_choice(sliplinks, weights) chosenProperty = chosen.destination coderack.proposeDescription(chosenObject, chosenProperty.category(), @@ -215,6 +220,7 @@ def description_strength_tester(ctx, codelet): description.descriptor.buffer = 100.0 description.updateStrength() strength = description.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) assert random.coinFlip(probability) coderack.newCodelet('description-builder', strength, [description]) @@ -298,7 +304,7 @@ def rule_scout(ctx, codelet): workspace = ctx.workspace assert workspace.numberOfUnreplacedObjects() == 0 changedObjects = [o for o in workspace.initial.objects if o.changed] - #assert len(changedObjects) < 2 + # assert len(changedObjects) < 2 # if there are no changed objects, propose a rule with no changes if not changedObjects: return coderack.proposeRule(None, None, None, None) @@ -328,8 +334,8 @@ def rule_scout(ctx, codelet): if targetObject.described(node): if 
targetObject.distinguishingDescriptor(node): newList += [node] - objectList = newList # surely this should be += - # "union of this and distinguishing descriptors" + objectList = newList # surely this should be += + # "union of this and distinguishing descriptors" assert objectList # use conceptual depth to choose a description weights = [ @@ -360,6 +366,7 @@ def rule_strength_tester(ctx, codelet): temperature = ctx.temperature rule = codelet.arguments[0] rule.updateStrength() + # TODO: use entropy probability = temperature.getAdjustedProbability(rule.totalStrength / 100.0) if random.coinFlip(probability): coderack.newCodelet('rule-builder', rule.totalStrength, [rule]) @@ -392,8 +399,8 @@ def replacement_finder(ctx, codelet): relation = relations[diff] else: relation = None - letterOfInitialString.replacement = Replacement(ctx, - letterOfInitialString, letterOfModifiedString, relation) + letterOfInitialString.replacement = Replacement(ctx, letterOfInitialString, + letterOfModifiedString, relation) if relation != slipnet.sameness: letterOfInitialString.changed = True workspace.changedObject = letterOfInitialString @@ -436,8 +443,8 @@ def top_down_bond_scout__direction(ctx, codelet): coderack = ctx.coderack slipnet = ctx.slipnet direction = codelet.arguments[0] - source = __getScoutSource(ctx, - direction, formulas.localDirectionCategoryRelevance, 'bond') + source = __getScoutSource(ctx, direction, formulas.localDirectionCategoryRelevance, + 'bond') destination = chooseDirectedNeighbor(ctx, source, direction) assert destination logging.info('to object: %s', destination) @@ -462,6 +469,7 @@ def bond_strength_tester(ctx, codelet): __showWhichStringObjectIsFrom(bond) bond.updateStrength() strength = bond.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) logging.info('bond strength = %d for %s', strength, bond) assert random.coinFlip(probability) @@ -502,7 +510,7 @@ def bond_builder(ctx, codelet): if 
incompatibleCorrespondences: logging.info("trying to break incompatible correspondences") assert __fight(bond, 2.0, incompatibleCorrespondences, 3.0) - #assert __fightIncompatibles(incompatibleCorrespondences, + # assert __fightIncompatibles(incompatibleCorrespondences, # bond, 'correspondences', 2.0, 3.0) for incompatible in incompatibleBonds: incompatible.break_the_structure() @@ -692,7 +700,7 @@ def top_down_group_scout__direction(ctx, codelet): direction, bondFacet) -#noinspection PyStringFormat +# noinspection PyStringFormat @codelet('group-scout--whole-string') def group_scout__whole_string(ctx, codelet): coderack = ctx.coderack @@ -744,6 +752,7 @@ def group_strength_tester(ctx, codelet): __showWhichStringObjectIsFrom(group) group.updateStrength() strength = group.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) if random.coinFlip(probability): # it is strong enough - post builder & activate nodes @@ -871,6 +880,7 @@ def rule_translator(ctx, codelet): bondDensity = min(bondDensity, 1.0) weights = __getCutoffWeights(bondDensity) cutoff = 10.0 * random.weighted_choice(list(range(1, 11)), weights) + # TODO: use entropy if cutoff >= temperature.actual_value: result = workspace.rule.buildTranslatedRule() if result is not None: @@ -907,11 +917,11 @@ def bottom_up_correspondence_scout(ctx, codelet): and m.initialDescriptionType != slipnet.bondFacet] initialDescriptionTypes = [m.initialDescriptionType for m in opposites] flipTargetObject = False - if (objectFromInitial.spansString() and - objectFromTarget.spansString() and - slipnet.directionCategory in initialDescriptionTypes - and all(m.label == slipnet.opposite for m in opposites) # unreached? - and slipnet.opposite.activation != 100.0): + if (objectFromInitial.spansString() and + objectFromTarget.spansString() and + slipnet.directionCategory in initialDescriptionTypes + and all(m.label == slipnet.opposite for m in opposites) # unreached? 
+ and slipnet.opposite.activation != 100.0): objectFromTarget = objectFromTarget.flippedVersion() conceptMappings = formulas.getMappings( objectFromInitial, objectFromTarget, @@ -927,6 +937,7 @@ def important_object_correspondence_scout(ctx, codelet): coderack = ctx.coderack random = ctx.random slipnet = ctx.slipnet + # TODO: use entropy temperature = ctx.temperature workspace = ctx.workspace objectFromInitial = chooseUnmodifiedObject(ctx, 'relativeImportance', @@ -966,11 +977,11 @@ def important_object_correspondence_scout(ctx, codelet): and m.initialDescriptionType != slipnet.bondFacet] initialDescriptionTypes = [m.initialDescriptionType for m in opposites] flipTargetObject = False - if (objectFromInitial.spansString() - and objectFromTarget.spansString() - and slipnet.directionCategory in initialDescriptionTypes - and all(m.label == slipnet.opposite for m in opposites) # unreached? - and slipnet.opposite.activation != 100.0): + if (objectFromInitial.spansString() + and objectFromTarget.spansString() + and slipnet.directionCategory in initialDescriptionTypes + and all(m.label == slipnet.opposite for m in opposites) # unreached? 
+ and slipnet.opposite.activation != 100.0): objectFromTarget = objectFromTarget.flippedVersion() conceptMappings = formulas.getMappings( objectFromInitial, objectFromTarget, @@ -997,6 +1008,7 @@ def correspondence_strength_tester(ctx, codelet): objectFromTarget.flipped_version()))) correspondence.updateStrength() strength = correspondence.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) if random.coinFlip(probability): # activate some concepts @@ -1050,8 +1062,8 @@ def correspondence_builder(ctx, codelet): # if there is an incompatible bond then fight against it initial = correspondence.objectFromInitial target = correspondence.objectFromTarget - if (initial.leftmost or initial.rightmost and - target.leftmost or target.rightmost): + if (initial.leftmost or initial.rightmost and + target.leftmost or target.rightmost): # search for the incompatible bond incompatibleBond = correspondence.getIncompatibleBond() if incompatibleBond: diff --git a/copycat/coderack.py b/copycat/coderack.py index fb58e2a..d8bca0b 100644 --- a/copycat/coderack.py +++ b/copycat/coderack.py @@ -83,6 +83,7 @@ class Coderack(object): if 'correspondence' in codeletName: return workspace.interStringUnhappiness / 100.0 if 'description' in codeletName: + # TODO: use entropy return (temperature.value() / 100.0) ** 2 return workspace.intraStringUnhappiness / 100.0 @@ -161,6 +162,8 @@ class Coderack(object): urgency = 3 if codeletName == 'breaker': urgency = 1 + + # TODO: use entropy if temperature.value() < 25.0 and 'translator' in codeletName: urgency = 5 for _ in range(howMany): @@ -287,6 +290,8 @@ class Coderack(object): random = self.ctx.random temperature = self.ctx.temperature assert self.codelets + + # TODO: use entropy scale = (100.0 - temperature.value() + 10.0) / 15.0 chosen = random.weighted_choice(self.codelets, [codelet.urgency ** scale for codelet in self.codelets]) self.removeCodelet(chosen) diff --git a/copycat/curses_reporter.py 
b/copycat/curses_reporter.py index 08f24db..1bd224a 100644 --- a/copycat/curses_reporter.py +++ b/copycat/curses_reporter.py @@ -239,6 +239,7 @@ class CursesReporter(Reporter): w.border() w.refresh() + #TODO: use entropy def report_temperature(self, temperature): self.do_keyboard_shortcuts() w = self.temperatureWindow diff --git a/copycat/group.py b/copycat/group.py index 015dae3..0d3abb4 100644 --- a/copycat/group.py +++ b/copycat/group.py @@ -96,6 +96,7 @@ class Group(WorkspaceObject): support = self.localSupport() / 100.0 activation = slipnet.length.activation / 100.0 supportedActivation = (support * activation) ** exp + #TODO: use entropy return temperature.getAdjustedProbability(supportedActivation) def flippedVersion(self): @@ -130,6 +131,7 @@ class Group(WorkspaceObject): cubedlength = length ** 3 fred = cubedlength * (100.0 - slipnet.length.activation) / 100.0 probability = 0.5 ** fred + #TODO: use entropy value = temperature.getAdjustedProbability(probability) if value < 0.06: value = 0.0 diff --git a/copycat/problem.py b/copycat/problem.py new file mode 100644 index 0000000..bf335e5 --- /dev/null +++ b/copycat/problem.py @@ -0,0 +1,62 @@ +from .copycat import Copycat + +from pprint import pprint + +class Problem: + def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None): + self.formulas = formulas + self.initial = initial + self.modified = modified + self.target = target + + self.iterations = iterations + if distributions is None: + self.distributions = self.solve() + else: + self.distributions = distributions + if formulas is not None: + assert hasattr(Copycat().workspace, 'temperature') + + def test(self, comparison, expected=None): + print('-' * 120) + print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial, + self.modified, + self.target)) + print('expected:') + if expected is None: + expected = self.distributions + pprint(expected) + + actual = self.solve() + print('actual:') + pprint(actual) + 
comparison(actual, expected) + print('-' * 120) + + def solve(self): + print('-' * 120) + print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial, + self.modified, + self.target)) + copycat = Copycat() + answers = dict() + if self.formulas is None: + if hasattr(copycat.workspace, 'temperature'): + formula = copycat.workspace.temperature.getAdj() + else: + formula = None + answers[formula] = copycat.run(self.initial, + self.modified, + self.target, + self.iterations) + else: + for formula in self.formulas: + copycat.temperature.useAdj(formula) + answers[formula] = copycat.run(self.initial, + self.modified, + self.target, + self.iterations) + return answers + + def generate(self): + self.distributions = self.solve() diff --git a/copycat/statistics.py b/copycat/statistics.py new file mode 100644 index 0000000..4f1ffe3 --- /dev/null +++ b/copycat/statistics.py @@ -0,0 +1,57 @@ +# CHI2 values for n degrees freedom +_chiSquared_table = { + 1:3.841, + 2:5.991, + 3:7.815, + 4:9.488, + 5:11.071, + 6:12.592, + 7:14.067, + 8:15.507, + 9:16.919, + 10:18.307 + } + +class ChiSquaredException(Exception): + pass + +def chi_squared(actual, expected): + answerKeys = set(list(actual.keys()) + list(expected.keys())) + degreesFreedom = len(answerKeys) + chiSquared = 0 + + get_count = lambda k, d : d[k]['count'] if k in d else 0 + + for k in answerKeys: + E = get_count(k, expected) + O = get_count(k, actual) + if E == 0: + print('Warning! 
Expected 0 counts of {}, but got {}'.format(k, O)) + else: + chiSquared += (O - E) ** 2 / E + return chiSquared + +def cross_formula_chi_squared(actualDict, expectedDict): + for ka, actual in actualDict.items(): + for ke, expected in expectedDict.items(): + print('Comparing {} with {}'.format(ka, ke)) + chiSquared = chi_squared(actual, expected) + degreesFreedom = len(set(list(actual.keys()) + list(expected.keys()))) + if chiSquared >= _chiSquared_table[degreesFreedom]: + print('Significant difference between expected and actual answer distributions: \n' + + 'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom)) + +def cross_chi_squared(problemSets): + for i, problemSetA in enumerate(problemSets): + for problemSetB in problemSets[i + 1:]: + for problemA in problemSetA: + for problemB in problemSetB: + answersA = problemA.distributions + answersB = problemB.distributions + cross_formula_chi_squared(answersA, answersB) + +def iso_chi_squared(actualDict, expectedDict): + for key in expectedDict.keys(): + assert key in actualDict, 'The key {} was not tested'.format(key) + actual = actualDict[key] + expected = expectedDict[key] diff --git a/copycat/temperature.py b/copycat/temperature.py index 37a5d6f..11a03f4 100644 --- a/copycat/temperature.py +++ b/copycat/temperature.py @@ -35,6 +35,7 @@ class Temperature(object): def getAdjustedValue(self, value): return value ** (((100.0 - self.value()) / 30.0) + 0.5) + """ def getAdjustedProbability(self, value): if value == 0 or value == 0.5 or self.value() == 0: return value @@ -45,3 +46,180 @@ c = (10 - a) / 100 f = (c + 1) * value return max(f, 0.5) + """ + + def getAdjustedProbability(self, value): + """ + This function returns the probability for a decision. + Copied above. + + Please look at the last line of it. Strangely, it was + return max(f, 0.5). Does that make sense? Let's compare + some results. 
Where it was (0.5), we obtained, for example: + + iiijjjlll: 670 (avg time 1108.5, avg temp 23.6) + iiijjjd: 2 (avg time 1156.0, avg temp 35.0) + iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5) + iiijjjkll: 8 (avg time 2096.8, avg temp 44.1) + iiijjjkkd: 5 (avg time 837.2, avg temp 48.0) + + wyz: 5 (avg time 2275.2, avg temp 14.9) + xyd: 982 (avg time 2794.4, avg temp 17.5) + yyz: 7 (avg time 2731.9, avg temp 25.1) + dyz: 2 (avg time 3320.0, avg temp 27.1) + xyy: 2 (avg time 4084.5, avg temp 31.1) + xyz: 2 (avg time 1873.5, avg temp 52.1) + + Now, let's see what return max(f, 0.0000) does: + + wyz: 7 (avg time 3192.9, avg temp 13.1) + xyd: 985 (avg time 2849.1, avg temp 17.5) + yyz: 6 (avg time 3836.7, avg temp 18.6) + xyy: 1 (avg time 1421.0, avg temp 19.5) + xyz: 1 (avg time 7350.0, avg temp 48.3) + + They *seem* better (in the strict sense that we've obtained both + lower T and more times of wyz.) But they're *not* statistically + significant (for 1000 runs). + + Now... looking at the code... it seems to be a mess... what does + function f() even mean in intuitive terms? + + Work it does, but dude... quite a hack. + + Another run, with return f @line89: + + wyz: 8 (avg time 4140.5, avg temp 13.3) + yyz: 6 (avg time 2905.2, avg temp 14.5) + xyd: 982 (avg time 3025.4, avg temp 17.6) + dyz: 4 (avg time 4265.0, avg temp 17.7) + + Does it even matter? Another (quick) run, I think with return (0.5): + + dyz: 1 (avg time 5198.0, avg temp 15.3) + wyz: 3 (avg time 4043.7, avg temp 17.1) + yyz: 9 (avg time 3373.6, avg temp 21.0) + xyd: 84 (avg time 5011.1, avg temp 23.3) + xyy: 3 (avg time 4752.0, avg temp 27.9) + + Compared to return(0.99): + + xyd: 1000 (avg time 1625.2, avg temp 17.3) + + Comparing to return f --> Statistically significant. + Comparing to return(0.5) --> same, so this return value does something. 
+ + Now running return(0.0): + + xyz: 3 (avg time 3996.7, avg temp 81.1) + dyz: 46 (avg time 5931.7, avg temp 82.6) + xd: 17 (avg time 6090.3, avg temp 83.8) + xyd: 934 (avg time 7699.8, avg temp 88.1) + + It's bad overall, but at least it's statistically significant! + + return (-f * (math.log2(f))) # Entropy test #1 (global). + + wyz: 123 (avg time 5933.1, avg temp 16.5) + xyy: 200 (avg time 6486.7, avg temp 27.8) + yyz: 330 (avg time 6310.2, avg temp 38.5) + dyz: 75 (avg time 6393.3, avg temp 39.6) + yzz: 5 (avg time 4965.0, avg temp 59.3) + xyz: 160 (avg time 6886.2, avg temp 60.2) + xd: 4 (avg time 2841.0, avg temp 61.8) + dz: 3 (avg time 3721.0, avg temp 62.1) + xyd: 100 (avg time 5853.1, avg temp 67.5) + + Here we get an intuitive result: entropy/uncertainty seems better at + exploring a whole range of possible solutions. It even seems, at least + to me, better than the distribution obtained by the original copycat. + + instead of log2, trying ln --> return (-f * math.log(f)): + + wyz: 78 (avg time 7793.7, avg temp 16.6) + xyy: 202 (avg time 9168.5, avg temp 27.5) + wxz: 1 (avg time 3154.0, avg temp 33.4) + dyz: 63 (avg time 7950.3, avg temp 41.7) + yyz: 217 (avg time 8147.4, avg temp 41.7) + xyz: 201 (avg time 7579.7, avg temp 62.5) + xxy: 1 (avg time 7994.0, avg temp 64.8) + yzz: 8 (avg time 4672.6, avg temp 65.7) + xd: 9 (avg time 9215.2, avg temp 68.1) + xyd: 217 (avg time 7677.9, avg temp 73.8) + dz: 3 (avg time 20379.0, avg temp 77.3) + + (quickly) trying out (1-this_entropy_function): + + xyd: 100 (avg time 2984.3, avg temp 18.2) + + And that's beautiful! One wants an inverse function that punishes + exploration and creativity, that takes all the fluidity off + the system. 
+ + But somehow this completely messes up with abc abd iijjkk: + + jijjkk: 66 (avg time 3200.1, avg temp 61.3) + iijjkk: 114 (avg time 5017.2, avg temp 63.5) + dijjkk: 23 (avg time 2209.0, avg temp 67.3) + iijjkl: 748 (avg time 3262.8, avg temp 70.0) + iijjkd: 49 (avg time 2315.9, avg temp 76.3) + + Which leads me to suspect that someone may have overfitted the + model for either xyz or iijjkk or some other problem, and one + improvement there means disaster here. + + Something tells me to invert again to 1-entropy... and bingo! + + iijjll: 59 (avg time 797.4, avg temp 19.8) + iijjkl: 41 (avg time 696.1, avg temp 28.5) + + My guess is that some code is prefering to find groups in the + opposite form that it likes finding the "symmetry/opposite" + concepts of the xyz problem. + + Sould compare & contrast the unhappiness and relevance of both + the opposite/symmetry codelets and the grouping/chunking codelets. + My hunch is the sameness group code: something there that + interacts with Temperature is wicked, and should be relatively + easy to find the error. + + Here's why: the following run was done on (1-entropy(f)): + + mrrlll: 77 (avg time 2195.7, avg temp 41.4) + mrrd: 2 (avg time 1698.0, avg temp 42.6) + mrrkkl: 20 (avg time 1317.8, avg temp 46.6) + mrrkkd: 1 (avg time 1835.0, avg temp 48.6) + + + If (1-entropy(f)) binds the system into a tight corridor of possibilities, + then why does it easily get the samenessGroup right? If this is right, + then running just entropy(f) should have big trouble with samenessGroup. 
+ Let's see: + + nrrkkk: 11 (avg time 3637.8, avg temp 64.6) + drrkkk: 3 (avg time 5921.3, avg temp 66.2) + mrrkkd: 7 (avg time 6771.3, avg temp 74.6) + mrrkkl: 79 (avg time 3723.0, avg temp 74.9) + + So there we are: the system is unable to find that change samenessGroup + to next letterCategory, so there ought to be something very different + in the code that: + + * Interacts with Temperature (things like unhappiness, relevance, depth, + urgency, and whatever else interacts with T) + * something very close to samenessGroup... sameGroup, sameness, + sameNeighbors, etc... is encoded in a form that is *directly opposite* + to other concepts/categories/codlets, etc. + + + Need to play with this more... and WTF is f anyways? + """ + if value == 0 or value == 0.5 or self.value() == 0: + return value + if value < 0.5: + return 1.0 - self.getAdjustedProbability(1.0 - value) + coldness = 100.0 - self.value() + a = math.sqrt(coldness) + c = (10 - a) / 100 + f = (c + 1) * value + return (0 + (-f * math.log2(f))) # max(f, 0.0000) diff --git a/copycat/tests.py b/copycat/tests.py deleted file mode 100644 index 7556f4a..0000000 --- a/copycat/tests.py +++ /dev/null @@ -1,137 +0,0 @@ -import unittest - -from .copycat import Copycat - - -def pnormaldist(p): - table = { - 0.80: 1.2815, - 0.90: 1.6448, - 0.95: 1.9599, - 0.98: 2.3263, - 0.99: 2.5758, - 0.995: 2.8070, - 0.998: 3.0902, - 0.999: 3.2905, - 0.9999: 3.8905, - 0.99999: 4.4171, - 0.999999: 4.8916, - 0.9999999: 5.3267, - 0.99999999: 5.7307, - 0.999999999: 6.1094, - } - return max(v for k, v in table.items() if k <= p) - - -def lower_bound_on_probability(hits, attempts, confidence=0.95): - if attempts == 0: - return 0 - z = pnormaldist(confidence) - zsqr = z*z - phat = 1.0 * hits / attempts - under_sqrt = (phat * (1 - phat) + zsqr / (4*attempts)) / attempts - denominator = (1 + zsqr / attempts) - return (phat + zsqr / (2*attempts) - z * (under_sqrt ** 0.5)) / denominator - - -def upper_bound_on_probability(hits, attempts, 
confidence=0.95): - misses = attempts - hits - return 1.0 - lower_bound_on_probability(misses, attempts, confidence) - - -class TestCopycat(unittest.TestCase): - def setUp(self): - self.longMessage = True # new in Python 2.7 - - def assertProbabilitiesLookRoughlyLike(self, actual, expected): - actual_count = 0.0 + sum(d['count'] for d in list(actual.values())) - expected_count = 0.0 + sum(d['count'] for d in list(expected.values())) - self.assertGreater(actual_count, 1) - self.assertGreater(expected_count, 1) - for k in set(list(actual.keys()) + list(expected.keys())): - if k not in expected: - self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected)) - expected_probability = expected[k]['count'] / expected_count - if k in actual: - actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count) - actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count) - if not (actual_lo <= expected_probability <= actual_hi): - print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi)) - self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected)) - if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= 10.0 + (10.0 / actual[k]['count']): - print('Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], 10.0 + (10.0 / actual[k]['count']))) - self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected)) - else: - actual_hi = upper_bound_on_probability(0, actual_count) - if not (0 <= expected_probability <= actual_hi): - self.fail('No instances of expected key %s were produced! 
%r != %r' % (k, actual, expected)) - - def run_testcase(self, initial, modified, target, iterations, expected): - actual = Copycat().run(initial, modified, target, iterations) - self.assertEqual(sum(a['count'] for a in list(actual.values())), iterations) - self.assertProbabilitiesLookRoughlyLike(actual, expected) - - def test_simple_cases(self): - self.run_testcase('abc', 'abd', 'efg', 50, { - 'efd': {'count': 1, 'avgtemp': 16}, - 'efh': {'count': 99, 'avgtemp': 19}, - }) - self.run_testcase('abc', 'abd', 'ijk', 50, { - 'ijd': {'count': 4, 'avgtemp': 24}, - 'ijl': {'count': 96, 'avgtemp': 20}, - }) - - def test_abc_xyz(self): - self.run_testcase('abc', 'abd', 'xyz', 20, { - 'xyd': {'count': 100, 'avgtemp': 19}, - }) - - def test_ambiguous_case(self): - self.run_testcase('abc', 'abd', 'ijkk', 50, { - 'ijkkk': {'count': 7, 'avgtemp': 21}, - 'ijll': {'count': 47, 'avgtemp': 28}, - 'ijkl': {'count': 44, 'avgtemp': 32}, - 'ijkd': {'count': 2, 'avgtemp': 65}, - }) - - def test_mrrjjj(self): - self.run_testcase('abc', 'abd', 'mrrjjj', 50, { - 'mrrjjjj': {'count': 4, 'avgtemp': 16}, - 'mrrkkk': {'count': 31, 'avgtemp': 47}, - 'mrrjjk': {'count': 64, 'avgtemp': 51}, - 'mrrjkk': {'count': 1, 'avgtemp': 52}, - 'mrrjjd': {'count': 1, 'avgtemp': 54}, - }) - - def test_elongation(self): - # This isn't remotely what a human would say. - self.run_testcase('abc', 'aabbcc', 'milk', 50, { - 'milj': {'count': 85, 'avgtemp': 55}, - 'mikj': {'count': 10, 'avgtemp': 56}, - 'milk': {'count': 1, 'avgtemp': 56}, - 'lilk': {'count': 1, 'avgtemp': 57}, - 'milb': {'count': 3, 'avgtemp': 57}, - }) - - def test_repairing_successor_sequence(self): - # This isn't remotely what a human would say. 
- self.run_testcase('aba', 'abc', 'xyx', 50, { - 'xc': {'count': 9, 'avgtemp': 57}, - 'xyc': {'count': 82, 'avgtemp': 59}, - 'cyx': {'count': 7, 'avgtemp': 68}, - 'xyx': {'count': 2, 'avgtemp': 69}, - }) - - def test_nonsense(self): - self.run_testcase('cat', 'dog', 'cake', 10, { - 'cakg': {'count': 99, 'avgtemp': 70}, - 'gake': {'count': 1, 'avgtemp': 59}, - }) - self.run_testcase('cat', 'dog', 'kitten', 10, { - 'kitteg': {'count': 96, 'avgtemp': 66}, - 'kitten': {'count': 4, 'avgtemp': 68}, - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/copycat/workspace.py b/copycat/workspace.py index ebc7c5d..70b46a7 100644 --- a/copycat/workspace.py +++ b/copycat/workspace.py @@ -1,3 +1,6 @@ +"""Workspace module.""" + + from . import formulas from .bond import Bond from .correspondence import Correspondence @@ -14,6 +17,7 @@ def __adjustUnhappiness(values): class Workspace(object): def __init__(self, ctx): + """To initialize the workspace.""" self.ctx = ctx self.totalUnhappiness = 0.0 self.intraStringUnhappiness = 0.0 @@ -50,6 +54,16 @@ class Workspace(object): self.modified = WorkspaceString(self.ctx, self.modifiedString) self.target = WorkspaceString(self.ctx, self.targetString) + ''' + # TODO: Initial part of refactoring in this method + def getAssessedUnhappiness(self, unhappiness): + o.Unhappiness = __adjustUnhappiness( + o.relativeImportance * o.Unhappiness + for o in self.objects) + pass + ''' + + # TODO: Extract method? def assessUnhappiness(self): self.intraStringUnhappiness = __adjustUnhappiness( o.relativeImportance * o.intraStringUnhappiness @@ -61,6 +75,7 @@ class Workspace(object): o.relativeImportance * o.totalUnhappiness for o in self.objects) + # TODO: these 3 methods seem to be the same... are they? If so, Extract method. 
def calculateIntraStringUnhappiness(self): value = sum( o.relativeImportance * o.intraStringUnhappiness @@ -92,6 +107,7 @@ class Workspace(object): self.initial.updateIntraStringUnhappiness() self.target.updateIntraStringUnhappiness() + # TODO: use entropy def getUpdatedTemperature(self): self.calculateIntraStringUnhappiness() self.calculateInterStringUnhappiness() @@ -107,7 +123,7 @@ class Workspace(object): )) def numberOfUnrelatedObjects(self): - """A list of all objects in the workspace with >= 1 open bond slots""" + """Computes the number of all objects in the workspace with >= 1 open bond slots.""" objects = [o for o in self.objects if o.string == self.initial or o.string == self.target] objects = [o for o in objects if not o.spansString()] @@ -125,21 +141,21 @@ class Workspace(object): return len(objects) def numberOfUnreplacedObjects(self): - """A list of all unreplaced objects in the initial string""" + """A list of all unreplaced objects in the initial string.""" objects = [o for o in self.objects if o.string == self.initial and isinstance(o, Letter)] objects = [o for o in objects if not o.replacement] return len(objects) def numberOfUncorrespondingObjects(self): - """A list of all uncorresponded objects in the initial string""" + """A list of all uncorresponded objects in the initial string.""" objects = [o for o in self.objects if o.string == self.initial or o.string == self.target] objects = [o for o in objects if not o.correspondence] return len(objects) def numberOfBonds(self): - """The number of bonds in the workspace""" + """The number of bonds in the workspace.""" return sum(1 for o in self.structures if isinstance(o, Bond)) def correspondences(self): diff --git a/copycat/workspaceString.py b/copycat/workspaceString.py index 2d7a149..a57c218 100644 --- a/copycat/workspaceString.py +++ b/copycat/workspaceString.py @@ -38,7 +38,7 @@ class WorkspaceString(object): return self.string[i] def updateRelativeImportance(self): - """Update the normalised 
importance of all objects in the string""" + """Update the normalised importance of all objects in the string.""" total = sum(o.rawImportance for o in self.objects) if not total: for o in self.objects: diff --git a/main.py b/main.py index abc9c0b..dff79f6 100755 --- a/main.py +++ b/main.py @@ -1,16 +1,55 @@ #!/usr/bin/env python3 +""" +Main Copycat program. + +To run it, type at the terminal: + + > python main.py abc abd ppqqrr --iterations 10 + +The script takes three to five arguments. The first two are a pair of strings +with some change, for example "abc" and "abd". The third is a string which the +script should try to change analogously. The fourth (which defaults to "1") is +a number of iterations. One can also specify a defined seed value for the +random number generator. + +This instruction produces output such as: + + iiijjjlll: 670 (avg time 1108.5, avg temp 23.6) + iiijjjd: 2 (avg time 1156.0, avg temp 35.0) + iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5) + iiijjjkll: 8 (avg time 2096.8, avg temp 44.1) + iiijjjkkd: 5 (avg time 837.2, avg temp 48.0) + + wyz: 5 (avg time 2275.2, avg temp 14.9) + xyd: 982 (avg time 2794.4, avg temp 17.5) + yyz: 7 (avg time 2731.9, avg temp 25.1) + dyz: 2 (avg time 3320.0, avg temp 27.1) + xyy: 2 (avg time 4084.5, avg temp 31.1) + xyz: 2 (avg time 1873.5, avg temp 52.1) + +The first number indicates how many times Copycat chose that string as its +answer; higher means "more obvious". The last number indicates the average +final temperature of the workspace; lower means "more elegant". +""" + import argparse import logging from copycat import Copycat, Reporter + class SimpleReporter(Reporter): + """Reports results from a single run.""" + def report_answer(self, answer): + """Self-explanatory code.""" print('Answered %s (time %d, final temperature %.1f)' % ( answer['answer'], answer['time'], answer['temp'], )) + def main(): + """Program's main entrance point. 
Self-explanatory code.""" logging.basicConfig(level=logging.INFO, format='%(message)s', filename='./output/copycat.log', filemode='w') parser = argparse.ArgumentParser() @@ -27,5 +66,6 @@ def main(): for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']): print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp'])) + if __name__ == '__main__': main() diff --git a/setup.py b/setup.py index 8119fd9..ac987e5 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python - +"""Self-explanatory.""" from setuptools import setup setup( diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..7842590 --- /dev/null +++ b/tests.py @@ -0,0 +1,62 @@ +import unittest +import os.path +import pickle +import argparse +import sys + +from pprint import pprint +from copycat import Problem +from copycat.statistics import iso_chi_squared + +# TODO: update test cases to use entropy + +def generate(): + print('Generating distributions for new file') + iterations = 30 + problems = [ + Problem('abc', 'abd', 'efg', iterations), + Problem('abc', 'abd', 'ijk', iterations), + Problem('abc', 'abd', 'xyz', iterations), + Problem('abc', 'abd', 'ijkk', iterations), + Problem('abc', 'abd', 'mrrjjj', iterations)] + + with open(TestCopycat.Filename, 'wb') as outfile: + pickle.dump(problems, outfile) + return problems + +class TestCopycat(unittest.TestCase): + Filename = None + + def setUp(self): + self.longMessage = True # new in Python 2.7 + + def test(self): + print('Testing copycat with input file: {}'.format(TestCopycat.Filename)) + try: + with open(TestCopycat.Filename, 'rb') as infile: + problems = pickle.load(infile) + except Exception as e: + print('Generating due to error:') + print(e) + problems = generate() + + for problem in problems: + problem.test(iso_chi_squared) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--generate', action='store_true') + 
parser.add_argument('filename', default='.distributions', nargs='?') + parser.add_argument('unittest_args', default=[], nargs='?') + + args = parser.parse_args() + # TODO: Go do something with args.input and args.filename + + TestCopycat.Filename = args.filename + + if args.generate: + generate() + + # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone) + sys.argv[1:] = args.unittest_args + unittest.main()