Merge branch 'feature-normal-science-backport' into feature-gui

2017-11-18 18:32:13 -07:00
parent ff152c6398 4388bede7d
commit ec9e0c333e
19 changed files with 562 additions and 168 deletions
--- a/.distributions
+++ b/.distributions
--- a/.gitignore
+++ b/.gitignore
@ -19,6 +19,7 @@ pip-log.txt
 # Unit test / coverage reports
 .coverage
 .tox
 .log
 # Other filesystems
 .svn
--- a/.ipynb_checkpoints/Copycat-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Copycat-checkpoint.ipynb
@ -0,0 +1,6 @@
 {
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/Copycat.ipynb
+++ b/Copycat.ipynb
@ -0,0 +1,81 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Copycat \n",
    "\n",
    "Just type your copycat example, and the number of iterations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Answered iijjkl (time 1374, final temperature 13.5)\n",
      "Answered iijjll (time 665, final temperature 19.6)\n",
      "Answered iijjll (time 406, final temperature 16.6)\n",
      "Answered iijjkl (time 379, final temperature 47.9)\n",
      "Answered iijjll (time 556, final temperature 19.2)\n",
      "Answered iijjkl (time 813, final temperature 42.8)\n",
      "Answered iijjll (time 934, final temperature 15.5)\n",
      "Answered iijjkl (time 1050, final temperature 49.5)\n",
      "Answered iijjkl (time 700, final temperature 44.0)\n",
      "Answered iijjkl (time 510, final temperature 34.8)\n",
      "Answered iijjkl (time 673, final temperature 18.1)\n",
      "Answered iijjkl (time 1128, final temperature 19.8)\n",
      "Answered iijjll (time 961, final temperature 19.9)\n",
      "Answered iijjll (time 780, final temperature 16.5)\n",
      "Answered iijjll (time 607, final temperature 17.8)\n",
      "Answered iijjll (time 594, final temperature 39.7)\n",
      "Answered iijjll (time 736, final temperature 18.4)\n",
      "Answered iijjll (time 903, final temperature 18.6)\n",
      "Answered iijjll (time 601, final temperature 20.6)\n",
      "Answered iijjll (time 949, final temperature 42.4)\n",
      "iijjll: 12 (avg time 724.3, avg temp 22.1)\n",
      "iijjkl: 8 (avg time 828.4, avg temp 33.8)\n"
     ]
    }
   ],
   "source": [
    "%run main.py abc abd iijjkk --iterations 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@ co.py.cat
 ![GUI](https://i.imgur.com/7pb20g0.png)
 An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm.
-The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading.
+The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading.  See also [Farglexandria](https://github.com/Alex-Linhares/Farglexandria).
 This implementation is a copycat of Scott Boland's [Java implementation](https://archive.org/details/JavaCopycat).
 The original Java-to-Python translation work was done by J Alan Brogan (@jalanb on GitHub).
@ -75,3 +75,10 @@ $ python
 ```
 The result of `run` is a dict containing the same information as was printed by `main.py` above.
 Questions
 ---------
 1. Why are codelets **NOT** implemented through lambda?  
--- a/copycat/init.py
+++ b/copycat/init.py
@ -1 +1,2 @@
 from .copycat import Copycat, Reporter  # noqa
 from .problem import Problem
--- a/copycat/codeletMethods.py
+++ b/copycat/codeletMethods.py
@ -74,8 +74,10 @@ def __structureVsStructure(structure1, weight1, structure2, weight2):
    temperature = ctx.temperature
    structure1.updateStrength()
    structure2.updateStrength()
    # TODO: use entropy
    weightedStrength1 = temperature.getAdjustedValue(
        structure1.totalStrength * weight1)
    # TODO: use entropy
    weightedStrength2 = temperature.getAdjustedValue(
        structure2.totalStrength * weight2)
    return random.weighted_greater_than(weightedStrength1, weightedStrength2)
@ -111,6 +113,7 @@ def __slippability(ctx, conceptMappings):
    temperature = ctx.temperature
    for mapping in conceptMappings:
        slippiness = mapping.slippability() / 100.0
        # TODO: use entropy
        probabilityOfSlippage = temperature.getAdjustedProbability(slippiness)
        if random.coinFlip(probabilityOfSlippage):
            return True
@ -122,6 +125,7 @@ def breaker(ctx, codelet):
    random = ctx.random
    temperature = ctx.temperature
    workspace = ctx.workspace
    # TODO: use entropy
    probabilityOfFizzle = (100.0 - temperature.value()) / 100.0
    if random.coinFlip(probabilityOfFizzle):
        return
@ -138,6 +142,7 @@ def breaker(ctx, codelet):
                breakObjects += [structure.source.group]
    # Break all the objects or none of them; this matches the Java
    for structure in breakObjects:
        # TODO: use entropy
        breakProbability = temperature.getAdjustedProbability(
            structure.totalStrength / 100.0)
        if random.coinFlip(breakProbability):
@ -149,8 +154,7 @@ def breaker(ctx, codelet):
 def chooseRelevantDescriptionByActivation(ctx, workspaceObject):
    random = ctx.random
    descriptions = workspaceObject.relevantDescriptions()
-    weights = [description.descriptor.activation
+    weights = [description.descriptor.activation for description in descriptions]
                   for description in descriptions]
    return random.weighted_choice(descriptions, weights)
@ -160,6 +164,7 @@ def similarPropertyLinks(ctx, slip_node):
    result = []
    for slip_link in slip_node.propertyLinks:
        association = slip_link.degreeOfAssociation() / 100.0
        # TODO:use entropy
        probability = temperature.getAdjustedProbability(association)
        if random.coinFlip(probability):
            result += [slip_link]
@ -182,7 +187,7 @@ def bottom_up_description_scout(ctx, codelet):
    sliplinks = similarPropertyLinks(ctx, description.descriptor)
    assert sliplinks
    weights = [sliplink.degreeOfAssociation() * sliplink.destination.activation
-              for sliplink in sliplinks]
+               for sliplink in sliplinks]
    chosen = random.weighted_choice(sliplinks, weights)
    chosenProperty = chosen.destination
    coderack.proposeDescription(chosenObject, chosenProperty.category(),
@ -215,6 +220,7 @@ def description_strength_tester(ctx, codelet):
    description.descriptor.buffer = 100.0
    description.updateStrength()
    strength = description.totalStrength
    # TODO: use entropy
    probability = temperature.getAdjustedProbability(strength / 100.0)
    assert random.coinFlip(probability)
    coderack.newCodelet('description-builder', strength, [description])
@ -298,7 +304,7 @@ def rule_scout(ctx, codelet):
    workspace = ctx.workspace
    assert workspace.numberOfUnreplacedObjects() == 0
    changedObjects = [o for o in workspace.initial.objects if o.changed]
-    #assert len(changedObjects) < 2
+    # assert len(changedObjects) < 2
    # if there are no changed objects, propose a rule with no changes
    if not changedObjects:
        return coderack.proposeRule(None, None, None, None)
@ -328,8 +334,8 @@ def rule_scout(ctx, codelet):
            if targetObject.described(node):
                if targetObject.distinguishingDescriptor(node):
                    newList += [node]
-        objectList = newList  # surely this should be +=
+        objectList = newList    # surely this should be +=
-                              # "union of this and distinguishing descriptors"
+        # "union of this and distinguishing descriptors"
    assert objectList
    # use conceptual depth to choose a description
    weights = [
@ -360,6 +366,7 @@ def rule_strength_tester(ctx, codelet):
    temperature = ctx.temperature
    rule = codelet.arguments[0]
    rule.updateStrength()
    # TODO: use entropy
    probability = temperature.getAdjustedProbability(rule.totalStrength / 100.0)
    if random.coinFlip(probability):
        coderack.newCodelet('rule-builder', rule.totalStrength, [rule])
@ -392,8 +399,8 @@ def replacement_finder(ctx, codelet):
        relation = relations[diff]
    else:
        relation = None
-    letterOfInitialString.replacement = Replacement(ctx,
+    letterOfInitialString.replacement = Replacement(ctx, letterOfInitialString,
-        letterOfInitialString, letterOfModifiedString, relation)
+                                                    letterOfModifiedString, relation)
    if relation != slipnet.sameness:
        letterOfInitialString.changed = True
        workspace.changedObject = letterOfInitialString
@ -436,8 +443,8 @@ def top_down_bond_scout__direction(ctx, codelet):
    coderack = ctx.coderack
    slipnet = ctx.slipnet
    direction = codelet.arguments[0]
-    source = __getScoutSource(ctx,
+    source = __getScoutSource(ctx, direction, formulas.localDirectionCategoryRelevance,
-        direction, formulas.localDirectionCategoryRelevance, 'bond')
+                              'bond')
    destination = chooseDirectedNeighbor(ctx, source, direction)
    assert destination
    logging.info('to object: %s', destination)
@ -462,6 +469,7 @@ def bond_strength_tester(ctx, codelet):
    __showWhichStringObjectIsFrom(bond)
    bond.updateStrength()
    strength = bond.totalStrength
    # TODO: use entropy
    probability = temperature.getAdjustedProbability(strength / 100.0)
    logging.info('bond strength = %d for %s', strength, bond)
    assert random.coinFlip(probability)
@ -502,7 +510,7 @@ def bond_builder(ctx, codelet):
            if incompatibleCorrespondences:
                logging.info("trying to break incompatible correspondences")
                assert __fight(bond, 2.0, incompatibleCorrespondences, 3.0)
-            #assert __fightIncompatibles(incompatibleCorrespondences,
+            # assert __fightIncompatibles(incompatibleCorrespondences,
            #                            bond, 'correspondences', 2.0, 3.0)
    for incompatible in incompatibleBonds:
        incompatible.break_the_structure()
@ -692,7 +700,7 @@ def top_down_group_scout__direction(ctx, codelet):
                          direction, bondFacet)
-#noinspection PyStringFormat
+# noinspection PyStringFormat
@codelet('group-scout--whole-string')
 def group_scout__whole_string(ctx, codelet):
    coderack = ctx.coderack
@ -744,6 +752,7 @@ def group_strength_tester(ctx, codelet):
    __showWhichStringObjectIsFrom(group)
    group.updateStrength()
    strength = group.totalStrength
    # TODO: use entropy
    probability = temperature.getAdjustedProbability(strength / 100.0)
    if random.coinFlip(probability):
        # it is strong enough - post builder  & activate nodes
@ -871,6 +880,7 @@ def rule_translator(ctx, codelet):
        bondDensity = min(bondDensity, 1.0)
    weights = __getCutoffWeights(bondDensity)
    cutoff = 10.0 * random.weighted_choice(list(range(1, 11)), weights)
    # TODO: use entropy
    if cutoff >= temperature.actual_value:
        result = workspace.rule.buildTranslatedRule()
        if result is not None:
@ -907,11 +917,11 @@ def bottom_up_correspondence_scout(ctx, codelet):
                 and m.initialDescriptionType != slipnet.bondFacet]
    initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
    flipTargetObject = False
-    if  (objectFromInitial.spansString() and
+    if (objectFromInitial.spansString() and
-         objectFromTarget.spansString() and
+        objectFromTarget.spansString() and
-         slipnet.directionCategory in initialDescriptionTypes
+        slipnet.directionCategory in initialDescriptionTypes
-         and all(m.label == slipnet.opposite for m in opposites)  # unreached?
+        and all(m.label == slipnet.opposite for m in opposites)  # unreached?
-         and slipnet.opposite.activation != 100.0):
+            and slipnet.opposite.activation != 100.0):
        objectFromTarget = objectFromTarget.flippedVersion()
        conceptMappings = formulas.getMappings(
            objectFromInitial, objectFromTarget,
@ -927,6 +937,7 @@ def important_object_correspondence_scout(ctx, codelet):
    coderack = ctx.coderack
    random = ctx.random
    slipnet = ctx.slipnet
    # TODO: use entropy
    temperature = ctx.temperature
    workspace = ctx.workspace
    objectFromInitial = chooseUnmodifiedObject(ctx, 'relativeImportance',
@ -966,11 +977,11 @@ def important_object_correspondence_scout(ctx, codelet):
                 and m.initialDescriptionType != slipnet.bondFacet]
    initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
    flipTargetObject = False
-    if  (objectFromInitial.spansString()
+    if (objectFromInitial.spansString()
-         and objectFromTarget.spansString()
+        and objectFromTarget.spansString()
-         and slipnet.directionCategory in initialDescriptionTypes
+        and slipnet.directionCategory in initialDescriptionTypes
-         and all(m.label == slipnet.opposite for m in opposites)  # unreached?
+        and all(m.label == slipnet.opposite for m in opposites)  # unreached?
-         and slipnet.opposite.activation != 100.0):
+            and slipnet.opposite.activation != 100.0):
        objectFromTarget = objectFromTarget.flippedVersion()
        conceptMappings = formulas.getMappings(
            objectFromInitial, objectFromTarget,
@ -997,6 +1008,7 @@ def correspondence_strength_tester(ctx, codelet):
                 objectFromTarget.flipped_version())))
    correspondence.updateStrength()
    strength = correspondence.totalStrength
    # TODO: use entropy
    probability = temperature.getAdjustedProbability(strength / 100.0)
    if random.coinFlip(probability):
        # activate some concepts
@ -1050,8 +1062,8 @@ def correspondence_builder(ctx, codelet):
    # if there is an incompatible bond then fight against it
    initial = correspondence.objectFromInitial
    target = correspondence.objectFromTarget
-    if  (initial.leftmost or initial.rightmost and
+    if (initial.leftmost or initial.rightmost and
-         target.leftmost or target.rightmost):
+            target.leftmost or target.rightmost):
        # search for the incompatible bond
        incompatibleBond = correspondence.getIncompatibleBond()
        if incompatibleBond:
--- a/copycat/coderack.py
+++ b/copycat/coderack.py
@ -83,6 +83,7 @@ class Coderack(object):
        if 'correspondence' in codeletName:
            return workspace.interStringUnhappiness / 100.0
        if 'description' in codeletName:
            # TODO: use entropy
            return (temperature.value() / 100.0) ** 2
        return workspace.intraStringUnhappiness / 100.0
@ -161,6 +162,8 @@ class Coderack(object):
        urgency = 3
        if codeletName == 'breaker':
            urgency = 1
        # TODO: use entropy
        if temperature.value() < 25.0 and 'translator' in codeletName:
            urgency = 5
        for _ in range(howMany):
@ -287,6 +290,8 @@ class Coderack(object):
        random = self.ctx.random
        temperature = self.ctx.temperature
        assert self.codelets
        # TODO: use entropy
        scale = (100.0 - temperature.value() + 10.0) / 15.0
        chosen = random.weighted_choice(self.codelets, [codelet.urgency ** scale for codelet in self.codelets])
        self.removeCodelet(chosen)
--- a/copycat/curses_reporter.py
+++ b/copycat/curses_reporter.py
@ -239,6 +239,7 @@ class CursesReporter(Reporter):
        w.border()
        w.refresh()
    #TODO: use entropy
    def report_temperature(self, temperature):
        self.do_keyboard_shortcuts()
        w = self.temperatureWindow
--- a/copycat/group.py
+++ b/copycat/group.py
@ -96,6 +96,7 @@ class Group(WorkspaceObject):
        support = self.localSupport() / 100.0
        activation = slipnet.length.activation / 100.0
        supportedActivation = (support * activation) ** exp
        #TODO: use entropy
        return temperature.getAdjustedProbability(supportedActivation)
    def flippedVersion(self):
@ -130,6 +131,7 @@ class Group(WorkspaceObject):
        cubedlength = length ** 3
        fred = cubedlength * (100.0 - slipnet.length.activation) / 100.0
        probability = 0.5 ** fred
        #TODO: use entropy
        value = temperature.getAdjustedProbability(probability)
        if value < 0.06:
            value = 0.0
--- a/copycat/problem.py
+++ b/copycat/problem.py
@ -0,0 +1,62 @@
 from .copycat import Copycat
 from pprint import pprint
 class Problem:
     """One Copycat analogy problem (initial : modified :: target : _) and its answers.
     `distributions` maps a temperature-adjustment formula to whatever
     `Copycat.run` returned for that formula.  It is computed by `solve()`
     when the caller does not supply it.
     """
     def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
         """Store the problem and, unless `distributions` is given, solve it.
         Args:
             initial, modified, target: the three strings of the analogy.
             iterations: passed through to `Copycat.run`.
             distributions: previously computed answers; skips the solve.
             formulas: optional iterable of formulas to run the problem under.
         Raises:
             AssertionError: if `formulas` is given but a fresh Copycat's
                 workspace has no `temperature` attribute.
         """
         self.formulas = formulas
         if formulas is not None:
             # Check up front so a misconfigured build fails before the slow solve.
             # NOTE(review): this inspects `workspace.temperature`, while solve()
             # switches formulas through `copycat.temperature` -- confirm both exist.
             assert hasattr(Copycat().workspace, 'temperature')
         self.initial = initial
         self.modified = modified
         self.target = target
         self.iterations = iterations
         if distributions is None:
             self.distributions = self.solve()
         else:
             self.distributions = distributions
     def test(self, comparison, expected=None):
         """Re-solve the problem and hand (actual, expected) to `comparison`.
         `expected` defaults to the distributions stored on this instance.
         """
         print('-' * 120)
         print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                   self.modified,
                                                                   self.target))
         print('expected:')
         if expected is None:
             expected = self.distributions
         pprint(expected)
         actual = self.solve()
         print('actual:')
         pprint(actual)
         comparison(actual, expected)
         print('-' * 120)
     def solve(self):
         """Run Copycat on this problem and return {formula: run result}.
         Without `formulas` there is a single entry, keyed by the formula the
         workspace temperature reports (or None when it has no temperature).
         With `formulas`, each one is activated in turn and gets its own entry.
         """
         print('-' * 120)
         print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                   self.modified,
                                                                   self.target))
         copycat = Copycat()
         answers = dict()
         if self.formulas is None:
             if hasattr(copycat.workspace, 'temperature'):
                 formula = copycat.workspace.temperature.getAdj()
             else:
                 formula = None
             answers[formula] = copycat.run(self.initial,
                                            self.modified,
                                            self.target,
                                            self.iterations)
         else:
             for formula in self.formulas:
                 copycat.temperature.useAdj(formula)
                 # Key by the formula just activated.  The previous code used the
                 # undefined name `formulas` here and raised NameError.
                 answers[formula] = copycat.run(self.initial,
                                                self.modified,
                                                self.target,
                                                self.iterations)
         return answers
     def generate(self):
         """Recompute and store the answer distributions for this problem."""
         self.distributions = self.solve()
--- a/copycat/statistics.py
+++ b/copycat/statistics.py
@ -0,0 +1,57 @@
 # CHI2 values for n degrees freedom
 _chiSquared_table = {
        1:3.841,
        2:5.991,
        3:7.815,
        4:9.488,
        5:11.071,
        6:12.592,
        7:14.067,
        8:15.507,
        9:16.919,
        10:18.307
        }
 class ChiSquaredException(Exception):
     """Exception type for the chi-squared helpers.
     Nothing in the visible part of this module raises it yet.
     """
 def chi_squared(actual, expected):
     """Return the chi-squared statistic between two answer tallies.
     Both arguments map an answer to a record whose 'count' entry says how
     often that answer occurred; an answer absent from a mapping counts as 0.
     Answers whose expected count is 0 cannot contribute a term, so they are
     reported with a printed warning and left out of the sum.
     """
     def tally(answer, records):
         # Occurrences of `answer` in `records`, or 0 when it never came up.
         if answer in records:
             return records[answer]['count']
         return 0
     statistic = 0
     for answer in set([*actual.keys(), *expected.keys()]):
         expectedCount = tally(answer, expected)
         observedCount = tally(answer, actual)
         if expectedCount == 0:
             print('Warning! Expected 0 counts of {}, but got {}'.format(answer, observedCount))
             continue
         statistic += (observedCount - expectedCount) ** 2 / expectedCount
     return statistic
 def cross_formula_chi_squared(actualDict, expectedDict):
     """Compare every distribution in actualDict with every one in expectedDict.
     Both arguments map a formula to an answer tally as accepted by
     `chi_squared`.  Each pairing is announced, and a message is printed when
     its statistic reaches the tabulated critical value.  Returns None.
     """
     for ka, actual in actualDict.items():
         for ke, expected in expectedDict.items():
             print('Comparing {} with {}'.format(ka, ke))
             chiSquared = chi_squared(actual, expected)
             # Previously `degreesFreedom` was read here without ever being
             # defined in this function, which raised NameError.  It is counted
             # the same way chi_squared counts its answers.
             # NOTE(review): textbook goodness-of-fit uses (categories - 1) --
             # confirm which convention is wanted.
             degreesFreedom = len(set(actual) | set(expected))
             critical = _chiSquared_table.get(degreesFreedom)
             if critical is None:
                 # The table is finite; skip rather than crash on a missing row.
                 print('No critical value tabulated for {} degrees of freedom; '
                       'skipping significance check'.format(degreesFreedom))
                 continue
             if chiSquared >= critical:
                 print('Significant difference between expected and actual answer distributions: \n' +
                       'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
 def cross_chi_squared(problemSets):
     """Cross-compare the answers of problems drawn from different problem sets.
     Every problem of each set is paired with every problem of each later set
     in `problemSets`, and the two `distributions` attributes are passed to
     `cross_formula_chi_squared`.  Problems within one set are never paired.
     """
     pairs = (
         (problemA, problemB)
         for position, problemSetA in enumerate(problemSets)
         for problemSetB in problemSets[position + 1:]
         for problemA in problemSetA
         for problemB in problemSetB
     )
     for problemA, problemB in pairs:
         cross_formula_chi_squared(problemA.distributions, problemB.distributions)
 def iso_chi_squared(actualDict, expectedDict):
     """Check that every key of expectedDict was also produced in actualDict.
     Raises AssertionError naming the first expected key that is missing.
     NOTE(review): the two distributions for each key are looked up but never
     compared -- the function currently performs only the key check.
     """
     for formula in expectedDict.keys():
         assert formula in actualDict, 'The key {} was not tested'.format(formula)
         observed, anticipated = actualDict[formula], expectedDict[formula]
--- a/copycat/temperature.py
+++ b/copycat/temperature.py
@ -35,6 +35,7 @@ class Temperature(object):
    def getAdjustedValue(self, value):
        """Raise `value` to a power that grows as the temperature falls.
        The exponent is (100 - temperature) / 30 + 0.5.
        """
        exponent = (100.0 - self.value()) / 30.0 + 0.5
        return value ** exponent
    """
    def getAdjustedProbability(self, value):
        if value == 0 or value == 0.5 or self.value() == 0:
            return value
@ -45,3 +46,180 @@ class Temperature(object):
        c = (10 - a) / 100
        f = (c + 1) * value
        return max(f, 0.5)
    """
    def getAdjustedProbability(self, value):
        """
        Return the temperature-adjusted probability for a decision.
        A value of 0 or 0.5 is returned unchanged, as is any value when the
        temperature reads 0.  Values below 0.5 are handled by symmetry:
        1 - adjusted(1 - value).  Otherwise the value is scaled to
        f = (1 + (10 - sqrt(100 - temperature)) / 100) * value and the
        entropy-style term -f * log2(f) is returned.
        NOTE(review): -f * log2(f) is negative whenever f > 1, so the result
        is not confined to [0, 1] -- confirm that callers tolerate this.
        Experiment log (kept from the original author).
        The previous version of this function is copied above.
        Please look at the last line of it.  Strangely, it was
        return max(f, 0.5).  Does that make sense? Let's compare
        some results.  Where it was (0.5), we obtained, for example:
        iiijjjlll: 670 (avg time 1108.5, avg temp 23.6)
        iiijjjd: 2 (avg time 1156.0, avg temp 35.0)
        iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5)
        iiijjjkll: 8 (avg time 2096.8, avg temp 44.1)
        iiijjjkkd: 5 (avg time 837.2, avg temp 48.0)
        wyz: 5 (avg time 2275.2, avg temp 14.9)
        xyd: 982 (avg time 2794.4, avg temp 17.5)
        yyz: 7 (avg time 2731.9, avg temp 25.1)
        dyz: 2 (avg time 3320.0, avg temp 27.1)
        xyy: 2 (avg time 4084.5, avg temp 31.1)
        xyz: 2 (avg time 1873.5, avg temp 52.1)
        Now, let's see what return max(f, 0.0000) does:
        wyz: 7 (avg time 3192.9, avg temp 13.1)
        xyd: 985 (avg time 2849.1, avg temp 17.5)
        yyz: 6 (avg time 3836.7, avg temp 18.6)
        xyy: 1 (avg time 1421.0, avg temp 19.5)
        xyz: 1 (avg time 7350.0, avg temp 48.3)
        They *seem* better (in the strict sense that we've obtained both
        lower T and more times of wyz.)  But they're *not* statistically
        significant (for 1000 runs).
        Now... looking at the code... it seems to be a mess... what does
        function f() even mean in intuitive terms?
        Work it does, but dude... quite a hack.
        Another run, with return f @line89:
        wyz: 8 (avg time 4140.5, avg temp 13.3)
        yyz: 6 (avg time 2905.2, avg temp 14.5)
        xyd: 982 (avg time 3025.4, avg temp 17.6)
        dyz: 4 (avg time 4265.0, avg temp 17.7)
        Does it even matter? Another (quick) run, I think with return (0.5):
        dyz: 1 (avg time 5198.0, avg temp 15.3)
        wyz: 3 (avg time 4043.7, avg temp 17.1)
        yyz: 9 (avg time 3373.6, avg temp 21.0)
        xyd: 84 (avg time 5011.1, avg temp 23.3)
        xyy: 3 (avg time 4752.0, avg temp 27.9)
        Compared to return(0.99):
        xyd: 1000 (avg time 1625.2, avg temp 17.3)
        Comparing to return f --> Statistically significant.
        Comparing to return(0.5) --> same, so this return value does something.
        Now running return(0.0):
        xyz: 3 (avg time 3996.7, avg temp 81.1)
        dyz: 46 (avg time 5931.7, avg temp 82.6)
        xd: 17 (avg time 6090.3, avg temp 83.8)
        xyd: 934 (avg time 7699.8, avg temp 88.1)
        It's bad overall, but at least it's statistically significant!
        return (-f * (math.log2(f))) # Entropy test #1 (global).
        wyz: 123 (avg time 5933.1, avg temp 16.5)
        xyy: 200 (avg time 6486.7, avg temp 27.8)
        yyz: 330 (avg time 6310.2, avg temp 38.5)
        dyz: 75 (avg time 6393.3, avg temp 39.6)
        yzz: 5 (avg time 4965.0, avg temp 59.3)
        xyz: 160 (avg time 6886.2, avg temp 60.2)
        xd: 4 (avg time 2841.0, avg temp 61.8)
        dz: 3 (avg time 3721.0, avg temp 62.1)
        xyd: 100 (avg time 5853.1, avg temp 67.5)
        Here we get an intuitive result: entropy/uncertainty seems better at
        exploring a whole range of possible solutions.  It even seems, at least
        to me, better than the distribution obtained by the original copycat.
        Instead of log2, trying ln --> return (-f * math.log(f)):
        wyz: 78 (avg time 7793.7, avg temp 16.6)
        xyy: 202 (avg time 9168.5, avg temp 27.5)
        wxz: 1 (avg time 3154.0, avg temp 33.4)
        dyz: 63 (avg time 7950.3, avg temp 41.7)
        yyz: 217 (avg time 8147.4, avg temp 41.7)
        xyz: 201 (avg time 7579.7, avg temp 62.5)
        xxy: 1 (avg time 7994.0, avg temp 64.8)
        yzz: 8 (avg time 4672.6, avg temp 65.7)
        xd: 9 (avg time 9215.2, avg temp 68.1)
        xyd: 217 (avg time 7677.9, avg temp 73.8)
        dz: 3 (avg time 20379.0, avg temp 77.3)
        (quickly) trying out (1-this_entropy_function):
        xyd: 100 (avg time 2984.3, avg temp 18.2)
        And that's beautiful! One wants an inverse function that punishes
        exploration and creativity, that takes all the fluidity off
        the system.
        But somehow this completely messes up with abc abd iijjkk:
        jijjkk: 66 (avg time 3200.1, avg temp 61.3)
        iijjkk: 114 (avg time 5017.2, avg temp 63.5)
        dijjkk: 23 (avg time 2209.0, avg temp 67.3)
        iijjkl: 748 (avg time 3262.8, avg temp 70.0)
        iijjkd: 49 (avg time 2315.9, avg temp 76.3)
        Which leads me to suspect that someone may have overfitted the
        model for either xyz or iijjkk or some other problem, and one
        improvement there means disaster here.
        Something tells me to invert again to 1-entropy... and bingo!
        iijjll: 59 (avg time 797.4, avg temp 19.8)
        iijjkl: 41 (avg time 696.1, avg temp 28.5)
        My guess is that some code is preferring to find groups in the
        opposite form that it likes finding the "symmetry/opposite"
        concepts of the xyz problem.
        Should compare & contrast the unhappiness and relevance of both
        the opposite/symmetry codelets and the grouping/chunking codelets.
        My hunch is the sameness group code: something there that
        interacts with Temperature is wicked, and should be relatively
        easy to find the error.
        Here's why:  the following run was done on (1-entropy(f)):
        mrrlll: 77 (avg time 2195.7, avg temp 41.4)
        mrrd: 2 (avg time 1698.0, avg temp 42.6)
        mrrkkl: 20 (avg time 1317.8, avg temp 46.6)
        mrrkkd: 1 (avg time 1835.0, avg temp 48.6)
        If (1-entropy(f)) binds the system into a tight corridor of possibilities,
        then why does it easily get the samenessGroup right?  If this is right,
        then running just entropy(f) should have big trouble with samenessGroup.
        Let's see:
        nrrkkk: 11 (avg time 3637.8, avg temp 64.6)
        drrkkk: 3 (avg time 5921.3, avg temp 66.2)
        mrrkkd: 7 (avg time 6771.3, avg temp 74.6)
        mrrkkl: 79 (avg time 3723.0, avg temp 74.9)
        So there we are: the system is unable to find that change samenessGroup
        to next letterCategory, so there ought to be something very different
        in the code that:
        * Interacts with Temperature (things like unhappiness, relevance, depth,
        urgency, and whatever else interacts with T)
        * something very close to samenessGroup... sameGroup, sameness,
        sameNeighbors, etc... is encoded in a form that is *directly opposite*
        to other concepts/categories/codelets, etc.
        Need to play with this more... and WTF is f anyways?
        """
        if value == 0 or value == 0.5 or self.value() == 0:
            return value
        if value < 0.5:
            # Mirror around 0.5 so only the upper half needs the formula below.
            mirrored = self.getAdjustedProbability(1.0 - value)
            return 1.0 - mirrored
        boost = (10 - math.sqrt(100.0 - self.value())) / 100
        scaled = (boost + 1) * value
        # The leading `0 +` is kept on purpose: it turns a -0.0 result into 0.0.
        return 0 + (-scaled * math.log2(scaled))  # max(f, 0.0000)
--- a/copycat/tests.py
+++ b/copycat/tests.py
@ -1,137 +0,0 @@
 import unittest
 from .copycat import Copycat
 def pnormaldist(p):
    """Look up the tabulated normal critical value (z) for confidence ``p``.

    The lookup is a coarse step function: the result is the largest z whose
    tabulated confidence level does not exceed ``p``; no interpolation is
    performed.

    Args:
        p: requested confidence level, e.g. 0.95.

    Returns:
        The z value from the table.

    Raises:
        ValueError: if ``p`` is below the smallest tabulated level (0.80).
    """
    table = {
        0.80: 1.2815,
        0.90: 1.6448,
        0.95: 1.9599,
        0.98: 2.3263,
        0.99: 2.5758,
        0.995: 2.8070,
        0.998: 3.0902,
        0.999: 3.2905,
        0.9999: 3.8905,
        0.99999: 4.4171,
        0.999999: 4.8916,
        0.9999999: 5.3267,
        0.99999999: 5.7307,
        0.999999999: 6.1094,
    }
    candidates = [z for level, z in table.items() if level <= p]
    if not candidates:
        # Without this guard max() fails with an unhelpful
        # "max() arg is an empty sequence" message.
        raise ValueError(
            'confidence %r is below the smallest tabulated level (%r)'
            % (p, min(table)))
    return max(candidates)
 def lower_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the lower end of the Wilson score interval for hits/attempts.

    Args:
        hits: number of successes observed.
        attempts: number of trials; when it is 0 the result is the int 0.
        confidence: confidence level handed to ``pnormaldist``.

    Returns:
        The lower bound on the underlying success probability.
    """
    if attempts == 0:
        return 0
    crit = pnormaldist(confidence)
    crit_sq = crit * crit
    observed = 1.0 * hits / attempts
    spread = (observed * (1 - observed) + crit_sq / (4 * attempts)) / attempts
    centre = observed + crit_sq / (2 * attempts)
    margin = crit * (spread ** 0.5)
    scale = 1 + crit_sq / attempts
    return (centre - margin) / scale
 def upper_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the upper confidence bound on the success probability.

    Computed by symmetry: one minus the lower bound obtained for the
    misses (``attempts - hits``) over the same number of attempts.
    """
    lower_for_misses = lower_bound_on_probability(
        attempts - hits, attempts, confidence)
    return 1.0 - lower_for_misses
 class TestCopycat(unittest.TestCase):
    """Statistical checks of Copycat's answer distributions.

    Each test runs Copycat a number of times on one problem and compares the
    resulting answer counts and average temperatures with recorded values.
    """

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def assertProbabilitiesLookRoughlyLike(self, actual, expected):
        """Fail unless ``actual`` is statistically compatible with ``expected``.

        Both arguments map an answer string to a dict holding at least the
        keys 'count' and 'avgtemp'.  For every answer:

        * an answer that was produced but is not in ``expected`` fails;
        * the expected relative frequency must lie inside the confidence
          interval computed from the actual counts;
        * for produced answers, the average temperatures must differ by less
          than ``10 + 10 / count``.
        """
        actual_count = float(sum(d['count'] for d in actual.values()))
        expected_count = float(sum(d['count'] for d in expected.values()))
        self.assertGreater(actual_count, 1)
        self.assertGreater(expected_count, 1)
        # Sorted so the first failure reported is the same on every run
        # (iteration order of a plain set of strings is not stable).
        for k in sorted(set(actual) | set(expected)):
            if k not in expected:
                self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected))
            expected_probability = expected[k]['count'] / expected_count
            if k in actual:
                actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count)
                actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count)
                if not (actual_lo <= expected_probability <= actual_hi):
                    print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi))
                    self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))
                # Rarely produced answers get a wider temperature tolerance.
                tolerance = 10.0 + (10.0 / actual[k]['count'])
                if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= tolerance:
                    print('Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], tolerance))
                    self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))
            else:
                # Never produced: acceptable only if zero observations are
                # still compatible with the expected frequency.
                actual_hi = upper_bound_on_probability(0, actual_count)
                if not (0 <= expected_probability <= actual_hi):
                    self.fail('No instances of expected key %s were produced! %r != %r' % (k, actual, expected))

    def run_testcase(self, initial, modified, target, iterations, expected):
        """Run one problem ``iterations`` times and compare with ``expected``."""
        actual = Copycat().run(initial, modified, target, iterations)
        self.assertEqual(sum(a['count'] for a in actual.values()), iterations)
        self.assertProbabilitiesLookRoughlyLike(actual, expected)

    def test_simple_cases(self):
        self.run_testcase('abc', 'abd', 'efg', 50, {
            'efd': {'count': 1, 'avgtemp': 16},
            'efh': {'count': 99, 'avgtemp': 19},
        })
        self.run_testcase('abc', 'abd', 'ijk', 50, {
            'ijd': {'count': 4, 'avgtemp': 24},
            'ijl': {'count': 96, 'avgtemp': 20},
        })

    def test_abc_xyz(self):
        self.run_testcase('abc', 'abd', 'xyz', 20, {
            'xyd': {'count': 100, 'avgtemp': 19},
        })

    def test_ambiguous_case(self):
        self.run_testcase('abc', 'abd', 'ijkk', 50, {
            'ijkkk': {'count': 7, 'avgtemp': 21},
            'ijll': {'count': 47, 'avgtemp': 28},
            'ijkl': {'count': 44, 'avgtemp': 32},
            'ijkd': {'count': 2, 'avgtemp': 65},
        })

    def test_mrrjjj(self):
        self.run_testcase('abc', 'abd', 'mrrjjj', 50, {
            'mrrjjjj': {'count': 4, 'avgtemp': 16},
            'mrrkkk': {'count': 31, 'avgtemp': 47},
            'mrrjjk': {'count': 64, 'avgtemp': 51},
            'mrrjkk': {'count': 1, 'avgtemp': 52},
            'mrrjjd': {'count': 1, 'avgtemp': 54},
        })

    def test_elongation(self):
        # This isn't remotely what a human would say.
        self.run_testcase('abc', 'aabbcc', 'milk', 50, {
            'milj': {'count': 85, 'avgtemp': 55},
            'mikj': {'count': 10, 'avgtemp': 56},
            'milk': {'count': 1, 'avgtemp': 56},
            'lilk': {'count': 1, 'avgtemp': 57},
            'milb': {'count': 3, 'avgtemp': 57},
        })

    def test_repairing_successor_sequence(self):
        # This isn't remotely what a human would say.
        self.run_testcase('aba', 'abc', 'xyx', 50, {
            'xc': {'count': 9, 'avgtemp': 57},
            'xyc': {'count': 82, 'avgtemp': 59},
            'cyx': {'count': 7, 'avgtemp': 68},
            'xyx': {'count': 2, 'avgtemp': 69},
        })

    def test_nonsense(self):
        self.run_testcase('cat', 'dog', 'cake', 10, {
            'cakg': {'count': 99, 'avgtemp': 70},
            'gake': {'count': 1, 'avgtemp': 59},
        })
        self.run_testcase('cat', 'dog', 'kitten', 10, {
            'kitteg': {'count': 96, 'avgtemp': 66},
            'kitten': {'count': 4, 'avgtemp': 68},
        })
 # Run the unittest command-line runner when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/copycat/workspace.py
+++ b/copycat/workspace.py
@ -1,3 +1,6 @@
 """Workspace module."""
 from . import formulas
 from .bond import Bond
 from .correspondence import Correspondence
@ -14,6 +17,7 @@ def __adjustUnhappiness(values):
 class Workspace(object):
    def __init__(self, ctx):
        """To initialize the workspace."""
        self.ctx = ctx
        self.totalUnhappiness = 0.0
        self.intraStringUnhappiness = 0.0
@ -50,6 +54,16 @@ class Workspace(object):
        self.modified = WorkspaceString(self.ctx, self.modifiedString)
        self.target = WorkspaceString(self.ctx, self.targetString)
    '''
    # TODO: Initial part of refactoring in this method
    def getAssessedUnhappiness(self, unhappiness):
        o.Unhappiness = __adjustUnhappiness(
            o.relativeImportance * o.Unhappiness
            for o in self.objects)
        pass
    '''
    # TODO: Extract method?
    def assessUnhappiness(self):
        self.intraStringUnhappiness = __adjustUnhappiness(
            o.relativeImportance * o.intraStringUnhappiness
@ -61,6 +75,7 @@ class Workspace(object):
            o.relativeImportance * o.totalUnhappiness
            for o in self.objects)
    # TODO: these 3 methods seem to be the same... are they?  If so, Extract method.
    def calculateIntraStringUnhappiness(self):
        value = sum(
            o.relativeImportance * o.intraStringUnhappiness
@ -92,6 +107,7 @@ class Workspace(object):
        self.initial.updateIntraStringUnhappiness()
        self.target.updateIntraStringUnhappiness()
    # TODO: use entropy
    def getUpdatedTemperature(self):
        self.calculateIntraStringUnhappiness()
        self.calculateInterStringUnhappiness()
@ -107,7 +123,7 @@ class Workspace(object):
        ))
    def numberOfUnrelatedObjects(self):
-        """A list of all objects in the workspace with >= 1 open bond slots"""
+        """Computes the number of all objects in the workspace with >= 1 open bond slots."""
        objects = [o for o in self.objects
                   if o.string == self.initial or o.string == self.target]
        objects = [o for o in objects if not o.spansString()]
@ -125,21 +141,21 @@ class Workspace(object):
        return len(objects)
    def numberOfUnreplacedObjects(self):
-        """A list of all unreplaced objects in the initial string"""
+        """A list of all unreplaced objects in the initial string."""
        objects = [o for o in self.objects
                   if o.string == self.initial and isinstance(o, Letter)]
        objects = [o for o in objects if not o.replacement]
        return len(objects)
    def numberOfUncorrespondingObjects(self):
-        """A list of all uncorresponded objects in the initial string"""
+        """A list of all uncorresponded objects in the initial string."""
        objects = [o for o in self.objects
                   if o.string == self.initial or o.string == self.target]
        objects = [o for o in objects if not o.correspondence]
        return len(objects)
    def numberOfBonds(self):
-        """The number of bonds in the workspace"""
+        """The number of bonds in the workspace."""
        return sum(1 for o in self.structures if isinstance(o, Bond))
    def correspondences(self):
--- a/copycat/workspaceString.py
+++ b/copycat/workspaceString.py
@ -38,7 +38,7 @@ class WorkspaceString(object):
        return self.string[i]
    def updateRelativeImportance(self):
-        """Update the normalised importance of all objects in the string"""
+        """Update the normalised importance of all objects in the string."""
        total = sum(o.rawImportance for o in self.objects)
        if not total:
            for o in self.objects:
--- a/main.py
+++ b/main.py
@ -1,16 +1,55 @@
 #!/usr/bin/env python3
 """
 Main Copycat program.
 To run it, type at the terminal:
    > python main.py abc abd ppqqrr --iterations 10
 The script takes three to five arguments. The first two are a pair of strings
 with some change, for example "abc" and "abd". The third is a string which the
 script should try to change analogously. The fourth (which defaults to "1") is
 a number of iterations. One can also specify a defined seed value for the
 random number generator.
 This instruction produces output such as:
    iiijjjlll: 670 (avg time 1108.5, avg temp 23.6)
    iiijjjd: 2 (avg time 1156.0, avg temp 35.0)
    iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5)
    iiijjjkll: 8 (avg time 2096.8, avg temp 44.1)
    iiijjjkkd: 5 (avg time 837.2, avg temp 48.0)
    wyz: 5 (avg time 2275.2, avg temp 14.9)
    xyd: 982 (avg time 2794.4, avg temp 17.5)
    yyz: 7 (avg time 2731.9, avg temp 25.1)
    dyz: 2 (avg time 3320.0, avg temp 27.1)
    xyy: 2 (avg time 4084.5, avg temp 31.1)
    xyz: 2 (avg time 1873.5, avg temp 52.1)
 The first number indicates how many times Copycat chose that string as its
 answer; higher means "more obvious". The last number indicates the average
 final temperature of the workspace; lower means "more elegant".
 """
 import argparse
 import logging
 from copycat import Copycat, Reporter
 class SimpleReporter(Reporter):
    """Reporter that prints each reported answer to standard output."""

    def report_answer(self, answer):
        """Print a one-line summary of ``answer``.

        ``answer`` is indexed with the keys 'answer', 'time' and 'temp'.
        """
        fields = (answer['answer'], answer['time'], answer['temp'])
        summary = 'Answered %s (time %d, final temperature %.1f)' % fields
        print(summary)
 def main():
    """Program's main entrance point.  Self-explanatory code."""
    logging.basicConfig(level=logging.INFO, format='%(message)s', filename='./output/copycat.log', filemode='w')
    parser = argparse.ArgumentParser()
@ -27,5 +66,6 @@ def main():
    for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']):
        print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp']))
 if __name__ == '__main__':
    main()
--- a/setup.py
+++ b/setup.py
@ -1,5 +1,5 @@
 #!/usr/bin/env python
-
+"""Self-explanatory."""
 from setuptools import setup
 setup(
--- a/tests.py
+++ b/tests.py
@ -0,0 +1,62 @@
 import unittest
 import os.path
 import pickle
 import argparse
 import sys
 from pprint  import pprint
 from copycat import Problem
 from copycat.statistics import iso_chi_squared
 # TODO: update test cases to use entropy
 def generate():
    """Build the reference problems, pickle them and return them.

    Every problem uses the 'abc' -> 'abd' change with a different target
    string.  The resulting list is written to ``TestCopycat.Filename``.
    """
    print('Generating distributions for new file')
    runs_per_problem = 30
    targets = ('efg', 'ijk', 'xyz', 'ijkk', 'mrrjjj')
    problems = [Problem('abc', 'abd', target, runs_per_problem)
                for target in targets]
    with open(TestCopycat.Filename, 'wb') as sink:
        pickle.dump(problems, sink)
    return problems
 class TestCopycat(unittest.TestCase):
    """Checks Copycat against a pickled list of reference problems."""

    # Path of the pickled problems.  The default equals the command-line
    # default so the test also works when this module is loaded by a test
    # runner rather than executed as a script (with None, open() raised
    # TypeError both when loading and when regenerating).
    Filename = '.distributions'

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def test(self):
        """Load the reference problems (regenerating on failure) and test each."""
        print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
        try:
            with open(TestCopycat.Filename, 'rb') as infile:
                # NOTE: unpickling can execute arbitrary code; only point
                # Filename at files written by generate().
                problems = pickle.load(infile)
        except Exception as e:
            # Deliberately broad: a missing, truncated or incompatible file
            # all lead to regenerating the distributions.
            print('Generating due to error:')
            print(e)
            problems = generate()
        for problem in problems:
            problem.test(iso_chi_squared)
 if __name__ == '__main__':
    # Usage: tests.py [--generate] [filename] [unittest_args ...]
    parser = argparse.ArgumentParser()
    parser.add_argument('--generate', action='store_true')
    parser.add_argument('filename', default='.distributions', nargs='?')
    # nargs='*' always produces a list.  With nargs='?' a supplied value was
    # a single string, and assigning that string to the sys.argv slice below
    # split it into one argv entry per character.
    parser.add_argument('unittest_args', default=[], nargs='*')
    args = parser.parse_args()
    TestCopycat.Filename = args.filename
    if args.generate:
        generate()
    # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone)
    sys.argv[1:] = args.unittest_args
    unittest.main()
`@ -1 +1,2 @@`
	`from .copycat import Copycat, Reporter # noqa`	`from .copycat import Copycat, Reporter # noqa`
		`from .problem import Problem`