diff --git a/.distributions b/.distributions new file mode 100644 index 0000000..5f4912f Binary files /dev/null and b/.distributions differ diff --git a/.gitignore b/.gitignore index fc96e13..308d314 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ pip-log.txt # Unit test / coverage reports .coverage .tox +.log # Other filesystems .svn diff --git a/.ipynb_checkpoints/Copycat-checkpoint.ipynb b/.ipynb_checkpoints/Copycat-checkpoint.ipynb new file mode 100644 index 0000000..2fd6442 --- /dev/null +++ b/.ipynb_checkpoints/Copycat-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Copycat.ipynb b/Copycat.ipynb new file mode 100644 index 0000000..2225b63 --- /dev/null +++ b/Copycat.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Copycat \n", + "\n", + "Just type your copycat example, and the number of iterations." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Answered iijjkl (time 1374, final temperature 13.5)\n", + "Answered iijjll (time 665, final temperature 19.6)\n", + "Answered iijjll (time 406, final temperature 16.6)\n", + "Answered iijjkl (time 379, final temperature 47.9)\n", + "Answered iijjll (time 556, final temperature 19.2)\n", + "Answered iijjkl (time 813, final temperature 42.8)\n", + "Answered iijjll (time 934, final temperature 15.5)\n", + "Answered iijjkl (time 1050, final temperature 49.5)\n", + "Answered iijjkl (time 700, final temperature 44.0)\n", + "Answered iijjkl (time 510, final temperature 34.8)\n", + "Answered iijjkl (time 673, final temperature 18.1)\n", + "Answered iijjkl (time 1128, final temperature 19.8)\n", + "Answered iijjll (time 961, final temperature 19.9)\n", + "Answered iijjll (time 780, final temperature 16.5)\n", + "Answered iijjll (time 607, final temperature 17.8)\n", + 
"Answered iijjll (time 594, final temperature 39.7)\n", + "Answered iijjll (time 736, final temperature 18.4)\n", + "Answered iijjll (time 903, final temperature 18.6)\n", + "Answered iijjll (time 601, final temperature 20.6)\n", + "Answered iijjll (time 949, final temperature 42.4)\n", + "iijjll: 12 (avg time 724.3, avg temp 22.1)\n", + "iijjkl: 8 (avg time 828.4, avg temp 33.8)\n" + ] + } + ], + "source": [ + "%run main.py abc abd iijjkk --iterations 20" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md index 9a18937..0fe9ab9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ co.py.cat ![GUI](https://i.imgur.com/7pb20g0.png) An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm. -The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. +The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. See also [Farglexandria](https://github.com/Alex-Linhares/Farglexandria). This implementation is a copycat of Scott Boland's [Java implementation](https://archive.org/details/JavaCopycat). The original Java-to-Python translation work was done by J Alan Brogan (@jalanb on GitHub). 
@@ -75,3 +75,10 @@ $ python ``` The result of `run` is a dict containing the same information as was printed by `main.py` above. + + + +Questions +--------- + +1. Why are codelets **NOT** implemented through lambda? diff --git a/copycat/__init__.py b/copycat/__init__.py index 67e5cc9..292c4b6 100644 --- a/copycat/__init__.py +++ b/copycat/__init__.py @@ -1 +1,2 @@ from .copycat import Copycat, Reporter # noqa +from .problem import Problem diff --git a/copycat/codeletMethods.py b/copycat/codeletMethods.py index d8484d8..e4ae446 100644 --- a/copycat/codeletMethods.py +++ b/copycat/codeletMethods.py @@ -74,8 +74,10 @@ def __structureVsStructure(structure1, weight1, structure2, weight2): temperature = ctx.temperature structure1.updateStrength() structure2.updateStrength() + # TODO: use entropy weightedStrength1 = temperature.getAdjustedValue( structure1.totalStrength * weight1) + # TODO: use entropy weightedStrength2 = temperature.getAdjustedValue( structure2.totalStrength * weight2) return random.weighted_greater_than(weightedStrength1, weightedStrength2) @@ -111,6 +113,7 @@ def __slippability(ctx, conceptMappings): temperature = ctx.temperature for mapping in conceptMappings: slippiness = mapping.slippability() / 100.0 + # TODO: use entropy probabilityOfSlippage = temperature.getAdjustedProbability(slippiness) if random.coinFlip(probabilityOfSlippage): return True @@ -122,6 +125,7 @@ def breaker(ctx, codelet): random = ctx.random temperature = ctx.temperature workspace = ctx.workspace + # TODO: use entropy probabilityOfFizzle = (100.0 - temperature.value()) / 100.0 if random.coinFlip(probabilityOfFizzle): return @@ -138,6 +142,7 @@ def breaker(ctx, codelet): breakObjects += [structure.source.group] # Break all the objects or none of them; this matches the Java for structure in breakObjects: + # TODO: use entropy breakProbability = temperature.getAdjustedProbability( structure.totalStrength / 100.0) if random.coinFlip(breakProbability): @@ -149,8 +154,7 @@ def 
breaker(ctx, codelet): def chooseRelevantDescriptionByActivation(ctx, workspaceObject): random = ctx.random descriptions = workspaceObject.relevantDescriptions() - weights = [description.descriptor.activation - for description in descriptions] + weights = [description.descriptor.activation for description in descriptions] return random.weighted_choice(descriptions, weights) @@ -160,6 +164,7 @@ def similarPropertyLinks(ctx, slip_node): result = [] for slip_link in slip_node.propertyLinks: association = slip_link.degreeOfAssociation() / 100.0 + # TODO:use entropy probability = temperature.getAdjustedProbability(association) if random.coinFlip(probability): result += [slip_link] @@ -182,7 +187,7 @@ def bottom_up_description_scout(ctx, codelet): sliplinks = similarPropertyLinks(ctx, description.descriptor) assert sliplinks weights = [sliplink.degreeOfAssociation() * sliplink.destination.activation - for sliplink in sliplinks] + for sliplink in sliplinks] chosen = random.weighted_choice(sliplinks, weights) chosenProperty = chosen.destination coderack.proposeDescription(chosenObject, chosenProperty.category(), @@ -215,6 +220,7 @@ def description_strength_tester(ctx, codelet): description.descriptor.buffer = 100.0 description.updateStrength() strength = description.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) assert random.coinFlip(probability) coderack.newCodelet('description-builder', strength, [description]) @@ -298,7 +304,7 @@ def rule_scout(ctx, codelet): workspace = ctx.workspace assert workspace.numberOfUnreplacedObjects() == 0 changedObjects = [o for o in workspace.initial.objects if o.changed] - #assert len(changedObjects) < 2 + # assert len(changedObjects) < 2 # if there are no changed objects, propose a rule with no changes if not changedObjects: return coderack.proposeRule(None, None, None, None) @@ -328,8 +334,8 @@ def rule_scout(ctx, codelet): if targetObject.described(node): if 
targetObject.distinguishingDescriptor(node): newList += [node] - objectList = newList # surely this should be += - # "union of this and distinguishing descriptors" + objectList = newList # surely this should be += + # "union of this and distinguishing descriptors" assert objectList # use conceptual depth to choose a description weights = [ @@ -360,6 +366,7 @@ def rule_strength_tester(ctx, codelet): temperature = ctx.temperature rule = codelet.arguments[0] rule.updateStrength() + # TODO: use entropy probability = temperature.getAdjustedProbability(rule.totalStrength / 100.0) if random.coinFlip(probability): coderack.newCodelet('rule-builder', rule.totalStrength, [rule]) @@ -392,8 +399,8 @@ def replacement_finder(ctx, codelet): relation = relations[diff] else: relation = None - letterOfInitialString.replacement = Replacement(ctx, - letterOfInitialString, letterOfModifiedString, relation) + letterOfInitialString.replacement = Replacement(ctx, letterOfInitialString, + letterOfModifiedString, relation) if relation != slipnet.sameness: letterOfInitialString.changed = True workspace.changedObject = letterOfInitialString @@ -436,8 +443,8 @@ def top_down_bond_scout__direction(ctx, codelet): coderack = ctx.coderack slipnet = ctx.slipnet direction = codelet.arguments[0] - source = __getScoutSource(ctx, - direction, formulas.localDirectionCategoryRelevance, 'bond') + source = __getScoutSource(ctx, direction, formulas.localDirectionCategoryRelevance, + 'bond') destination = chooseDirectedNeighbor(ctx, source, direction) assert destination logging.info('to object: %s', destination) @@ -462,6 +469,7 @@ def bond_strength_tester(ctx, codelet): __showWhichStringObjectIsFrom(bond) bond.updateStrength() strength = bond.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) logging.info('bond strength = %d for %s', strength, bond) assert random.coinFlip(probability) @@ -502,7 +510,7 @@ def bond_builder(ctx, codelet): if 
incompatibleCorrespondences: logging.info("trying to break incompatible correspondences") assert __fight(bond, 2.0, incompatibleCorrespondences, 3.0) - #assert __fightIncompatibles(incompatibleCorrespondences, + # assert __fightIncompatibles(incompatibleCorrespondences, # bond, 'correspondences', 2.0, 3.0) for incompatible in incompatibleBonds: incompatible.break_the_structure() @@ -692,7 +700,7 @@ def top_down_group_scout__direction(ctx, codelet): direction, bondFacet) -#noinspection PyStringFormat +# noinspection PyStringFormat @codelet('group-scout--whole-string') def group_scout__whole_string(ctx, codelet): coderack = ctx.coderack @@ -744,6 +752,7 @@ def group_strength_tester(ctx, codelet): __showWhichStringObjectIsFrom(group) group.updateStrength() strength = group.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) if random.coinFlip(probability): # it is strong enough - post builder & activate nodes @@ -871,6 +880,7 @@ def rule_translator(ctx, codelet): bondDensity = min(bondDensity, 1.0) weights = __getCutoffWeights(bondDensity) cutoff = 10.0 * random.weighted_choice(list(range(1, 11)), weights) + # TODO: use entropy if cutoff >= temperature.actual_value: result = workspace.rule.buildTranslatedRule() if result is not None: @@ -907,11 +917,11 @@ def bottom_up_correspondence_scout(ctx, codelet): and m.initialDescriptionType != slipnet.bondFacet] initialDescriptionTypes = [m.initialDescriptionType for m in opposites] flipTargetObject = False - if (objectFromInitial.spansString() and - objectFromTarget.spansString() and - slipnet.directionCategory in initialDescriptionTypes - and all(m.label == slipnet.opposite for m in opposites) # unreached? - and slipnet.opposite.activation != 100.0): + if (objectFromInitial.spansString() and + objectFromTarget.spansString() and + slipnet.directionCategory in initialDescriptionTypes + and all(m.label == slipnet.opposite for m in opposites) # unreached? 
+ and slipnet.opposite.activation != 100.0): objectFromTarget = objectFromTarget.flippedVersion() conceptMappings = formulas.getMappings( objectFromInitial, objectFromTarget, @@ -927,6 +937,7 @@ def important_object_correspondence_scout(ctx, codelet): coderack = ctx.coderack random = ctx.random slipnet = ctx.slipnet + # TODO: use entropy temperature = ctx.temperature workspace = ctx.workspace objectFromInitial = chooseUnmodifiedObject(ctx, 'relativeImportance', @@ -966,11 +977,11 @@ def important_object_correspondence_scout(ctx, codelet): and m.initialDescriptionType != slipnet.bondFacet] initialDescriptionTypes = [m.initialDescriptionType for m in opposites] flipTargetObject = False - if (objectFromInitial.spansString() - and objectFromTarget.spansString() - and slipnet.directionCategory in initialDescriptionTypes - and all(m.label == slipnet.opposite for m in opposites) # unreached? - and slipnet.opposite.activation != 100.0): + if (objectFromInitial.spansString() + and objectFromTarget.spansString() + and slipnet.directionCategory in initialDescriptionTypes + and all(m.label == slipnet.opposite for m in opposites) # unreached? 
+ and slipnet.opposite.activation != 100.0): objectFromTarget = objectFromTarget.flippedVersion() conceptMappings = formulas.getMappings( objectFromInitial, objectFromTarget, @@ -997,6 +1008,7 @@ def correspondence_strength_tester(ctx, codelet): objectFromTarget.flipped_version()))) correspondence.updateStrength() strength = correspondence.totalStrength + # TODO: use entropy probability = temperature.getAdjustedProbability(strength / 100.0) if random.coinFlip(probability): # activate some concepts @@ -1050,8 +1062,8 @@ def correspondence_builder(ctx, codelet): # if there is an incompatible bond then fight against it initial = correspondence.objectFromInitial target = correspondence.objectFromTarget - if (initial.leftmost or initial.rightmost and - target.leftmost or target.rightmost): + if (initial.leftmost or initial.rightmost and + target.leftmost or target.rightmost): # search for the incompatible bond incompatibleBond = correspondence.getIncompatibleBond() if incompatibleBond: diff --git a/copycat/coderack.py b/copycat/coderack.py index fb58e2a..d8bca0b 100644 --- a/copycat/coderack.py +++ b/copycat/coderack.py @@ -83,6 +83,7 @@ class Coderack(object): if 'correspondence' in codeletName: return workspace.interStringUnhappiness / 100.0 if 'description' in codeletName: + # TODO: use entropy return (temperature.value() / 100.0) ** 2 return workspace.intraStringUnhappiness / 100.0 @@ -161,6 +162,8 @@ class Coderack(object): urgency = 3 if codeletName == 'breaker': urgency = 1 + + # TODO: use entropy if temperature.value() < 25.0 and 'translator' in codeletName: urgency = 5 for _ in range(howMany): @@ -287,6 +290,8 @@ class Coderack(object): random = self.ctx.random temperature = self.ctx.temperature assert self.codelets + + # TODO: use entropy scale = (100.0 - temperature.value() + 10.0) / 15.0 chosen = random.weighted_choice(self.codelets, [codelet.urgency ** scale for codelet in self.codelets]) self.removeCodelet(chosen) diff --git a/copycat/curses_reporter.py 
b/copycat/curses_reporter.py index 08f24db..1bd224a 100644 --- a/copycat/curses_reporter.py +++ b/copycat/curses_reporter.py @@ -239,6 +239,7 @@ class CursesReporter(Reporter): w.border() w.refresh() + #TODO: use entropy def report_temperature(self, temperature): self.do_keyboard_shortcuts() w = self.temperatureWindow diff --git a/copycat/group.py b/copycat/group.py index 015dae3..0d3abb4 100644 --- a/copycat/group.py +++ b/copycat/group.py @@ -96,6 +96,7 @@ class Group(WorkspaceObject): support = self.localSupport() / 100.0 activation = slipnet.length.activation / 100.0 supportedActivation = (support * activation) ** exp + #TODO: use entropy return temperature.getAdjustedProbability(supportedActivation) def flippedVersion(self): @@ -130,6 +131,7 @@ class Group(WorkspaceObject): cubedlength = length ** 3 fred = cubedlength * (100.0 - slipnet.length.activation) / 100.0 probability = 0.5 ** fred + #TODO: use entropy value = temperature.getAdjustedProbability(probability) if value < 0.06: value = 0.0 diff --git a/copycat/problem.py b/copycat/problem.py new file mode 100644 index 0000000..bf335e5 --- /dev/null +++ b/copycat/problem.py @@ -0,0 +1,62 @@ +from .copycat import Copycat + +from pprint import pprint + +class Problem: + def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None): + self.formulas = formulas + self.initial = initial + self.modified = modified + self.target = target + + self.iterations = iterations + if distributions is None: + self.distributions = self.solve() + else: + self.distributions = distributions + if formulas is not None: + assert hasattr(Copycat().workspace, 'temperature') + + def test(self, comparison, expected=None): + print('-' * 120) + print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial, + self.modified, + self.target)) + print('expected:') + if expected is None: + expected = self.distributions + pprint(expected) + + actual = self.solve() + print('actual:') + pprint(actual) + 
comparison(actual, expected) + print('-' * 120) + + def solve(self): + print('-' * 120) + print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial, + self.modified, + self.target)) + copycat = Copycat() + answers = dict() + if self.formulas is None: + if hasattr(copycat.workspace, 'temperature'): + formula = copycat.workspace.temperature.getAdj() + else: + formula = None + answers[formula] = copycat.run(self.initial, + self.modified, + self.target, + self.iterations) + else: + for formula in self.formulas: + copycat.temperature.useAdj(formula) + answers[formula] = copycat.run(self.initial, + self.modified, + self.target, + self.iterations) + return answers + + def generate(self): + self.distributions = self.solve() diff --git a/copycat/statistics.py b/copycat/statistics.py new file mode 100644 index 0000000..4f1ffe3 --- /dev/null +++ b/copycat/statistics.py @@ -0,0 +1,57 @@ +# CHI2 values for n degrees freedom +_chiSquared_table = { + 1:3.841, + 2:5.991, + 3:7.815, + 4:9.488, + 5:11.071, + 6:12.592, + 7:14.067, + 8:15.507, + 9:16.919, + 10:18.307 + } + +class ChiSquaredException(Exception): + pass + +def chi_squared(actual, expected): + answerKeys = set(list(actual.keys()) + list(expected.keys())) + degreesFreedom = len(answerKeys) + chiSquared = 0 + + get_count = lambda k, d : d[k]['count'] if k in d else 0 + + for k in answerKeys: + E = get_count(k, expected) + O = get_count(k, actual) + if E == 0: + print('Warning! 
Expected 0 counts of {}, but got {}'.format(k, O)) + else: + chiSquared += (O - E) ** 2 / E + return chiSquared + +def cross_formula_chi_squared(actualDict, expectedDict): + for ka, actual in actualDict.items(): + for ke, expected in expectedDict.items(): + print('Comparing {} with {}'.format(ka, ke)) + chiSquared = chi_squared(actual, expected) + degreesFreedom = len(set(list(actual.keys()) + list(expected.keys()))) + if chiSquared >= _chiSquared_table[degreesFreedom]: + print('Significant difference between expected and actual answer distributions: \n' + + 'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom)) + +def cross_chi_squared(problemSets): + for i, problemSetA in enumerate(problemSets): + for problemSetB in problemSets[i + 1:]: + for problemA in problemSetA: + for problemB in problemSetB: + answersA = problemA.distributions + answersB = problemB.distributions + cross_formula_chi_squared(answersA, answersB) + +def iso_chi_squared(actualDict, expectedDict): + for key in expectedDict.keys(): + assert key in actualDict, 'The key {} was not tested'.format(key) + actual = actualDict[key] + expected = expectedDict[key] diff --git a/copycat/temperature.py b/copycat/temperature.py index 37a5d6f..11a03f4 100644 --- a/copycat/temperature.py +++ b/copycat/temperature.py @@ -35,6 +35,7 @@ class Temperature(object): def getAdjustedValue(self, value): return value ** (((100.0 - self.value()) / 30.0) + 0.5) + """ def getAdjustedProbability(self, value): if value == 0 or value == 0.5 or self.value() == 0: return value @@ -45,3 +46,180 @@ c = (10 - a) / 100 f = (c + 1) * value return max(f, 0.5) + """ + + def getAdjustedProbability(self, value): + """ + This function returns the probability for a decision. + Copied above. + + Please look at the last line of it. Strangely, it was + return max(f, 0.5). Does that make sense? Let's compare + some results. 
Where it was (0.5), we obtained, for example: + + iiijjjlll: 670 (avg time 1108.5, avg temp 23.6) + iiijjjd: 2 (avg time 1156.0, avg temp 35.0) + iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5) + iiijjjkll: 8 (avg time 2096.8, avg temp 44.1) + iiijjjkkd: 5 (avg time 837.2, avg temp 48.0) + + wyz: 5 (avg time 2275.2, avg temp 14.9) + xyd: 982 (avg time 2794.4, avg temp 17.5) + yyz: 7 (avg time 2731.9, avg temp 25.1) + dyz: 2 (avg time 3320.0, avg temp 27.1) + xyy: 2 (avg time 4084.5, avg temp 31.1) + xyz: 2 (avg time 1873.5, avg temp 52.1) + + Now, let's see what return max(f, 0.0000) does: + + wyz: 7 (avg time 3192.9, avg temp 13.1) + xyd: 985 (avg time 2849.1, avg temp 17.5) + yyz: 6 (avg time 3836.7, avg temp 18.6) + xyy: 1 (avg time 1421.0, avg temp 19.5) + xyz: 1 (avg time 7350.0, avg temp 48.3) + + They *seem* better (in the strict sense that we've obtained both + lower T and more times of wyz.) But they're *not* statistically + significant (for 1000 runs). + + Now... looking at the code... it seems to be a mess... what does + function f() even mean in intuitive terms? + + Work it does, but dude... quite a hack. + + Another run, with return f @line89: + + wyz: 8 (avg time 4140.5, avg temp 13.3) + yyz: 6 (avg time 2905.2, avg temp 14.5) + xyd: 982 (avg time 3025.4, avg temp 17.6) + dyz: 4 (avg time 4265.0, avg temp 17.7) + + Does it even matter? Another (quick) run, I think with return (0.5): + + dyz: 1 (avg time 5198.0, avg temp 15.3) + wyz: 3 (avg time 4043.7, avg temp 17.1) + yyz: 9 (avg time 3373.6, avg temp 21.0) + xyd: 84 (avg time 5011.1, avg temp 23.3) + xyy: 3 (avg time 4752.0, avg temp 27.9) + + Compared to return(0.99): + + xyd: 1000 (avg time 1625.2, avg temp 17.3) + + Comparing to return f --> Statistically significant. + Comparing to return(0.5) --> same, so this return value does something. 
+ + Now running return(0.0): + + xyz: 3 (avg time 3996.7, avg temp 81.1) + dyz: 46 (avg time 5931.7, avg temp 82.6) + xd: 17 (avg time 6090.3, avg temp 83.8) + xyd: 934 (avg time 7699.8, avg temp 88.1) + + It's bad overall, but at least it's statistically significant! + + return (-f * (math.log2(f))) # Entropy test #1 (global). + + wyz: 123 (avg time 5933.1, avg temp 16.5) + xyy: 200 (avg time 6486.7, avg temp 27.8) + yyz: 330 (avg time 6310.2, avg temp 38.5) + dyz: 75 (avg time 6393.3, avg temp 39.6) + yzz: 5 (avg time 4965.0, avg temp 59.3) + xyz: 160 (avg time 6886.2, avg temp 60.2) + xd: 4 (avg time 2841.0, avg temp 61.8) + dz: 3 (avg time 3721.0, avg temp 62.1) + xyd: 100 (avg time 5853.1, avg temp 67.5) + + Here we get an intuitive result: entropy/uncertainty seems better at + exploring a whole range of possible solutions. It even seems, at least + to me, better than the distribution obtained by the original copycat. + + instead of log2, trying ln --> return (-f * math.log(f)): + + wyz: 78 (avg time 7793.7, avg temp 16.6) + xyy: 202 (avg time 9168.5, avg temp 27.5) + wxz: 1 (avg time 3154.0, avg temp 33.4) + dyz: 63 (avg time 7950.3, avg temp 41.7) + yyz: 217 (avg time 8147.4, avg temp 41.7) + xyz: 201 (avg time 7579.7, avg temp 62.5) + xxy: 1 (avg time 7994.0, avg temp 64.8) + yzz: 8 (avg time 4672.6, avg temp 65.7) + xd: 9 (avg time 9215.2, avg temp 68.1) + xyd: 217 (avg time 7677.9, avg temp 73.8) + dz: 3 (avg time 20379.0, avg temp 77.3) + + (quickly) trying out (1-this_entropy_function): + + xyd: 100 (avg time 2984.3, avg temp 18.2) + + And that's beautiful! One wants an inverse function that punishes + exploration and creativity, that takes all the fluidity off + the system. 
+ + But somehow this completely messes up with abc abd iijjkk: + + jijjkk: 66 (avg time 3200.1, avg temp 61.3) + iijjkk: 114 (avg time 5017.2, avg temp 63.5) + dijjkk: 23 (avg time 2209.0, avg temp 67.3) + iijjkl: 748 (avg time 3262.8, avg temp 70.0) + iijjkd: 49 (avg time 2315.9, avg temp 76.3) + + Which leads me to suspect that someone may have overfitted the + model for either xyz or iijjkk or some other problem, and one + improvement there means disaster here. + + Something tells me to invert again to 1-entropy... and bingo! + + iijjll: 59 (avg time 797.4, avg temp 19.8) + iijjkl: 41 (avg time 696.1, avg temp 28.5) + + My guess is that some code is prefering to find groups in the + opposite form that it likes finding the "symmetry/opposite" + concepts of the xyz problem. + + Sould compare & contrast the unhappiness and relevance of both + the opposite/symmetry codelets and the grouping/chunking codelets. + My hunch is the sameness group code: something there that + interacts with Temperature is wicked, and should be relatively + easy to find the error. + + Here's why: the following run was done on (1-entropy(f)): + + mrrlll: 77 (avg time 2195.7, avg temp 41.4) + mrrd: 2 (avg time 1698.0, avg temp 42.6) + mrrkkl: 20 (avg time 1317.8, avg temp 46.6) + mrrkkd: 1 (avg time 1835.0, avg temp 48.6) + + + If (1-entropy(f)) binds the system into a tight corridor of possibilities, + then why does it easily get the samenessGroup right? If this is right, + then running just entropy(f) should have big trouble with samenessGroup. 
+ Let's see: + + nrrkkk: 11 (avg time 3637.8, avg temp 64.6) + drrkkk: 3 (avg time 5921.3, avg temp 66.2) + mrrkkd: 7 (avg time 6771.3, avg temp 74.6) + mrrkkl: 79 (avg time 3723.0, avg temp 74.9) + + So there we are: the system is unable to find that change samenessGroup + to next letterCategory, so there ought to be something very different + in the code that: + + * Interacts with Temperature (things like unhappiness, relevance, depth, + urgency, and whatever else interacts with T) + * something very close to samenessGroup... sameGroup, sameness, + sameNeighbors, etc... is encoded in a form that is *directly opposite* + to other concepts/categories/codlets, etc. + + + Need to play with this more... and WTF is f anyways? + """ + if value == 0 or value == 0.5 or self.value() == 0: + return value + if value < 0.5: + return 1.0 - self.getAdjustedProbability(1.0 - value) + coldness = 100.0 - self.value() + a = math.sqrt(coldness) + c = (10 - a) / 100 + f = (c + 1) * value + return (0 + (-f * math.log2(f))) # max(f, 0.0000) diff --git a/copycat/tests.py b/copycat/tests.py deleted file mode 100644 index 7556f4a..0000000 --- a/copycat/tests.py +++ /dev/null @@ -1,137 +0,0 @@ -import unittest - -from .copycat import Copycat - - -def pnormaldist(p): - table = { - 0.80: 1.2815, - 0.90: 1.6448, - 0.95: 1.9599, - 0.98: 2.3263, - 0.99: 2.5758, - 0.995: 2.8070, - 0.998: 3.0902, - 0.999: 3.2905, - 0.9999: 3.8905, - 0.99999: 4.4171, - 0.999999: 4.8916, - 0.9999999: 5.3267, - 0.99999999: 5.7307, - 0.999999999: 6.1094, - } - return max(v for k, v in table.items() if k <= p) - - -def lower_bound_on_probability(hits, attempts, confidence=0.95): - if attempts == 0: - return 0 - z = pnormaldist(confidence) - zsqr = z*z - phat = 1.0 * hits / attempts - under_sqrt = (phat * (1 - phat) + zsqr / (4*attempts)) / attempts - denominator = (1 + zsqr / attempts) - return (phat + zsqr / (2*attempts) - z * (under_sqrt ** 0.5)) / denominator - - -def upper_bound_on_probability(hits, attempts, 
confidence=0.95): - misses = attempts - hits - return 1.0 - lower_bound_on_probability(misses, attempts, confidence) - - -class TestCopycat(unittest.TestCase): - def setUp(self): - self.longMessage = True # new in Python 2.7 - - def assertProbabilitiesLookRoughlyLike(self, actual, expected): - actual_count = 0.0 + sum(d['count'] for d in list(actual.values())) - expected_count = 0.0 + sum(d['count'] for d in list(expected.values())) - self.assertGreater(actual_count, 1) - self.assertGreater(expected_count, 1) - for k in set(list(actual.keys()) + list(expected.keys())): - if k not in expected: - self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected)) - expected_probability = expected[k]['count'] / expected_count - if k in actual: - actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count) - actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count) - if not (actual_lo <= expected_probability <= actual_hi): - print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi)) - self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected)) - if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= 10.0 + (10.0 / actual[k]['count']): - print('Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], 10.0 + (10.0 / actual[k]['count']))) - self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected)) - else: - actual_hi = upper_bound_on_probability(0, actual_count) - if not (0 <= expected_probability <= actual_hi): - self.fail('No instances of expected key %s were produced! 
%r != %r' % (k, actual, expected)) - - def run_testcase(self, initial, modified, target, iterations, expected): - actual = Copycat().run(initial, modified, target, iterations) - self.assertEqual(sum(a['count'] for a in list(actual.values())), iterations) - self.assertProbabilitiesLookRoughlyLike(actual, expected) - - def test_simple_cases(self): - self.run_testcase('abc', 'abd', 'efg', 50, { - 'efd': {'count': 1, 'avgtemp': 16}, - 'efh': {'count': 99, 'avgtemp': 19}, - }) - self.run_testcase('abc', 'abd', 'ijk', 50, { - 'ijd': {'count': 4, 'avgtemp': 24}, - 'ijl': {'count': 96, 'avgtemp': 20}, - }) - - def test_abc_xyz(self): - self.run_testcase('abc', 'abd', 'xyz', 20, { - 'xyd': {'count': 100, 'avgtemp': 19}, - }) - - def test_ambiguous_case(self): - self.run_testcase('abc', 'abd', 'ijkk', 50, { - 'ijkkk': {'count': 7, 'avgtemp': 21}, - 'ijll': {'count': 47, 'avgtemp': 28}, - 'ijkl': {'count': 44, 'avgtemp': 32}, - 'ijkd': {'count': 2, 'avgtemp': 65}, - }) - - def test_mrrjjj(self): - self.run_testcase('abc', 'abd', 'mrrjjj', 50, { - 'mrrjjjj': {'count': 4, 'avgtemp': 16}, - 'mrrkkk': {'count': 31, 'avgtemp': 47}, - 'mrrjjk': {'count': 64, 'avgtemp': 51}, - 'mrrjkk': {'count': 1, 'avgtemp': 52}, - 'mrrjjd': {'count': 1, 'avgtemp': 54}, - }) - - def test_elongation(self): - # This isn't remotely what a human would say. - self.run_testcase('abc', 'aabbcc', 'milk', 50, { - 'milj': {'count': 85, 'avgtemp': 55}, - 'mikj': {'count': 10, 'avgtemp': 56}, - 'milk': {'count': 1, 'avgtemp': 56}, - 'lilk': {'count': 1, 'avgtemp': 57}, - 'milb': {'count': 3, 'avgtemp': 57}, - }) - - def test_repairing_successor_sequence(self): - # This isn't remotely what a human would say. 
- self.run_testcase('aba', 'abc', 'xyx', 50, { - 'xc': {'count': 9, 'avgtemp': 57}, - 'xyc': {'count': 82, 'avgtemp': 59}, - 'cyx': {'count': 7, 'avgtemp': 68}, - 'xyx': {'count': 2, 'avgtemp': 69}, - }) - - def test_nonsense(self): - self.run_testcase('cat', 'dog', 'cake', 10, { - 'cakg': {'count': 99, 'avgtemp': 70}, - 'gake': {'count': 1, 'avgtemp': 59}, - }) - self.run_testcase('cat', 'dog', 'kitten', 10, { - 'kitteg': {'count': 96, 'avgtemp': 66}, - 'kitten': {'count': 4, 'avgtemp': 68}, - }) - - -if __name__ == '__main__': - unittest.main() diff --git a/copycat/workspace.py b/copycat/workspace.py index ebc7c5d..70b46a7 100644 --- a/copycat/workspace.py +++ b/copycat/workspace.py @@ -1,3 +1,6 @@ +"""Workspace module.""" + + from . import formulas from .bond import Bond from .correspondence import Correspondence @@ -14,6 +17,7 @@ def __adjustUnhappiness(values): class Workspace(object): def __init__(self, ctx): + """To initialize the workspace.""" self.ctx = ctx self.totalUnhappiness = 0.0 self.intraStringUnhappiness = 0.0 @@ -50,6 +54,16 @@ class Workspace(object): self.modified = WorkspaceString(self.ctx, self.modifiedString) self.target = WorkspaceString(self.ctx, self.targetString) + ''' + # TODO: Initial part of refactoring in this method + def getAssessedUnhappiness(self, unhappiness): + o.Unhappiness = __adjustUnhappiness( + o.relativeImportance * o.Unhappiness + for o in self.objects) + pass + ''' + + # TODO: Extract method? def assessUnhappiness(self): self.intraStringUnhappiness = __adjustUnhappiness( o.relativeImportance * o.intraStringUnhappiness @@ -61,6 +75,7 @@ class Workspace(object): o.relativeImportance * o.totalUnhappiness for o in self.objects) + # TODO: these 3 methods seem to be the same... are they? If so, Extract method. 
def calculateIntraStringUnhappiness(self): value = sum( o.relativeImportance * o.intraStringUnhappiness @@ -92,6 +107,7 @@ class Workspace(object): self.initial.updateIntraStringUnhappiness() self.target.updateIntraStringUnhappiness() + # TODO: use entropy def getUpdatedTemperature(self): self.calculateIntraStringUnhappiness() self.calculateInterStringUnhappiness() @@ -107,7 +123,7 @@ class Workspace(object): )) def numberOfUnrelatedObjects(self): - """A list of all objects in the workspace with >= 1 open bond slots""" + """Computes the number of all objects in the workspace with >= 1 open bond slots.""" objects = [o for o in self.objects if o.string == self.initial or o.string == self.target] objects = [o for o in objects if not o.spansString()] @@ -125,21 +141,21 @@ class Workspace(object): return len(objects) def numberOfUnreplacedObjects(self): - """A list of all unreplaced objects in the initial string""" + """A list of all unreplaced objects in the initial string.""" objects = [o for o in self.objects if o.string == self.initial and isinstance(o, Letter)] objects = [o for o in objects if not o.replacement] return len(objects) def numberOfUncorrespondingObjects(self): - """A list of all uncorresponded objects in the initial string""" + """A list of all uncorresponded objects in the initial string.""" objects = [o for o in self.objects if o.string == self.initial or o.string == self.target] objects = [o for o in objects if not o.correspondence] return len(objects) def numberOfBonds(self): - """The number of bonds in the workspace""" + """The number of bonds in the workspace.""" return sum(1 for o in self.structures if isinstance(o, Bond)) def correspondences(self): diff --git a/copycat/workspaceString.py b/copycat/workspaceString.py index 2d7a149..a57c218 100644 --- a/copycat/workspaceString.py +++ b/copycat/workspaceString.py @@ -38,7 +38,7 @@ class WorkspaceString(object): return self.string[i] def updateRelativeImportance(self): - """Update the normalised 
importance of all objects in the string""" + """Update the normalised importance of all objects in the string.""" total = sum(o.rawImportance for o in self.objects) if not total: for o in self.objects: diff --git a/main.py b/main.py index abc9c0b..dff79f6 100755 --- a/main.py +++ b/main.py @@ -1,16 +1,55 @@ #!/usr/bin/env python3 +""" +Main Copycat program. + +To run it, type at the terminal: + + > python main.py abc abd ppqqrr --iterations 10 + +The script takes three to five arguments. The first two are a pair of strings +with some change, for example "abc" and "abd". The third is a string which the +script should try to change analogously. The fourth (which defaults to "1") is +a number of iterations. One can also specify a defined seed value for the +random number generator. + +This instruction produces output such as: + + iiijjjlll: 670 (avg time 1108.5, avg temp 23.6) + iiijjjd: 2 (avg time 1156.0, avg temp 35.0) + iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5) + iiijjjkll: 8 (avg time 2096.8, avg temp 44.1) + iiijjjkkd: 5 (avg time 837.2, avg temp 48.0) + + wyz: 5 (avg time 2275.2, avg temp 14.9) + xyd: 982 (avg time 2794.4, avg temp 17.5) + yyz: 7 (avg time 2731.9, avg temp 25.1) + dyz: 2 (avg time 3320.0, avg temp 27.1) + xyy: 2 (avg time 4084.5, avg temp 31.1) + xyz: 2 (avg time 1873.5, avg temp 52.1) + +The first number indicates how many times Copycat chose that string as its +answer; higher means "more obvious". The last number indicates the average +final temperature of the workspace; lower means "more elegant". +""" + import argparse import logging from copycat import Copycat, Reporter + class SimpleReporter(Reporter): + """Reports results from a single run.""" + def report_answer(self, answer): + """Self-explanatory code.""" print('Answered %s (time %d, final temperature %.1f)' % ( answer['answer'], answer['time'], answer['temp'], )) + def main(): + """Program's main entrance point. 
Self-explanatory code.""" logging.basicConfig(level=logging.INFO, format='%(message)s', filename='./output/copycat.log', filemode='w') parser = argparse.ArgumentParser() @@ -27,5 +66,6 @@ def main(): for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']): print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp'])) + if __name__ == '__main__': main() diff --git a/setup.py b/setup.py index 8119fd9..ac987e5 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python - +"""Self-explanatory.""" from setuptools import setup setup( diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..7842590 --- /dev/null +++ b/tests.py @@ -0,0 +1,62 @@ +import unittest +import os.path +import pickle +import argparse +import sys + +from pprint import pprint +from copycat import Problem +from copycat.statistics import iso_chi_squared + +# TODO: update test cases to use entropy + +def generate(): + print('Generating distributions for new file') + iterations = 30 + problems = [ + Problem('abc', 'abd', 'efg', iterations), + Problem('abc', 'abd', 'ijk', iterations), + Problem('abc', 'abd', 'xyz', iterations), + Problem('abc', 'abd', 'ijkk', iterations), + Problem('abc', 'abd', 'mrrjjj', iterations)] + + with open(TestCopycat.Filename, 'wb') as outfile: + pickle.dump(problems, outfile) + return problems + +class TestCopycat(unittest.TestCase): + Filename = None + + def setUp(self): + self.longMessage = True # new in Python 2.7 + + def test(self): + print('Testing copycat with input file: {}'.format(TestCopycat.Filename)) + try: + with open(TestCopycat.Filename, 'rb') as infile: + problems = pickle.load(infile) + except Exception as e: + print('Generating due to error:') + print(e) + problems = generate() + + for problem in problems: + problem.test(iso_chi_squared) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--generate', action='store_true') + 
parser.add_argument('filename', default='.distributions', nargs='?') + parser.add_argument('unittest_args', default=[], nargs='?') + + args = parser.parse_args() + # TODO: Go do something with args.input and args.filename + + TestCopycat.Filename = args.filename + + if args.generate: + generate() + + # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone) + sys.argv[1:] = args.unittest_args + unittest.main()