Merge branch 'feature-normal-science-backport' into feature-gui

This commit is contained in:
LSaldyt
2017-11-18 18:32:13 -07:00
19 changed files with 562 additions and 168 deletions

BIN
.distributions Normal file

Binary file not shown.

1
.gitignore vendored
View File

@ -19,6 +19,7 @@ pip-log.txt
# Unit test / coverage reports
.coverage
.tox
.log
# Other filesystems
.svn

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}

81
Copycat.ipynb Normal file
View File

@ -0,0 +1,81 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Copycat \n",
"\n",
"Just type your copycat example, and the number of iterations."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Answered iijjkl (time 1374, final temperature 13.5)\n",
"Answered iijjll (time 665, final temperature 19.6)\n",
"Answered iijjll (time 406, final temperature 16.6)\n",
"Answered iijjkl (time 379, final temperature 47.9)\n",
"Answered iijjll (time 556, final temperature 19.2)\n",
"Answered iijjkl (time 813, final temperature 42.8)\n",
"Answered iijjll (time 934, final temperature 15.5)\n",
"Answered iijjkl (time 1050, final temperature 49.5)\n",
"Answered iijjkl (time 700, final temperature 44.0)\n",
"Answered iijjkl (time 510, final temperature 34.8)\n",
"Answered iijjkl (time 673, final temperature 18.1)\n",
"Answered iijjkl (time 1128, final temperature 19.8)\n",
"Answered iijjll (time 961, final temperature 19.9)\n",
"Answered iijjll (time 780, final temperature 16.5)\n",
"Answered iijjll (time 607, final temperature 17.8)\n",
"Answered iijjll (time 594, final temperature 39.7)\n",
"Answered iijjll (time 736, final temperature 18.4)\n",
"Answered iijjll (time 903, final temperature 18.6)\n",
"Answered iijjll (time 601, final temperature 20.6)\n",
"Answered iijjll (time 949, final temperature 42.4)\n",
"iijjll: 12 (avg time 724.3, avg temp 22.1)\n",
"iijjkl: 8 (avg time 828.4, avg temp 33.8)\n"
]
}
],
"source": [
"%run main.py abc abd iijjkk --iterations 20"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -4,7 +4,7 @@ co.py.cat
![GUI](https://i.imgur.com/7pb20g0.png)
An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm.
The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading.
The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. See also [Farglexandria](https://github.com/Alex-Linhares/Farglexandria).
This implementation is a copycat of Scott Boland's [Java implementation](https://archive.org/details/JavaCopycat).
The original Java-to-Python translation work was done by J Alan Brogan (@jalanb on GitHub).
@ -75,3 +75,10 @@ $ python
```
The result of `run` is a dict containing the same information as was printed by `main.py` above.
Questions
---------
1. Why are codelets **NOT** implemented through lambda?

View File

@ -1 +1,2 @@
from .copycat import Copycat, Reporter # noqa
from .problem import Problem

View File

@ -74,8 +74,10 @@ def __structureVsStructure(structure1, weight1, structure2, weight2):
temperature = ctx.temperature
structure1.updateStrength()
structure2.updateStrength()
# TODO: use entropy
weightedStrength1 = temperature.getAdjustedValue(
structure1.totalStrength * weight1)
# TODO: use entropy
weightedStrength2 = temperature.getAdjustedValue(
structure2.totalStrength * weight2)
return random.weighted_greater_than(weightedStrength1, weightedStrength2)
@ -111,6 +113,7 @@ def __slippability(ctx, conceptMappings):
temperature = ctx.temperature
for mapping in conceptMappings:
slippiness = mapping.slippability() / 100.0
# TODO: use entropy
probabilityOfSlippage = temperature.getAdjustedProbability(slippiness)
if random.coinFlip(probabilityOfSlippage):
return True
@ -122,6 +125,7 @@ def breaker(ctx, codelet):
random = ctx.random
temperature = ctx.temperature
workspace = ctx.workspace
# TODO: use entropy
probabilityOfFizzle = (100.0 - temperature.value()) / 100.0
if random.coinFlip(probabilityOfFizzle):
return
@ -138,6 +142,7 @@ def breaker(ctx, codelet):
breakObjects += [structure.source.group]
# Break all the objects or none of them; this matches the Java
for structure in breakObjects:
# TODO: use entropy
breakProbability = temperature.getAdjustedProbability(
structure.totalStrength / 100.0)
if random.coinFlip(breakProbability):
@ -149,8 +154,7 @@ def breaker(ctx, codelet):
def chooseRelevantDescriptionByActivation(ctx, workspaceObject):
random = ctx.random
descriptions = workspaceObject.relevantDescriptions()
weights = [description.descriptor.activation
for description in descriptions]
weights = [description.descriptor.activation for description in descriptions]
return random.weighted_choice(descriptions, weights)
@ -160,6 +164,7 @@ def similarPropertyLinks(ctx, slip_node):
result = []
for slip_link in slip_node.propertyLinks:
association = slip_link.degreeOfAssociation() / 100.0
# TODO:use entropy
probability = temperature.getAdjustedProbability(association)
if random.coinFlip(probability):
result += [slip_link]
@ -182,7 +187,7 @@ def bottom_up_description_scout(ctx, codelet):
sliplinks = similarPropertyLinks(ctx, description.descriptor)
assert sliplinks
weights = [sliplink.degreeOfAssociation() * sliplink.destination.activation
for sliplink in sliplinks]
for sliplink in sliplinks]
chosen = random.weighted_choice(sliplinks, weights)
chosenProperty = chosen.destination
coderack.proposeDescription(chosenObject, chosenProperty.category(),
@ -215,6 +220,7 @@ def description_strength_tester(ctx, codelet):
description.descriptor.buffer = 100.0
description.updateStrength()
strength = description.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
assert random.coinFlip(probability)
coderack.newCodelet('description-builder', strength, [description])
@ -298,7 +304,7 @@ def rule_scout(ctx, codelet):
workspace = ctx.workspace
assert workspace.numberOfUnreplacedObjects() == 0
changedObjects = [o for o in workspace.initial.objects if o.changed]
#assert len(changedObjects) < 2
# assert len(changedObjects) < 2
# if there are no changed objects, propose a rule with no changes
if not changedObjects:
return coderack.proposeRule(None, None, None, None)
@ -328,8 +334,8 @@ def rule_scout(ctx, codelet):
if targetObject.described(node):
if targetObject.distinguishingDescriptor(node):
newList += [node]
objectList = newList # surely this should be +=
# "union of this and distinguishing descriptors"
objectList = newList # surely this should be +=
# "union of this and distinguishing descriptors"
assert objectList
# use conceptual depth to choose a description
weights = [
@ -360,6 +366,7 @@ def rule_strength_tester(ctx, codelet):
temperature = ctx.temperature
rule = codelet.arguments[0]
rule.updateStrength()
# TODO: use entropy
probability = temperature.getAdjustedProbability(rule.totalStrength / 100.0)
if random.coinFlip(probability):
coderack.newCodelet('rule-builder', rule.totalStrength, [rule])
@ -392,8 +399,8 @@ def replacement_finder(ctx, codelet):
relation = relations[diff]
else:
relation = None
letterOfInitialString.replacement = Replacement(ctx,
letterOfInitialString, letterOfModifiedString, relation)
letterOfInitialString.replacement = Replacement(ctx, letterOfInitialString,
letterOfModifiedString, relation)
if relation != slipnet.sameness:
letterOfInitialString.changed = True
workspace.changedObject = letterOfInitialString
@ -436,8 +443,8 @@ def top_down_bond_scout__direction(ctx, codelet):
coderack = ctx.coderack
slipnet = ctx.slipnet
direction = codelet.arguments[0]
source = __getScoutSource(ctx,
direction, formulas.localDirectionCategoryRelevance, 'bond')
source = __getScoutSource(ctx, direction, formulas.localDirectionCategoryRelevance,
'bond')
destination = chooseDirectedNeighbor(ctx, source, direction)
assert destination
logging.info('to object: %s', destination)
@ -462,6 +469,7 @@ def bond_strength_tester(ctx, codelet):
__showWhichStringObjectIsFrom(bond)
bond.updateStrength()
strength = bond.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
logging.info('bond strength = %d for %s', strength, bond)
assert random.coinFlip(probability)
@ -502,7 +510,7 @@ def bond_builder(ctx, codelet):
if incompatibleCorrespondences:
logging.info("trying to break incompatible correspondences")
assert __fight(bond, 2.0, incompatibleCorrespondences, 3.0)
#assert __fightIncompatibles(incompatibleCorrespondences,
# assert __fightIncompatibles(incompatibleCorrespondences,
# bond, 'correspondences', 2.0, 3.0)
for incompatible in incompatibleBonds:
incompatible.break_the_structure()
@ -692,7 +700,7 @@ def top_down_group_scout__direction(ctx, codelet):
direction, bondFacet)
#noinspection PyStringFormat
# noinspection PyStringFormat
@codelet('group-scout--whole-string')
def group_scout__whole_string(ctx, codelet):
coderack = ctx.coderack
@ -744,6 +752,7 @@ def group_strength_tester(ctx, codelet):
__showWhichStringObjectIsFrom(group)
group.updateStrength()
strength = group.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
if random.coinFlip(probability):
# it is strong enough - post builder & activate nodes
@ -871,6 +880,7 @@ def rule_translator(ctx, codelet):
bondDensity = min(bondDensity, 1.0)
weights = __getCutoffWeights(bondDensity)
cutoff = 10.0 * random.weighted_choice(list(range(1, 11)), weights)
# TODO: use entropy
if cutoff >= temperature.actual_value:
result = workspace.rule.buildTranslatedRule()
if result is not None:
@ -907,11 +917,11 @@ def bottom_up_correspondence_scout(ctx, codelet):
and m.initialDescriptionType != slipnet.bondFacet]
initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
flipTargetObject = False
if (objectFromInitial.spansString() and
objectFromTarget.spansString() and
slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
if (objectFromInitial.spansString() and
objectFromTarget.spansString() and
slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
objectFromTarget = objectFromTarget.flippedVersion()
conceptMappings = formulas.getMappings(
objectFromInitial, objectFromTarget,
@ -927,6 +937,7 @@ def important_object_correspondence_scout(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
slipnet = ctx.slipnet
# TODO: use entropy
temperature = ctx.temperature
workspace = ctx.workspace
objectFromInitial = chooseUnmodifiedObject(ctx, 'relativeImportance',
@ -966,11 +977,11 @@ def important_object_correspondence_scout(ctx, codelet):
and m.initialDescriptionType != slipnet.bondFacet]
initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
flipTargetObject = False
if (objectFromInitial.spansString()
and objectFromTarget.spansString()
and slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
if (objectFromInitial.spansString()
and objectFromTarget.spansString()
and slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
objectFromTarget = objectFromTarget.flippedVersion()
conceptMappings = formulas.getMappings(
objectFromInitial, objectFromTarget,
@ -997,6 +1008,7 @@ def correspondence_strength_tester(ctx, codelet):
objectFromTarget.flipped_version())))
correspondence.updateStrength()
strength = correspondence.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
if random.coinFlip(probability):
# activate some concepts
@ -1050,8 +1062,8 @@ def correspondence_builder(ctx, codelet):
# if there is an incompatible bond then fight against it
initial = correspondence.objectFromInitial
target = correspondence.objectFromTarget
if (initial.leftmost or initial.rightmost and
target.leftmost or target.rightmost):
if (initial.leftmost or initial.rightmost and
target.leftmost or target.rightmost):
# search for the incompatible bond
incompatibleBond = correspondence.getIncompatibleBond()
if incompatibleBond:

View File

@ -83,6 +83,7 @@ class Coderack(object):
if 'correspondence' in codeletName:
return workspace.interStringUnhappiness / 100.0
if 'description' in codeletName:
# TODO: use entropy
return (temperature.value() / 100.0) ** 2
return workspace.intraStringUnhappiness / 100.0
@ -161,6 +162,8 @@ class Coderack(object):
urgency = 3
if codeletName == 'breaker':
urgency = 1
# TODO: use entropy
if temperature.value() < 25.0 and 'translator' in codeletName:
urgency = 5
for _ in range(howMany):
@ -287,6 +290,8 @@ class Coderack(object):
random = self.ctx.random
temperature = self.ctx.temperature
assert self.codelets
# TODO: use entropy
scale = (100.0 - temperature.value() + 10.0) / 15.0
chosen = random.weighted_choice(self.codelets, [codelet.urgency ** scale for codelet in self.codelets])
self.removeCodelet(chosen)

View File

@ -239,6 +239,7 @@ class CursesReporter(Reporter):
w.border()
w.refresh()
#TODO: use entropy
def report_temperature(self, temperature):
self.do_keyboard_shortcuts()
w = self.temperatureWindow

View File

@ -96,6 +96,7 @@ class Group(WorkspaceObject):
support = self.localSupport() / 100.0
activation = slipnet.length.activation / 100.0
supportedActivation = (support * activation) ** exp
#TODO: use entropy
return temperature.getAdjustedProbability(supportedActivation)
def flippedVersion(self):
@ -130,6 +131,7 @@ class Group(WorkspaceObject):
cubedlength = length ** 3
fred = cubedlength * (100.0 - slipnet.length.activation) / 100.0
probability = 0.5 ** fred
#TODO: use entropy
value = temperature.getAdjustedProbability(probability)
if value < 0.06:
value = 0.0

62
copycat/problem.py Normal file
View File

@ -0,0 +1,62 @@
from .copycat import Copycat
from pprint import pprint
class Problem:
    """A Copycat analogy problem (initial : modified :: target : _) and its answer distributions.

    ``distributions`` maps a temperature-adjustment formula (or ``None`` when the
    workspace exposes no temperature) to the answer dictionary returned by
    ``Copycat.run`` for that formula.
    """

    def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
        """Store the problem and, unless ``distributions`` is given, solve it immediately.

        Args:
            initial: The original string, e.g. ``'abc'``.
            modified: The changed version of ``initial``, e.g. ``'abd'``.
            target: The string Copycat should change analogously.
            iterations: Number of runs passed to ``Copycat.run``.
            distributions: Previously recorded answers; when ``None`` they are
                computed by calling :meth:`solve`.
            formulas: Optional iterable of formula identifiers to run with; when
                ``None`` a single run with the current formula is made.
        """
        self.formulas = formulas
        self.initial = initial
        self.modified = modified
        self.target = target
        self.iterations = iterations
        if distributions is None:
            self.distributions = self.solve()
        else:
            self.distributions = distributions
        if formulas is not None:
            # Selecting formulas only makes sense when a temperature object exists.
            assert hasattr(Copycat().workspace, 'temperature')

    def test(self, comparison, expected=None):
        """Solve the problem again and hand the fresh and expected answers to ``comparison``.

        Args:
            comparison: Callable invoked as ``comparison(actual, expected)``.
            expected: Distributions to compare against; defaults to the ones
                stored on this problem.
        """
        print('-' * 120)
        print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                  self.modified,
                                                                  self.target))
        print('expected:')
        if expected is None:
            expected = self.distributions
        pprint(expected)
        actual = self.solve()
        print('actual:')
        pprint(actual)
        comparison(actual, expected)
        print('-' * 120)

    def solve(self):
        """Run Copycat on this problem and return ``{formula: answers}``.

        With no formulas configured, one run is made and stored under the
        formula currently reported by the workspace temperature (or ``None``
        when the workspace has no temperature). Otherwise one run is made per
        configured formula.
        """
        print('-' * 120)
        print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
                                                                  self.modified,
                                                                  self.target))
        copycat = Copycat()
        answers = dict()
        if self.formulas is None:
            if hasattr(copycat.workspace, 'temperature'):
                formula = copycat.workspace.temperature.getAdj()
            else:
                formula = None
            answers[formula] = copycat.run(self.initial,
                                           self.modified,
                                           self.target,
                                           self.iterations)
        else:
            for formula in self.formulas:
                copycat.temperature.useAdj(formula)
                # Key each result by the formula that produced it. The previous
                # code used the undefined name ``formulas`` here (NameError).
                answers[formula] = copycat.run(self.initial,
                                               self.modified,
                                               self.target,
                                               self.iterations)
        return answers

    def generate(self):
        """Recompute and store the answer distributions for this problem."""
        self.distributions = self.solve()

57
copycat/statistics.py Normal file
View File

@ -0,0 +1,57 @@
# Chi-squared critical values keyed by degrees of freedom (1 through 10).
# The figures match the standard upper-tail table at the 0.05 significance
# level (e.g. 3.841 for one degree of freedom). Lookups for any other number
# of degrees of freedom raise KeyError.
_chiSquared_table = {
1:3.841,
2:5.991,
3:7.815,
4:9.488,
5:11.071,
6:12.592,
7:14.067,
8:15.507,
9:16.919,
10:18.307
}
class ChiSquaredException(Exception):
    """Exception type for chi-squared comparison errors.

    Nothing in the visible module raises it yet.
    """
def chi_squared(actual, expected):
    """Return Pearson's chi-squared statistic between two answer distributions.

    Both arguments map an answer to a dict holding a ``'count'`` entry; an
    answer missing from one side counts as zero there. Answers whose expected
    count is zero cannot contribute to the statistic, so they are reported
    with a warning and skipped.
    """
    def count_of(answer, distribution):
        # An answer absent from a distribution was seen zero times.
        if answer in distribution:
            return distribution[answer]['count']
        return 0

    statistic = 0
    for answer in set(list(actual.keys()) + list(expected.keys())):
        expectedCount = count_of(answer, expected)
        observedCount = count_of(answer, actual)
        if expectedCount == 0:
            print('Warning! Expected 0 counts of {}, but got {}'.format(answer, observedCount))
            continue
        statistic += (observedCount - expectedCount) ** 2 / expectedCount
    return statistic
def cross_formula_chi_squared(actualDict, expectedDict):
    """Compare every actual distribution with every expected one and report significance.

    Both arguments map a formula identifier to an answer distribution (see
    ``chi_squared``). For each pair the chi-squared statistic is compared with
    the tabulated critical value and a message is printed when the difference
    is significant.
    """
    for ka, actual in actualDict.items():
        for ke, expected in expectedDict.items():
            print('Comparing {} with {}'.format(ka, ke))
            chiSquared = chi_squared(actual, expected)
            # chi_squared() does not return its category count, so recompute it
            # here; previously this name was undefined and raised NameError.
            # NOTE(review): this keeps the file's convention of using the number
            # of distinct answers; textbook degrees of freedom would be one less.
            degreesFreedom = len(set(actual) | set(expected))
            critical = _chiSquared_table.get(degreesFreedom)
            if critical is None:
                # The table only covers 1..10; report instead of raising KeyError.
                print('No critical value tabulated for {} degrees of freedom; '
                      'skipping significance check'.format(degreesFreedom))
                continue
            if chiSquared >= critical:
                print('Significant difference between expected and actual answer distributions: \n' +
                      'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))
def cross_chi_squared(problemSets):
    """Cross-compare the stored distributions of every pair of distinct problem sets.

    Each set is paired only with the sets that follow it in ``problemSets``,
    and every problem of the first set is compared with every problem of the
    second via ``cross_formula_chi_squared``.
    """
    for index, firstSet in enumerate(problemSets):
        laterSets = problemSets[index + 1:]
        for secondSet in laterSets:
            for first in firstSet:
                for second in secondSet:
                    firstAnswers = first.distributions
                    secondAnswers = second.distributions
                    cross_formula_chi_squared(firstAnswers, secondAnswers)
def iso_chi_squared(actualDict, expectedDict):
    """Compare actual and expected distributions formula by formula.

    Every formula key in ``expectedDict`` must also be present in
    ``actualDict``. For each key the chi-squared statistic of the two
    distributions is compared with the tabulated critical value, and a message
    is printed when the difference is significant.

    Raises:
        AssertionError: If an expected formula key is missing from ``actualDict``.
    """
    for key, expected in expectedDict.items():
        assert key in actualDict, 'The key {} was not tested'.format(key)
        actual = actualDict[key]
        # Previously the two distributions were fetched and then discarded, so
        # nothing was ever compared.
        chiSquared = chi_squared(actual, expected)
        # NOTE(review): uses the number of distinct answers, matching the count
        # computed inside chi_squared(); textbook degrees of freedom would be
        # one less.
        degreesFreedom = len(set(actual) | set(expected))
        critical = _chiSquared_table.get(degreesFreedom)
        if critical is None:
            # The table only covers 1..10 degrees of freedom.
            print('No critical value tabulated for {} degrees of freedom; '
                  'skipping significance check for {}'.format(degreesFreedom, key))
        elif chiSquared >= critical:
            print('Significant difference between expected and actual answer distributions: \n' +
                  'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))

View File

@ -35,6 +35,7 @@ class Temperature(object):
def getAdjustedValue(self, value):
return value ** (((100.0 - self.value()) / 30.0) + 0.5)
"""
def getAdjustedProbability(self, value):
if value == 0 or value == 0.5 or self.value() == 0:
return value
@ -45,3 +46,180 @@ class Temperature(object):
c = (10 - a) / 100
f = (c + 1) * value
return max(f, 0.5)
"""
def getAdjustedProbability(self, value):
"""
This function returns the probability for a decision.
Copied above.
Please look at the last line of it. Strangely, it was
return max(f, 0.5). Does that make sense? Let's compare
some results. Where it was (0.5), we obtained, for example:
iiijjjlll: 670 (avg time 1108.5, avg temp 23.6)
iiijjjd: 2 (avg time 1156.0, avg temp 35.0)
iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5)
iiijjjkll: 8 (avg time 2096.8, avg temp 44.1)
iiijjjkkd: 5 (avg time 837.2, avg temp 48.0)
wyz: 5 (avg time 2275.2, avg temp 14.9)
xyd: 982 (avg time 2794.4, avg temp 17.5)
yyz: 7 (avg time 2731.9, avg temp 25.1)
dyz: 2 (avg time 3320.0, avg temp 27.1)
xyy: 2 (avg time 4084.5, avg temp 31.1)
xyz: 2 (avg time 1873.5, avg temp 52.1)
Now, let's see what return max(f, 0.0000) does:
wyz: 7 (avg time 3192.9, avg temp 13.1)
xyd: 985 (avg time 2849.1, avg temp 17.5)
yyz: 6 (avg time 3836.7, avg temp 18.6)
xyy: 1 (avg time 1421.0, avg temp 19.5)
xyz: 1 (avg time 7350.0, avg temp 48.3)
They *seem* better (in the strict sense that we've obtained both
lower T and more times of wyz.) But they're *not* statistically
significant (for 1000 runs).
Now... looking at the code... it seems to be a mess... what does
function f() even mean in intuitive terms?
Work it does, but dude... quite a hack.
Another run, with return f @line89:
wyz: 8 (avg time 4140.5, avg temp 13.3)
yyz: 6 (avg time 2905.2, avg temp 14.5)
xyd: 982 (avg time 3025.4, avg temp 17.6)
dyz: 4 (avg time 4265.0, avg temp 17.7)
Does it even matter? Another (quick) run, I think with return (0.5):
dyz: 1 (avg time 5198.0, avg temp 15.3)
wyz: 3 (avg time 4043.7, avg temp 17.1)
yyz: 9 (avg time 3373.6, avg temp 21.0)
xyd: 84 (avg time 5011.1, avg temp 23.3)
xyy: 3 (avg time 4752.0, avg temp 27.9)
Compared to return(0.99):
xyd: 1000 (avg time 1625.2, avg temp 17.3)
Comparing to return f --> Statistically significant.
Comparing to return(0.5) --> same, so this return value does something.
Now running return(0.0):
xyz: 3 (avg time 3996.7, avg temp 81.1)
dyz: 46 (avg time 5931.7, avg temp 82.6)
xd: 17 (avg time 6090.3, avg temp 83.8)
xyd: 934 (avg time 7699.8, avg temp 88.1)
It's bad overall, but at least it's statistically significant!
return (-f * (math.log2(f))) # Entropy test #1 (global).
wyz: 123 (avg time 5933.1, avg temp 16.5)
xyy: 200 (avg time 6486.7, avg temp 27.8)
yyz: 330 (avg time 6310.2, avg temp 38.5)
dyz: 75 (avg time 6393.3, avg temp 39.6)
yzz: 5 (avg time 4965.0, avg temp 59.3)
xyz: 160 (avg time 6886.2, avg temp 60.2)
xd: 4 (avg time 2841.0, avg temp 61.8)
dz: 3 (avg time 3721.0, avg temp 62.1)
xyd: 100 (avg time 5853.1, avg temp 67.5)
Here we get an intuitive result: entropy/uncertainty seems better at
exploring a whole range of possible solutions. It even seems, at least
to me, better than the distribution obtained by the original copycat.
instead of log2, trying ln --> return (-f * math.log(f)):
wyz: 78 (avg time 7793.7, avg temp 16.6)
xyy: 202 (avg time 9168.5, avg temp 27.5)
wxz: 1 (avg time 3154.0, avg temp 33.4)
dyz: 63 (avg time 7950.3, avg temp 41.7)
yyz: 217 (avg time 8147.4, avg temp 41.7)
xyz: 201 (avg time 7579.7, avg temp 62.5)
xxy: 1 (avg time 7994.0, avg temp 64.8)
yzz: 8 (avg time 4672.6, avg temp 65.7)
xd: 9 (avg time 9215.2, avg temp 68.1)
xyd: 217 (avg time 7677.9, avg temp 73.8)
dz: 3 (avg time 20379.0, avg temp 77.3)
(quickly) trying out (1-this_entropy_function):
xyd: 100 (avg time 2984.3, avg temp 18.2)
And that's beautiful! One wants an inverse function that punishes
exploration and creativity, that takes all the fluidity off
the system.
But somehow this completely messes up with abc abd iijjkk:
jijjkk: 66 (avg time 3200.1, avg temp 61.3)
iijjkk: 114 (avg time 5017.2, avg temp 63.5)
dijjkk: 23 (avg time 2209.0, avg temp 67.3)
iijjkl: 748 (avg time 3262.8, avg temp 70.0)
iijjkd: 49 (avg time 2315.9, avg temp 76.3)
Which leads me to suspect that someone may have overfitted the
model for either xyz or iijjkk or some other problem, and one
improvement there means disaster here.
Something tells me to invert again to 1-entropy... and bingo!
iijjll: 59 (avg time 797.4, avg temp 19.8)
iijjkl: 41 (avg time 696.1, avg temp 28.5)
My guess is that some code is preferring to find groups in the
opposite form that it likes finding the "symmetry/opposite"
concepts of the xyz problem.
Should compare & contrast the unhappiness and relevance of both
the opposite/symmetry codelets and the grouping/chunking codelets.
My hunch is the sameness group code: something there that
interacts with Temperature is wicked, and should be relatively
easy to find the error.
Here's why: the following run was done on (1-entropy(f)):
mrrlll: 77 (avg time 2195.7, avg temp 41.4)
mrrd: 2 (avg time 1698.0, avg temp 42.6)
mrrkkl: 20 (avg time 1317.8, avg temp 46.6)
mrrkkd: 1 (avg time 1835.0, avg temp 48.6)
If (1-entropy(f)) binds the system into a tight corridor of possibilities,
then why does it easily get the samenessGroup right? If this is right,
then running just entropy(f) should have big trouble with samenessGroup.
Let's see:
nrrkkk: 11 (avg time 3637.8, avg temp 64.6)
drrkkk: 3 (avg time 5921.3, avg temp 66.2)
mrrkkd: 7 (avg time 6771.3, avg temp 74.6)
mrrkkl: 79 (avg time 3723.0, avg temp 74.9)
So there we are: the system is unable to find that change samenessGroup
to next letterCategory, so there ought to be something very different
in the code that:
* Interacts with Temperature (things like unhappiness, relevance, depth,
urgency, and whatever else interacts with T)
* something very close to samenessGroup... sameGroup, sameness,
sameNeighbors, etc... is encoded in a form that is *directly opposite*
to other concepts/categories/codelets, etc.
Need to play with this more... and WTF is f anyways?
"""
if value == 0 or value == 0.5 or self.value() == 0:
return value
if value < 0.5:
return 1.0 - self.getAdjustedProbability(1.0 - value)
coldness = 100.0 - self.value()
a = math.sqrt(coldness)
c = (10 - a) / 100
f = (c + 1) * value
return (0 + (-f * math.log2(f))) # max(f, 0.0000)

View File

@ -1,137 +0,0 @@
import unittest
from .copycat import Copycat
def pnormaldist(p):
    """Return the tabulated normal quantile for the highest listed level not above ``p``.

    Raises ValueError (from ``max`` on an empty sequence) when ``p`` is below
    the smallest tabulated level, 0.80.
    """
    quantiles = {
        0.80: 1.2815,
        0.90: 1.6448,
        0.95: 1.9599,
        0.98: 2.3263,
        0.99: 2.5758,
        0.995: 2.8070,
        0.998: 3.0902,
        0.999: 3.2905,
        0.9999: 3.8905,
        0.99999: 4.4171,
        0.999999: 4.8916,
        0.9999999: 5.3267,
        0.99999999: 5.7307,
        0.999999999: 6.1094,
    }
    # Quantiles grow with the level, so the largest eligible value belongs to
    # the highest level that does not exceed p.
    eligible = [quantile for level, quantile in quantiles.items() if level <= p]
    return max(eligible)
def lower_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the lower end of the Wilson score interval for ``hits`` out of ``attempts``.

    With no attempts there is no evidence at all, so the bound is 0.
    """
    if attempts == 0:
        return 0
    zscore = pnormaldist(confidence)
    z_squared = zscore*zscore
    observed_rate = 1.0 * hits / attempts
    variance_term = (observed_rate * (1 - observed_rate) + z_squared / (4*attempts)) / attempts
    scale = (1 + z_squared / attempts)
    # Centre of the interval minus its half-width, both before normalisation.
    centre = observed_rate + z_squared / (2*attempts)
    half_width = zscore * (variance_term ** 0.5)
    return (centre - half_width) / scale
def upper_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the upper confidence bound on the hit probability.

    Computed as one minus the lower bound on the probability of a miss.
    """
    lower_for_misses = lower_bound_on_probability(attempts - hits, attempts, confidence)
    return 1.0 - lower_for_misses
# Regression tests that run Copycat on fixed problems and compare the resulting
# answer distribution with a recorded one.
class TestCopycat(unittest.TestCase):
def setUp(self):
self.longMessage = True # new in Python 2.7
# Fail unless `actual` is statistically compatible with `expected`.
# Both map an answer string to a dict with 'count' and 'avgtemp' entries.
def assertProbabilitiesLookRoughlyLike(self, actual, expected):
# Adding 0.0 makes the totals floats, so the divisions below are float divisions.
actual_count = 0.0 + sum(d['count'] for d in list(actual.values()))
expected_count = 0.0 + sum(d['count'] for d in list(expected.values()))
self.assertGreater(actual_count, 1)
self.assertGreater(expected_count, 1)
for k in set(list(actual.keys()) + list(expected.keys())):
if k not in expected:
self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected))
# Expected counts are compared as proportions, so they need not sum to the
# number of iterations actually run.
expected_probability = expected[k]['count'] / expected_count
if k in actual:
# The expected share must fall inside the confidence bounds derived from the
# observed count.
actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count)
actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count)
if not (actual_lo <= expected_probability <= actual_hi):
print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi))
self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))
# Temperature tolerance is 10 plus 10/count, i.e. looser for answers seen
# only a few times.
if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= 10.0 + (10.0 / actual[k]['count']):
print('Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], 10.0 + (10.0 / actual[k]['count'])))
self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))
else:
# The answer was never produced: that is acceptable only if the expected
# share is compatible with observing zero hits.
actual_hi = upper_bound_on_probability(0, actual_count)
if not (0 <= expected_probability <= actual_hi):
self.fail('No instances of expected key %s were produced! %r != %r' % (k, actual, expected))
# Run Copycat on one problem, check that the answer counts add up to
# `iterations`, then compare the distribution with `expected`.
def run_testcase(self, initial, modified, target, iterations, expected):
actual = Copycat().run(initial, modified, target, iterations)
self.assertEqual(sum(a['count'] for a in list(actual.values())), iterations)
self.assertProbabilitiesLookRoughlyLike(actual, expected)
def test_simple_cases(self):
self.run_testcase('abc', 'abd', 'efg', 50, {
'efd': {'count': 1, 'avgtemp': 16},
'efh': {'count': 99, 'avgtemp': 19},
})
self.run_testcase('abc', 'abd', 'ijk', 50, {
'ijd': {'count': 4, 'avgtemp': 24},
'ijl': {'count': 96, 'avgtemp': 20},
})
def test_abc_xyz(self):
self.run_testcase('abc', 'abd', 'xyz', 20, {
'xyd': {'count': 100, 'avgtemp': 19},
})
def test_ambiguous_case(self):
self.run_testcase('abc', 'abd', 'ijkk', 50, {
'ijkkk': {'count': 7, 'avgtemp': 21},
'ijll': {'count': 47, 'avgtemp': 28},
'ijkl': {'count': 44, 'avgtemp': 32},
'ijkd': {'count': 2, 'avgtemp': 65},
})
def test_mrrjjj(self):
self.run_testcase('abc', 'abd', 'mrrjjj', 50, {
'mrrjjjj': {'count': 4, 'avgtemp': 16},
'mrrkkk': {'count': 31, 'avgtemp': 47},
'mrrjjk': {'count': 64, 'avgtemp': 51},
'mrrjkk': {'count': 1, 'avgtemp': 52},
'mrrjjd': {'count': 1, 'avgtemp': 54},
})
def test_elongation(self):
# This isn't remotely what a human would say.
self.run_testcase('abc', 'aabbcc', 'milk', 50, {
'milj': {'count': 85, 'avgtemp': 55},
'mikj': {'count': 10, 'avgtemp': 56},
'milk': {'count': 1, 'avgtemp': 56},
'lilk': {'count': 1, 'avgtemp': 57},
'milb': {'count': 3, 'avgtemp': 57},
})
def test_repairing_successor_sequence(self):
# This isn't remotely what a human would say.
self.run_testcase('aba', 'abc', 'xyx', 50, {
'xc': {'count': 9, 'avgtemp': 57},
'xyc': {'count': 82, 'avgtemp': 59},
'cyx': {'count': 7, 'avgtemp': 68},
'xyx': {'count': 2, 'avgtemp': 69},
})
def test_nonsense(self):
self.run_testcase('cat', 'dog', 'cake', 10, {
'cakg': {'count': 99, 'avgtemp': 70},
'gake': {'count': 1, 'avgtemp': 59},
})
self.run_testcase('cat', 'dog', 'kitten', 10, {
'kitteg': {'count': 96, 'avgtemp': 66},
'kitten': {'count': 4, 'avgtemp': 68},
})
if __name__ == '__main__':
unittest.main()

View File

@ -1,3 +1,6 @@
"""Workspace module."""
from . import formulas
from .bond import Bond
from .correspondence import Correspondence
@ -14,6 +17,7 @@ def __adjustUnhappiness(values):
class Workspace(object):
def __init__(self, ctx):
"""To initialize the workspace."""
self.ctx = ctx
self.totalUnhappiness = 0.0
self.intraStringUnhappiness = 0.0
@ -50,6 +54,16 @@ class Workspace(object):
self.modified = WorkspaceString(self.ctx, self.modifiedString)
self.target = WorkspaceString(self.ctx, self.targetString)
'''
# TODO: Initial part of refactoring in this method
def getAssessedUnhappiness(self, unhappiness):
o.Unhappiness = __adjustUnhappiness(
o.relativeImportance * o.Unhappiness
for o in self.objects)
pass
'''
# TODO: Extract method?
def assessUnhappiness(self):
self.intraStringUnhappiness = __adjustUnhappiness(
o.relativeImportance * o.intraStringUnhappiness
@ -61,6 +75,7 @@ class Workspace(object):
o.relativeImportance * o.totalUnhappiness
for o in self.objects)
# TODO: these 3 methods seem to be the same... are they? If so, Extract method.
def calculateIntraStringUnhappiness(self):
value = sum(
o.relativeImportance * o.intraStringUnhappiness
@ -92,6 +107,7 @@ class Workspace(object):
self.initial.updateIntraStringUnhappiness()
self.target.updateIntraStringUnhappiness()
# TODO: use entropy
def getUpdatedTemperature(self):
self.calculateIntraStringUnhappiness()
self.calculateInterStringUnhappiness()
@ -107,7 +123,7 @@ class Workspace(object):
))
def numberOfUnrelatedObjects(self):
"""A list of all objects in the workspace with >= 1 open bond slots"""
"""Computes the number of all objects in the workspace with >= 1 open bond slots."""
objects = [o for o in self.objects
if o.string == self.initial or o.string == self.target]
objects = [o for o in objects if not o.spansString()]
@ -125,21 +141,21 @@ class Workspace(object):
return len(objects)
def numberOfUnreplacedObjects(self):
"""A list of all unreplaced objects in the initial string"""
"""The number of unreplaced letters in the initial string."""
objects = [o for o in self.objects
if o.string == self.initial and isinstance(o, Letter)]
objects = [o for o in objects if not o.replacement]
return len(objects)
def numberOfUncorrespondingObjects(self):
"""A list of all uncorresponded objects in the initial string"""
"""The number of objects in the initial or target string that have no correspondence."""
objects = [o for o in self.objects
if o.string == self.initial or o.string == self.target]
objects = [o for o in objects if not o.correspondence]
return len(objects)
def numberOfBonds(self):
"""The number of bonds in the workspace"""
"""The number of bonds in the workspace."""
return sum(1 for o in self.structures if isinstance(o, Bond))
def correspondences(self):

View File

@ -38,7 +38,7 @@ class WorkspaceString(object):
return self.string[i]
def updateRelativeImportance(self):
"""Update the normalised importance of all objects in the string"""
"""Update the normalised importance of all objects in the string."""
total = sum(o.rawImportance for o in self.objects)
if not total:
for o in self.objects:

40
main.py
View File

@ -1,16 +1,55 @@
#!/usr/bin/env python3
"""
Main Copycat program.
To run it, type at the terminal:
> python main.py abc abd ppqqrr --iterations 10
The script takes three to five arguments. The first two are a pair of strings
with some change, for example "abc" and "abd". The third is a string which the
script should try to change analogously. The fourth (which defaults to "1") is
a number of iterations. One can also specify a defined seed value for the
random number generator.
This instruction produces output such as:
iiijjjlll: 670 (avg time 1108.5, avg temp 23.6)
iiijjjd: 2 (avg time 1156.0, avg temp 35.0)
iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5)
iiijjjkll: 8 (avg time 2096.8, avg temp 44.1)
iiijjjkkd: 5 (avg time 837.2, avg temp 48.0)
wyz: 5 (avg time 2275.2, avg temp 14.9)
xyd: 982 (avg time 2794.4, avg temp 17.5)
yyz: 7 (avg time 2731.9, avg temp 25.1)
dyz: 2 (avg time 3320.0, avg temp 27.1)
xyy: 2 (avg time 4084.5, avg temp 31.1)
xyz: 2 (avg time 1873.5, avg temp 52.1)
The first number indicates how many times Copycat chose that string as its
answer; higher means "more obvious". The last number indicates the average
final temperature of the workspace; lower means "more elegant".
"""
import argparse
import logging
from copycat import Copycat, Reporter
class SimpleReporter(Reporter):
    """Reporter that prints a one-line summary of each answer."""

    def report_answer(self, answer):
        """Print the answer string, its run time and its final temperature.

        ``answer`` is a mapping with ``'answer'``, ``'time'`` and ``'temp'`` entries.
        """
        summary = 'Answered %s (time %d, final temperature %.1f)' % (
            answer['answer'], answer['time'], answer['temp'],
        )
        print(summary)
def main():
"""Program's main entrance point. Self-explanatory code."""
logging.basicConfig(level=logging.INFO, format='%(message)s', filename='./output/copycat.log', filemode='w')
parser = argparse.ArgumentParser()
@ -27,5 +66,6 @@ def main():
for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']):
print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp']))
if __name__ == '__main__':
main()

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
"""Self-explanatory."""
from setuptools import setup
setup(

62
tests.py Normal file
View File

@ -0,0 +1,62 @@
import unittest
import os.path
import pickle
import argparse
import sys
from pprint import pprint
from copycat import Problem
from copycat.statistics import iso_chi_squared
# TODO: update test cases to use entropy
def generate():
    """Solve the reference problems, pickle them to ``TestCopycat.Filename`` and return them.

    Each ``Problem`` is built without stored distributions, so constructing it
    runs Copycat for the configured number of iterations.
    """
    print('Generating distributions for new file')
    iterations = 30
    targets = ('efg', 'ijk', 'xyz', 'ijkk', 'mrrjjj')
    problems = [Problem('abc', 'abd', target, iterations) for target in targets]
    with open(TestCopycat.Filename, 'wb') as outfile:
        pickle.dump(problems, outfile)
    return problems
class TestCopycat(unittest.TestCase):
    """Compare fresh Copycat runs against pickled answer distributions."""

    # Path of the pickle file holding the reference problems; assigned by the
    # command-line entry point before the tests run.
    Filename = None

    def setUp(self):
        """Enable unittest's long failure messages."""
        self.longMessage = True  # new in Python 2.7

    def test(self):
        """Load the stored problems, regenerating them on any load error, and test each."""
        path = TestCopycat.Filename
        print('Testing copycat with input file: {}'.format(path))
        try:
            with open(path, 'rb') as stored:
                problems = pickle.load(stored)
        except Exception as error:
            # Best effort: any failure to read the file triggers regeneration.
            print('Generating due to error:')
            print(error)
            problems = generate()
        for problem in problems:
            problem.test(iso_chi_squared)
if __name__ == '__main__':
    # Usage: tests.py [--generate] [filename] [unittest_args ...]
    parser = argparse.ArgumentParser()
    parser.add_argument('--generate', action='store_true')
    parser.add_argument('filename', default='.distributions', nargs='?')
    # nargs='*' collects the remaining positionals into a list. With the former
    # nargs='?' a single string was stored, and assigning it to sys.argv[1:]
    # below split it into one argument per character.
    parser.add_argument('unittest_args', default=[], nargs='*')
    args = parser.parse_args()

    # The test case and generate() both read the pickle path from this attribute.
    TestCopycat.Filename = args.filename

    if args.generate:
        generate()

    # Now set the sys.argv to the unittest_args (leaving sys.argv[0] alone)
    sys.argv[1:] = args.unittest_args
    unittest.main()