Merge branch 'feature-temperature-effect-analysis' into paper

This commit is contained in:
LSaldyt
2017-11-13 10:40:07 -07:00
29 changed files with 943 additions and 207 deletions

6
.gitignore vendored
View File

@ -19,6 +19,8 @@ pip-log.txt
# Unit test / coverage reports
.coverage
.tox
.log
copycat.log
# Other filesystems
.svn
@ -27,9 +29,11 @@ pip-log.txt
# Editors
.*.swp
output
# Output
output/*
copycat.log
papers/*.log
papers/*.pdf
papers/*.out
papers/*.aux

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}

12
.travis.yml Normal file
View File

@ -0,0 +1,12 @@
language: python
branches:
only:
- "develop"
- "master"
python:
- "3.6"
install:
- pip3 install -r requirements.txt
script:
- python3 tests.py

81
Copycat.ipynb Normal file
View File

@ -0,0 +1,81 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Copycat \n",
"\n",
"Just type your copycat example, and the number of iterations."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Answered iijjkl (time 1374, final temperature 13.5)\n",
"Answered iijjll (time 665, final temperature 19.6)\n",
"Answered iijjll (time 406, final temperature 16.6)\n",
"Answered iijjkl (time 379, final temperature 47.9)\n",
"Answered iijjll (time 556, final temperature 19.2)\n",
"Answered iijjkl (time 813, final temperature 42.8)\n",
"Answered iijjll (time 934, final temperature 15.5)\n",
"Answered iijjkl (time 1050, final temperature 49.5)\n",
"Answered iijjkl (time 700, final temperature 44.0)\n",
"Answered iijjkl (time 510, final temperature 34.8)\n",
"Answered iijjkl (time 673, final temperature 18.1)\n",
"Answered iijjkl (time 1128, final temperature 19.8)\n",
"Answered iijjll (time 961, final temperature 19.9)\n",
"Answered iijjll (time 780, final temperature 16.5)\n",
"Answered iijjll (time 607, final temperature 17.8)\n",
"Answered iijjll (time 594, final temperature 39.7)\n",
"Answered iijjll (time 736, final temperature 18.4)\n",
"Answered iijjll (time 903, final temperature 18.6)\n",
"Answered iijjll (time 601, final temperature 20.6)\n",
"Answered iijjll (time 949, final temperature 42.4)\n",
"iijjll: 12 (avg time 724.3, avg temp 22.1)\n",
"iijjkl: 8 (avg time 828.4, avg temp 33.8)\n"
]
}
],
"source": [
"%run main.py abc abd iijjkk --iterations 20"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -3,8 +3,15 @@ co.py.cat
![GUI](https://i.imgur.com/7pb20g0.png)
Linhares and I are planning to use this codebase to implement a variation of Copycat that uses *Entropy* instead of *Temperature*, while still preserving the parallel terraced scan in full form. If the change is viable, I plan to write a paper on that (if anyone is interested in co-authoring, let me know). For the general idea, please see pages 41 and 42 of the [*Information Sciences*](https://github.com/Alex-Linhares/FARGlexandria/blob/master/Literature/Chess-Capyblanca-2014-Linhares-Information%20Sciences.pdf) paper on [Capyblanca](https://github.com/Alex-Linhares/FARGlexandria).
**If you would like to help research and publish a paper, please let me know.**
Please see also [FARGlexandria](https://github.com/Alex-Linhares/FARGlexandria), a repository with all FARG projects (and help if you have some of the missing info there, especially about Letter Spirit and George!)
-------------------------------
An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm.
The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading.
The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. See also [FARGlexandria](https://github.com/Alex-Linhares/FARGlexandria).
This implementation is a copycat of Scott Boland's [Java implementation](https://archive.org/details/JavaCopycat).
The original Java-to-Python translation work was done by J Alan Brogan (@jalanb on GitHub).
@ -75,3 +82,10 @@ $ python
```
The result of `run` is a dict containing the same information as was printed by `main.py` above.
Questions
---------
1. Why are codelets **NOT** implemented through lambda?

View File

@ -1 +1,3 @@
from .copycat import Copycat, Reporter # noqa
from .plot import plot_answers
from .io import save_answers

View File

@ -71,11 +71,14 @@ def __structureVsStructure(structure1, weight1, structure2, weight2):
"""Return true if the first structure comes out stronger than the second."""
ctx = structure1.ctx
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
structure1.updateStrength()
structure2.updateStrength()
# TODO: use entropy
weightedStrength1 = temperature.getAdjustedValue(
structure1.totalStrength * weight1)
# TODO: use entropy
weightedStrength2 = temperature.getAdjustedValue(
structure2.totalStrength * weight2)
return random.weighted_greater_than(weightedStrength1, weightedStrength2)
@ -119,6 +122,15 @@ def __slippability(ctx, conceptMappings):
@codelet('breaker')
def breaker(ctx, codelet):
# From the original LISP:
'''
First decides probabilistically whether or not to fizzle, based on
temperature. Chooses a structure at random and decides probabilistically
whether or not to break it as a function of its total weakness.
If the structure is a bond in a group, have to break the group in
order to break the bond.
'''
random = ctx.random
temperature = ctx.temperature
workspace = ctx.workspace
@ -137,6 +149,8 @@ def breaker(ctx, codelet):
if structure.source.group == structure.destination.group:
breakObjects += [structure.source.group]
# Break all the objects or none of them; this matches the Java
# "all objects" means a bond and its group, if it has one.
for structure in breakObjects:
breakProbability = temperature.getAdjustedProbability(
structure.totalStrength / 100.0)
@ -149,17 +163,18 @@ def breaker(ctx, codelet):
def chooseRelevantDescriptionByActivation(ctx, workspaceObject):
random = ctx.random
descriptions = workspaceObject.relevantDescriptions()
weights = [description.descriptor.activation
for description in descriptions]
weights = [description.descriptor.activation for description in descriptions]
return random.weighted_choice(descriptions, weights)
def similarPropertyLinks(ctx, slip_node):
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
result = []
for slip_link in slip_node.propertyLinks:
association = slip_link.degreeOfAssociation() / 100.0
# TODO:use entropy
probability = temperature.getAdjustedProbability(association)
if random.coinFlip(probability):
result += [slip_link]
@ -182,7 +197,7 @@ def bottom_up_description_scout(ctx, codelet):
sliplinks = similarPropertyLinks(ctx, description.descriptor)
assert sliplinks
weights = [sliplink.degreeOfAssociation() * sliplink.destination.activation
for sliplink in sliplinks]
for sliplink in sliplinks]
chosen = random.weighted_choice(sliplinks, weights)
chosenProperty = chosen.destination
coderack.proposeDescription(chosenObject, chosenProperty.category(),
@ -210,11 +225,13 @@ def top_down_description_scout(ctx, codelet):
def description_strength_tester(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
description = codelet.arguments[0]
description.descriptor.buffer = 100.0
description.updateStrength()
strength = description.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
assert random.coinFlip(probability)
coderack.newCodelet('description-builder', strength, [description])
@ -294,11 +311,12 @@ def rule_scout(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
slipnet = ctx.slipnet
# TODO: use entropy
temperature = ctx.temperature
workspace = ctx.workspace
assert workspace.numberOfUnreplacedObjects() == 0
changedObjects = [o for o in workspace.initial.objects if o.changed]
#assert len(changedObjects) < 2
# assert len(changedObjects) < 2
# if there are no changed objects, propose a rule with no changes
if not changedObjects:
return coderack.proposeRule(None, None, None, None)
@ -328,10 +346,11 @@ def rule_scout(ctx, codelet):
if targetObject.described(node):
if targetObject.distinguishingDescriptor(node):
newList += [node]
objectList = newList # surely this should be +=
# "union of this and distinguishing descriptors"
objectList = newList # surely this should be +=
# "union of this and distinguishing descriptors"
assert objectList
# use conceptual depth to choose a description
# TODO: use entropy
weights = [
temperature.getAdjustedValue(node.conceptualDepth)
for node in objectList
@ -343,6 +362,7 @@ def rule_scout(ctx, codelet):
objectList += [changed.replacement.relation]
objectList += [changed.replacement.objectFromModified.getDescriptor(
slipnet.letterCategory)]
# TODO: use entropy
# use conceptual depth to choose a relation
weights = [
temperature.getAdjustedValue(node.conceptualDepth)
@ -357,9 +377,11 @@ def rule_scout(ctx, codelet):
def rule_strength_tester(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
rule = codelet.arguments[0]
rule.updateStrength()
# TODO: use entropy
probability = temperature.getAdjustedProbability(rule.totalStrength / 100.0)
if random.coinFlip(probability):
coderack.newCodelet('rule-builder', rule.totalStrength, [rule])
@ -392,8 +414,8 @@ def replacement_finder(ctx, codelet):
relation = relations[diff]
else:
relation = None
letterOfInitialString.replacement = Replacement(ctx,
letterOfInitialString, letterOfModifiedString, relation)
letterOfInitialString.replacement = Replacement(ctx, letterOfInitialString,
letterOfModifiedString, relation)
if relation != slipnet.sameness:
letterOfInitialString.changed = True
workspace.changedObject = letterOfInitialString
@ -436,8 +458,8 @@ def top_down_bond_scout__direction(ctx, codelet):
coderack = ctx.coderack
slipnet = ctx.slipnet
direction = codelet.arguments[0]
source = __getScoutSource(ctx,
direction, formulas.localDirectionCategoryRelevance, 'bond')
source = __getScoutSource(ctx, direction, formulas.localDirectionCategoryRelevance,
'bond')
destination = chooseDirectedNeighbor(ctx, source, direction)
assert destination
logging.info('to object: %s', destination)
@ -457,11 +479,13 @@ def top_down_bond_scout__direction(ctx, codelet):
def bond_strength_tester(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
bond = codelet.arguments[0]
__showWhichStringObjectIsFrom(bond)
bond.updateStrength()
strength = bond.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
logging.info('bond strength = %d for %s', strength, bond)
assert random.coinFlip(probability)
@ -502,7 +526,7 @@ def bond_builder(ctx, codelet):
if incompatibleCorrespondences:
logging.info("trying to break incompatible correspondences")
assert __fight(bond, 2.0, incompatibleCorrespondences, 3.0)
#assert __fightIncompatibles(incompatibleCorrespondences,
# assert __fightIncompatibles(incompatibleCorrespondences,
# bond, 'correspondences', 2.0, 3.0)
for incompatible in incompatibleBonds:
incompatible.break_the_structure()
@ -692,7 +716,7 @@ def top_down_group_scout__direction(ctx, codelet):
direction, bondFacet)
#noinspection PyStringFormat
# noinspection PyStringFormat
@codelet('group-scout--whole-string')
def group_scout__whole_string(ctx, codelet):
coderack = ctx.coderack
@ -738,12 +762,14 @@ def group_strength_tester(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
slipnet = ctx.slipnet
# TODO: use entropy
temperature = ctx.temperature
# update strength value of the group
group = codelet.arguments[0]
__showWhichStringObjectIsFrom(group)
group.updateStrength()
strength = group.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
if random.coinFlip(probability):
# it is strong enough - post builder & activate nodes
@ -859,6 +885,7 @@ def __getCutoffWeights(bondDensity):
def rule_translator(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
workspace = ctx.workspace
assert workspace.rule
@ -871,6 +898,7 @@ def rule_translator(ctx, codelet):
bondDensity = min(bondDensity, 1.0)
weights = __getCutoffWeights(bondDensity)
cutoff = 10.0 * random.weighted_choice(list(range(1, 11)), weights)
# TODO: use entropy
if cutoff >= temperature.actual_value:
result = workspace.rule.buildTranslatedRule()
if result is not None:
@ -907,11 +935,11 @@ def bottom_up_correspondence_scout(ctx, codelet):
and m.initialDescriptionType != slipnet.bondFacet]
initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
flipTargetObject = False
if (objectFromInitial.spansString() and
objectFromTarget.spansString() and
slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
if (objectFromInitial.spansString() and
objectFromTarget.spansString() and
slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
objectFromTarget = objectFromTarget.flippedVersion()
conceptMappings = formulas.getMappings(
objectFromInitial, objectFromTarget,
@ -927,6 +955,7 @@ def important_object_correspondence_scout(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
slipnet = ctx.slipnet
# TODO: use entropy
temperature = ctx.temperature
workspace = ctx.workspace
objectFromInitial = chooseUnmodifiedObject(ctx, 'relativeImportance',
@ -934,6 +963,7 @@ def important_object_correspondence_scout(ctx, codelet):
assert objectFromInitial is not None
descriptors = objectFromInitial.relevantDistinguishingDescriptors()
# choose descriptor by conceptual depth
# TODO: use entropy
weights = [temperature.getAdjustedValue(n.conceptualDepth) for n in descriptors]
slipnode = random.weighted_choice(descriptors, weights)
assert slipnode
@ -966,11 +996,11 @@ def important_object_correspondence_scout(ctx, codelet):
and m.initialDescriptionType != slipnet.bondFacet]
initialDescriptionTypes = [m.initialDescriptionType for m in opposites]
flipTargetObject = False
if (objectFromInitial.spansString()
and objectFromTarget.spansString()
and slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
if (objectFromInitial.spansString()
and objectFromTarget.spansString()
and slipnet.directionCategory in initialDescriptionTypes
and all(m.label == slipnet.opposite for m in opposites) # unreached?
and slipnet.opposite.activation != 100.0):
objectFromTarget = objectFromTarget.flippedVersion()
conceptMappings = formulas.getMappings(
objectFromInitial, objectFromTarget,
@ -985,6 +1015,7 @@ def important_object_correspondence_scout(ctx, codelet):
def correspondence_strength_tester(ctx, codelet):
coderack = ctx.coderack
random = ctx.random
# TODO: use entropy
temperature = ctx.temperature
workspace = ctx.workspace
correspondence = codelet.arguments[0]
@ -997,6 +1028,7 @@ def correspondence_strength_tester(ctx, codelet):
objectFromTarget.flipped_version())))
correspondence.updateStrength()
strength = correspondence.totalStrength
# TODO: use entropy
probability = temperature.getAdjustedProbability(strength / 100.0)
if random.coinFlip(probability):
# activate some concepts
@ -1050,8 +1082,8 @@ def correspondence_builder(ctx, codelet):
# if there is an incompatible bond then fight against it
initial = correspondence.objectFromInitial
target = correspondence.objectFromTarget
if (initial.leftmost or initial.rightmost and
target.leftmost or target.rightmost):
if (initial.leftmost or initial.rightmost and
target.leftmost or target.rightmost):
# search for the incompatible bond
incompatibleBond = correspondence.getIncompatibleBond()
if incompatibleBond:

View File

@ -68,6 +68,7 @@ class Coderack(object):
self.postBottomUpCodelets()
def probabilityOfPosting(self, codeletName):
# TODO: use entropy
temperature = self.ctx.temperature
workspace = self.ctx.workspace
if codeletName == 'breaker':
@ -83,6 +84,7 @@ class Coderack(object):
if 'correspondence' in codeletName:
return workspace.interStringUnhappiness / 100.0
if 'description' in codeletName:
# TODO: use entropy
return (temperature.value() / 100.0) ** 2
return workspace.intraStringUnhappiness / 100.0
@ -155,12 +157,15 @@ class Coderack(object):
def __postBottomUpCodelets(self, codeletName):
random = self.ctx.random
# TODO: use entropy
temperature = self.ctx.temperature
probability = self.probabilityOfPosting(codeletName)
howMany = self.howManyToPost(codeletName)
urgency = 3
if codeletName == 'breaker':
urgency = 1
# TODO: use entropy
if temperature.value() < 25.0 and 'translator' in codeletName:
urgency = 5
for _ in range(howMany):
@ -285,8 +290,11 @@ class Coderack(object):
def chooseCodeletToRun(self):
random = self.ctx.random
# TODO: use entropy
temperature = self.ctx.temperature
assert self.codelets
# TODO: use entropy
scale = (100.0 - temperature.value() + 10.0) / 15.0
chosen = random.weighted_choice(self.codelets, [codelet.urgency ** scale for codelet in self.codelets])
self.removeCodelet(chosen)

View File

@ -5,6 +5,7 @@ from .temperature import Temperature
from .workspace import Workspace
from .gui import GUI
from pprint import pprint
class Reporter(object):
"""Do-nothing base class for defining new reporter types"""
@ -17,7 +18,7 @@ class Reporter(object):
def report_slipnet(self, slipnet):
pass
def report_temperature(self, temperature):
def report_temperature(self, temperature): #TODO: use entropy
pass
def report_workspace(self, workspace):
@ -29,7 +30,7 @@ class Copycat(object):
self.coderack = Coderack(self)
self.random = Randomness(rng_seed)
self.slipnet = Slipnet()
self.temperature = Temperature()
self.temperature = Temperature() # TODO: use entropy
self.workspace = Workspace(self)
self.reporter = reporter or Reporter()
if gui:
@ -72,13 +73,13 @@ class Copycat(object):
"""Run a trial of the copycat algorithm"""
self.coderack.reset()
self.slipnet.reset()
self.temperature.reset()
self.temperature.reset() # TODO: use entropy
self.workspace.reset()
while self.workspace.finalAnswer is None:
self.mainLoop()
answer = {
'answer': self.workspace.finalAnswer,
'temp': self.temperature.last_unclamped_value,
'temp': self.temperature.last_unclamped_value, # TODO: use entropy
'time': self.coderack.codeletsRun,
}
self.reporter.report_answer(answer)
@ -112,21 +113,29 @@ class Copycat(object):
def run(self, initial, modified, target, iterations):
self.workspace.resetWithStrings(initial, modified, target)
answers = {}
for i in range(iterations):
answer = self.runTrial()
d = answers.setdefault(answer['answer'], {
'count': 0,
'sumtemp': 0,
'sumtime': 0
})
d['count'] += 1
d['sumtemp'] += answer['temp']
d['sumtime'] += answer['time']
for answer, d in answers.items():
d['avgtemp'] = d.pop('sumtemp') / d['count']
d['avgtime'] = d.pop('sumtime') / d['count']
answers = {}
for formula in ['original', 'best', 'sbest', 'pbest', 'none']:
self.temperature.useAdj(formula)
answers = {}
for i in range(iterations):
answer = self.runTrial()
d = answers.setdefault(answer['answer'], {
'count': 0,
'sumtemp': 0, # TODO: use entropy
'sumtime': 0
})
d['count'] += 1
d['sumtemp'] += answer['temp'] # TODO: use entropy
d['sumtime'] += answer['time']
for answer, d in answers.items():
d['avgtemp'] = d.pop('sumtemp') / d['count']
d['avgtime'] = d.pop('sumtime') / d['count']
print('The formula {} provided:'.format(formula))
print('Average difference: {}'.format(self.temperature.getAverageDifference()))
pprint(answers)
return answers
def run_forever(self, initial, modified, target):

View File

@ -63,7 +63,7 @@ class CursesReporter(Reporter):
coderackHeight = height - upperHeight - answersHeight
self.focusOnSlipnet = focus_on_slipnet
self.fpsGoal = fps_goal
self.temperatureWindow = SafeSubwindow(window, height, 5, 0, 0)
self.temperatureWindow = SafeSubwindow(window, height, 5, 0, 0) # TODO: use entropy (entropyWindow)
self.upperWindow = SafeSubwindow(window, upperHeight, width-5, 0, 5)
self.coderackWindow = SafeSubwindow(window, coderackHeight, width-5, upperHeight, 5)
self.answersWindow = SafeSubwindow(window, answersHeight, width-5, upperHeight + coderackHeight, 5)
@ -239,6 +239,7 @@ class CursesReporter(Reporter):
w.border()
w.refresh()
#TODO: use entropy
def report_temperature(self, temperature):
self.do_keyboard_shortcuts()
w = self.temperatureWindow

View File

@ -96,6 +96,7 @@ class Group(WorkspaceObject):
support = self.localSupport() / 100.0
activation = slipnet.length.activation / 100.0
supportedActivation = (support * activation) ** exp
#TODO: use entropy
return temperature.getAdjustedProbability(supportedActivation)
def flippedVersion(self):
@ -130,6 +131,7 @@ class Group(WorkspaceObject):
cubedlength = length ** 3
fred = cubedlength * (100.0 - slipnet.length.activation) / 100.0
probability = 0.5 ** fred
#TODO: use entropy
value = temperature.getAdjustedProbability(probability)
if value < 0.06:
value = 0.0

9
copycat/io.py Normal file
View File

@ -0,0 +1,9 @@
def save_answers(answers, filename):
    """Write the answer tallies to *filename* as two comma-separated lines.

    The first line lists the answers ordered by ascending count (ties keep
    the mapping's iteration order); the second line lists the matching
    counts.  No newline is written after the second line.

    answers:  mapping of answer string -> dict containing a 'count' entry.
    filename: path of the file to (over)write.
    """
    ranked = sorted(answers.items(), key=lambda item: item[1]['count'])
    header = ','.join(answer for answer, _ in ranked)
    tallies = ','.join(str(stats['count']) for _, stats in ranked)
    with open(filename, 'w') as outfile:
        outfile.write(header + '\n' + tallies)

20
copycat/plot.py Normal file
View File

@ -0,0 +1,20 @@
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
def plot_answers(answers, show=True, save=True, filename='distribution.png'):
    """Draw a bar chart of how often each answer was produced.

    answers:  mapping of answer string -> dict with 'count' and 'avgtemp'.
    show:     when true, display the chart with ``plt.show()``.
    save:     when true, write the chart to ``output/<filename>``.
    filename: file name used inside the ``output`` directory.

    Bars are ordered by ascending count and each label carries the answer's
    average temperature rounded to two decimals.
    """
    import os  # local import: only needed for the save path

    ranked = sorted(answers.items(), key=lambda kv: kv[1]['count'])
    labels = [answer + ' (temp:{})'.format(round(stats['avgtemp'], 2))
              for answer, stats in ranked]
    counts = [stats['count'] for _, stats in ranked]

    positions = np.arange(len(labels))
    plt.bar(positions, counts, align='center', alpha=0.5)
    plt.xticks(positions, labels)
    plt.ylabel('Count')
    plt.title('Answers')

    # Save BEFORE showing: once a blocking show() returns, the figure may
    # already have been torn down, and savefig() would write an empty image.
    if save:
        # Make sure the target directory exists instead of failing in savefig.
        os.makedirs('output', exist_ok=True)
        plt.savefig('output/{}'.format(filename))
    if show:
        plt.show()

View File

@ -30,7 +30,7 @@ class Rule(WorkspaceStructure):
return
averageDepth = (self.descriptor.conceptualDepth +
self.relation.conceptualDepth) / 2.0
averageDepth **= 1.1
averageDepth **= 1.1 # LSaldyt: This value (1.1) seems 100% contrived.
# see if the object corresponds to an object
# if so, see if the descriptor is present (modulo slippages) in the
# corresponding object
@ -45,15 +45,15 @@ class Rule(WorkspaceStructure):
self.internalStrength = 0.0
return
sharedDescriptorTerm = 100.0
conceptual_height = (100.0 - self.descriptor.conceptualDepth) / 10.0
sharedDescriptorWeight = conceptual_height ** 1.4
conceptual_height = (100.0 - self.descriptor.conceptualDepth) / 10.0 # LSaldyt: 10?
sharedDescriptorWeight = conceptual_height ** 1.4 # LSaldyt: 1.4 is also seemingly contrived
depthDifference = 100.0 - abs(self.descriptor.conceptualDepth -
self.relation.conceptualDepth)
weights = ((depthDifference, 12),
(averageDepth, 18),
(sharedDescriptorTerm, sharedDescriptorWeight))
weights = ((depthDifference, 12), # LSaldyt: ???
(averageDepth, 18), # ????
(sharedDescriptorTerm, sharedDescriptorWeight)) # 12 and 18 can be reduced to 2 and 3, depending on sharedDescriptorWeight
self.internalStrength = formulas.weightedAverage(weights)
if self.internalStrength > 100.0:
if self.internalStrength > 100.0: # LSaldyt: A better formula wouldn't need to do this.
self.internalStrength = 100.0
def ruleEqual(self, other):

View File

@ -1,9 +1,107 @@
import math
# Alternate formulas for getAdjustedProbability
def _original(temp, prob):
    """Original temperature adjustment of a probability.

    Returns prob unchanged when prob is 0 or 0.5, or when temp is 0.
    Probabilities below 0.5 are handled by mirroring: the complement is
    adjusted and the result is complemented again.  Otherwise prob is scaled
    by 1 + (10 - sqrt(100 - temp)) / 100 and floored at 0.5.
    """
    if temp == 0 or prob == 0 or prob == 0.5:
        return prob
    if prob < 0.5:
        mirrored = _original(temp, 1.0 - prob)
        return 1.0 - mirrored
    heat_bonus = (10 - math.sqrt(100.0 - temp)) / 100
    return max((heat_bonus + 1) * prob, 0.5)
def _entropy(temp, prob):
    """Entropy-flavoured variant of the original adjustment.

    Returns prob unchanged when prob is 0 or 0.5, or when temp is 0.
    For prob above 0.5 the scaled value f = (1 + (10 - sqrt(100 - temp)) / 100) * prob
    is turned into -f * log2(f).
    """
    if temp == 0 or prob == 0 or prob == 0.5:
        return prob
    if prob < 0.5:
        # NOTE(review): the mirrored branch goes through _original, not
        # _entropy -- possibly a copy/paste leftover; confirm the intent.
        mirrored = _original(temp, 1.0 - prob)
        return 1.0 - mirrored
    heat_bonus = (10 - math.sqrt(100.0 - temp)) / 100
    scaled = (heat_bonus + 1) * prob
    return -scaled * math.log2(scaled)
def _weighted(temp, prob, s, u, alpha=1, beta=1):
    """Linearly blend s and u according to temperature.

    At temp == 100 the result is s, at temp == 0 it is u.  The prob, alpha
    and beta parameters are accepted but not used by the computation.
    """
    hot_share = temp / 100
    cold_share = (100 - temp) / 100
    return hot_share * s + cold_share * u
def _weighted_inverse(temp, prob):
    """Blend prob toward its complement (1 - prob) as temperature rises."""
    return _weighted(temp, prob, 1 - prob, prob)
def _fifty_converge(temp, prob):
    """Blend prob toward 0.5 (rather than toward 1 - prob) as temperature rises."""
    neutral = .5
    return _weighted(temp, prob, neutral, prob)
def _soft_curve(temp, prob):
    """Blend prob toward the average of (1 - prob) and 0.5, capped at 1."""
    hot_target = (1.5-prob)/2  # equals ((1 - prob) + 0.5) / 2
    blended = _weighted(temp, prob, hot_target, prob)
    return min(1, blended)
def _weighted_soft_curve(temp, prob):
    """Blend prob toward a weighted average of 0.5 and (1 - prob), capped at 1.

    The hot-end target weights the convergence value 0.5 by 1 and the
    complement (1 - prob) by 3.
    """
    weight = 100
    gamma = .5  # convergence value
    alpha = 1   # weight given to gamma
    beta = 3    # weight given to the complement of prob
    hot_target = (alpha * gamma + beta * (1 - prob)) / (alpha + beta)
    hot_part = (temp / weight) * hot_target
    cold_part = ((weight - temp) / weight) * prob
    return min(1, hot_part + cold_part)
def _alt_fifty(temp, prob):
    """Blend between 0.5 (hot) and a sharpened prob (cold).

    The cold-end value is prob squared below 0.5 and its square root otherwise.
    """
    if prob < .5:
        sharpened = prob ** 2
    else:
        sharpened = math.sqrt(prob)
    return _weighted(temp, prob, .5, sharpened)
def _averaged_alt(temp, prob):
    """Blend between (1.5 - prob) / 2 (hot) and a sharpened prob (cold).

    The cold-end value is prob squared below 0.5 and its square root otherwise.
    """
    hot_target = (1.5 - prob)/2
    if prob < .5:
        sharpened = prob ** 2
    else:
        sharpened = math.sqrt(prob)
    return _weighted(temp, prob, hot_target, sharpened)
def _working_best(temp, prob):
    """Blend between 0.5 (hot) and a mildly sharpened prob (cold).

    The cold-end value is prob ** 1.05 below 0.5 and prob ** (1 / 1.05)
    otherwise.
    """
    convergence = .5
    power = 1.05
    if prob < .5:
        sharpened = prob ** power
    else:
        sharpened = prob ** (1/power)
    return _weighted(temp, prob, convergence, sharpened)
def _soft_best(temp, prob):
    """Blend between 0.5 (hot) and a mildly sharpened prob (cold).

    Uses the same constants as _working_best: convergence 0.5 and power 1.05.
    """
    convergence = .5
    power = 1.05
    if prob < .5:
        sharpened = prob ** power
    else:
        sharpened = prob ** (1/power)
    return _weighted(temp, prob, convergence, sharpened)
def _parameterized_best(temp, prob):
    """Blend between a prob-weighted midpoint (hot) and a sharpened prob (cold).

    Hot-end target:  (alpha * prob + beta * 0.5) / (alpha + beta), alpha=5, beta=1.
    Cold-end target: prob ** r below 0.5, prob ** (1 / r) otherwise, r=1.05.
    """
    alpha = 5
    beta = 1
    midpoint = .5
    r = 1.05
    hot_target = (alpha * prob + beta * midpoint) / (alpha + beta)
    if prob < .5:
        cold_target = prob ** r
    else:
        cold_target = prob ** (1/r)
    return _weighted(temp, prob, hot_target, cold_target)
def _none(temp, prob):
    """Identity adjustment: ignore temp and hand prob back untouched."""
    return prob
class Temperature(object):
def __init__(self):
# Set up the temperature state, the registry of adjustment formulas and the
# counters used to measure how much the active formula changes probabilities.
self.reset()
# Key into _adjustmentFormulas; getAdjustedProbability looks the formula up by this name.
self.adjustmentType = 'inverse'
# Registry of the available formulas, each called as formula(temp, prob).
self._adjustmentFormulas = {
'original' : _original,
'entropy' : _entropy,
'inverse' : _weighted_inverse,
'fifty_converge' : _fifty_converge,
'soft' : _soft_curve,
'weighted_soft' : _weighted_soft_curve,
'alt_fifty' : _alt_fifty,
'average_alt' : _averaged_alt,
'best' : _working_best,
'sbest' : _soft_best,
'pbest' : _parameterized_best,
'none' : _none}
# Running sum of |adjusted - prob| and number of samples; read by getAverageDifference.
# NOTE(review): only set here -- reset() does not appear to clear them; confirm that is intended.
self.diffs = 0
self.ndiffs = 0
def reset(self):
self.actual_value = 100.0
@ -34,12 +132,20 @@ class Temperature(object):
return value ** (((100.0 - self.value()) / 30.0) + 0.5)
def getAdjustedProbability(self, value):
if value == 0 or value == 0.5 or self.value() == 0:
return value
if value < 0.5:
return 1.0 - self.getAdjustedProbability(1.0 - value)
coldness = 100.0 - self.value()
a = math.sqrt(coldness)
c = (10 - a) / 100
f = (c + 1) * value
return max(f, 0.5)
temp = self.value()
prob = value
adjusted = self._adjustmentFormulas[self.adjustmentType](temp, prob)
self.diffs += abs(adjusted - prob)
self.ndiffs += 1
return adjusted
def getAverageDifference(self):
return self.diffs / self.ndiffs
def useAdj(self, adj):
print('Changing to adjustment formula {}'.format(adj))
self.adjustmentType = adj
def adj_formulas(self):
# Return the keys view of the formula registry, i.e. the names accepted as adjustmentType.
return self._adjustmentFormulas.keys()

View File

@ -1,137 +0,0 @@
import unittest
from .copycat import Copycat
def pnormaldist(p):
    """Return a tabulated normal critical value for confidence level *p*.

    The lookup is conservative: it returns the largest tabulated value whose
    confidence level does not exceed *p*, so levels between two table
    entries round down to the lower entry.

    Raises:
        ValueError: if *p* is below the smallest tabulated level (0.80).
    """
    table = {
        0.80: 1.2815,
        0.90: 1.6448,
        0.95: 1.9599,
        0.98: 2.3263,
        0.99: 2.5758,
        0.995: 2.8070,
        0.998: 3.0902,
        0.999: 3.2905,
        0.9999: 3.8905,
        0.99999: 4.4171,
        0.999999: 4.8916,
        0.9999999: 5.3267,
        0.99999999: 5.7307,
        0.999999999: 6.1094,
    }
    candidates = [z for level, z in table.items() if level <= p]
    if not candidates:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError(
            'confidence level %r is below the smallest supported level (%r)'
            % (p, min(table)))
    return max(candidates)
def lower_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the Wilson-score lower bound on a success probability.

    hits:       number of observed successes.
    attempts:   number of trials; 0 yields a bound of 0.
    confidence: confidence level passed to pnormaldist.
    """
    if attempts == 0:
        return 0
    z = pnormaldist(confidence)
    z_squared = z * z
    observed = 1.0 * hits / attempts
    variance_term = (observed * (1 - observed) + z_squared / (4 * attempts)) / attempts
    centre = observed + z_squared / (2 * attempts)
    spread = z * (variance_term ** 0.5)
    return (centre - spread) / (1 + z_squared / attempts)
def upper_bound_on_probability(hits, attempts, confidence=0.95):
    """Return the upper bound on a success probability.

    Computed as one minus the lower bound on the probability of a miss.
    """
    return 1.0 - lower_bound_on_probability(attempts - hits, attempts, confidence)
class TestCopycat(unittest.TestCase):
def setUp(self):
self.longMessage = True # new in Python 2.7
def assertProbabilitiesLookRoughlyLike(self, actual, expected):
actual_count = 0.0 + sum(d['count'] for d in list(actual.values()))
expected_count = 0.0 + sum(d['count'] for d in list(expected.values()))
self.assertGreater(actual_count, 1)
self.assertGreater(expected_count, 1)
for k in set(list(actual.keys()) + list(expected.keys())):
if k not in expected:
self.fail('Key %s was produced but not expected! %r != %r' % (k, actual, expected))
expected_probability = expected[k]['count'] / expected_count
if k in actual:
actual_lo = lower_bound_on_probability(actual[k]['count'], actual_count)
actual_hi = upper_bound_on_probability(actual[k]['count'], actual_count)
if not (actual_lo <= expected_probability <= actual_hi):
print('Failed (%s <= %s <= %s)' % (actual_lo, expected_probability, actual_hi))
self.fail('Count ("obviousness" metric) seems way off! %r != %r' % (actual, expected))
if abs(actual[k]['avgtemp'] - expected[k]['avgtemp']) >= 10.0 + (10.0 / actual[k]['count']):
print('Failed (%s - %s >= %s)' % (actual[k]['avgtemp'], expected[k]['avgtemp'], 10.0 + (10.0 / actual[k]['count'])))
self.fail('Temperature ("elegance" metric) seems way off! %r != %r' % (actual, expected))
else:
actual_hi = upper_bound_on_probability(0, actual_count)
if not (0 <= expected_probability <= actual_hi):
self.fail('No instances of expected key %s were produced! %r != %r' % (k, actual, expected))
def run_testcase(self, initial, modified, target, iterations, expected):
actual = Copycat().run(initial, modified, target, iterations)
self.assertEqual(sum(a['count'] for a in list(actual.values())), iterations)
self.assertProbabilitiesLookRoughlyLike(actual, expected)
def test_simple_cases(self):
self.run_testcase('abc', 'abd', 'efg', 50, {
'efd': {'count': 1, 'avgtemp': 16},
'efh': {'count': 99, 'avgtemp': 19},
})
self.run_testcase('abc', 'abd', 'ijk', 50, {
'ijd': {'count': 4, 'avgtemp': 24},
'ijl': {'count': 96, 'avgtemp': 20},
})
def test_abc_xyz(self):
self.run_testcase('abc', 'abd', 'xyz', 20, {
'xyd': {'count': 100, 'avgtemp': 19},
})
def test_ambiguous_case(self):
self.run_testcase('abc', 'abd', 'ijkk', 50, {
'ijkkk': {'count': 7, 'avgtemp': 21},
'ijll': {'count': 47, 'avgtemp': 28},
'ijkl': {'count': 44, 'avgtemp': 32},
'ijkd': {'count': 2, 'avgtemp': 65},
})
def test_mrrjjj(self):
self.run_testcase('abc', 'abd', 'mrrjjj', 50, {
'mrrjjjj': {'count': 4, 'avgtemp': 16},
'mrrkkk': {'count': 31, 'avgtemp': 47},
'mrrjjk': {'count': 64, 'avgtemp': 51},
'mrrjkk': {'count': 1, 'avgtemp': 52},
'mrrjjd': {'count': 1, 'avgtemp': 54},
})
def test_elongation(self):
# This isn't remotely what a human would say.
self.run_testcase('abc', 'aabbcc', 'milk', 50, {
'milj': {'count': 85, 'avgtemp': 55},
'mikj': {'count': 10, 'avgtemp': 56},
'milk': {'count': 1, 'avgtemp': 56},
'lilk': {'count': 1, 'avgtemp': 57},
'milb': {'count': 3, 'avgtemp': 57},
})
def test_repairing_successor_sequence(self):
# This isn't remotely what a human would say.
self.run_testcase('aba', 'abc', 'xyx', 50, {
'xc': {'count': 9, 'avgtemp': 57},
'xyc': {'count': 82, 'avgtemp': 59},
'cyx': {'count': 7, 'avgtemp': 68},
'xyx': {'count': 2, 'avgtemp': 69},
})
def test_nonsense(self):
    # Both problems reuse the 'cat' -> 'dog' example with a different target.
    cases = (
        ('cake', {
            'cakg': {'count': 99, 'avgtemp': 70},
            'gake': {'count': 1, 'avgtemp': 59},
        }),
        ('kitten', {
            'kitteg': {'count': 96, 'avgtemp': 66},
            'kitten': {'count': 4, 'avgtemp': 68},
        }),
    )
    for target, expected in cases:
        self.run_testcase('cat', 'dog', target, 10, expected)
# Run the test suite when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -1,3 +1,6 @@
"""Workspace module."""
from . import formulas
from .bond import Bond
from .correspondence import Correspondence
@ -14,6 +17,7 @@ def __adjustUnhappiness(values):
class Workspace(object):
def __init__(self, ctx):
"""To initialize the workspace."""
self.ctx = ctx
self.totalUnhappiness = 0.0
self.intraStringUnhappiness = 0.0
@ -45,11 +49,21 @@ class Workspace(object):
self.changedObject = None
self.objects = []
self.structures = []
self.rule = None
self.rule = None # Only one rule? : LSaldyt
self.initial = WorkspaceString(self.ctx, self.initialString)
self.modified = WorkspaceString(self.ctx, self.modifiedString)
self.target = WorkspaceString(self.ctx, self.targetString)
'''
# TODO: Initial part of refactoring in this method
def getAssessedUnhappiness(self, unhappiness):
o.Unhappiness = __adjustUnhappiness(
o.relativeImportance * o.Unhappiness
for o in self.objects)
pass
'''
# TODO: Extract method?
def assessUnhappiness(self):
self.intraStringUnhappiness = __adjustUnhappiness(
o.relativeImportance * o.intraStringUnhappiness
@ -61,6 +75,7 @@ class Workspace(object):
o.relativeImportance * o.totalUnhappiness
for o in self.objects)
# TODO: these 3 methods seem to be the same... are they? If so, Extract method.
def calculateIntraStringUnhappiness(self):
value = sum(
o.relativeImportance * o.intraStringUnhappiness
@ -92,7 +107,11 @@ class Workspace(object):
self.initial.updateIntraStringUnhappiness()
self.target.updateIntraStringUnhappiness()
# TODO: use entropy
def getUpdatedTemperature(self):
'''
Calculation of global tolerance towards irrelevance
'''
self.calculateIntraStringUnhappiness()
self.calculateInterStringUnhappiness()
self.calculateTotalUnhappiness()
@ -107,7 +126,7 @@ class Workspace(object):
))
def numberOfUnrelatedObjects(self):
"""A list of all objects in the workspace with >= 1 open bond slots"""
"""Computes the number of all objects in the workspace with >= 1 open bond slots."""
objects = [o for o in self.objects
if o.string == self.initial or o.string == self.target]
objects = [o for o in objects if not o.spansString()]
@ -125,21 +144,21 @@ class Workspace(object):
return len(objects)
def numberOfUnreplacedObjects(self):
"""A list of all unreplaced objects in the initial string"""
"""A list of all unreplaced objects in the initial string."""
objects = [o for o in self.objects
if o.string == self.initial and isinstance(o, Letter)]
objects = [o for o in objects if not o.replacement]
return len(objects)
def numberOfUncorrespondingObjects(self):
"""A list of all uncorresponded objects in the initial string"""
"""A list of all uncorresponded objects in the initial string."""
objects = [o for o in self.objects
if o.string == self.initial or o.string == self.target]
objects = [o for o in objects if not o.correspondence]
return len(objects)
def numberOfBonds(self):
"""The number of bonds in the workspace"""
"""The number of bonds in the workspace."""
return sum(1 for o in self.structures if isinstance(o, Bond))
def correspondences(self):

View File

@ -1,5 +1,6 @@
def __chooseObjectFromList(ctx, objects, attribute):
# TODO: use entropy
random = ctx.random
temperature = ctx.temperature
weights = [

View File

@ -2,7 +2,6 @@ from .description import Description
from .formulas import weightedAverage
from .workspaceStructure import WorkspaceStructure
class WorkspaceObject(WorkspaceStructure):
# pylint: disable=too-many-instance-attributes
def __init__(self, workspaceString):

View File

@ -38,7 +38,7 @@ class WorkspaceString(object):
return self.string[i]
def updateRelativeImportance(self):
"""Update the normalised importance of all objects in the string"""
"""Update the normalised importance of all objects in the string."""
total = sum(o.rawImportance for o in self.objects)
if not total:
for o in self.objects:

0
input/.placeholder Normal file
View File

9
input/problems.csv Normal file
View File

@ -0,0 +1,9 @@
abc,abd,ijk
aabc,aabd,ijkk
abc,abd,kji
abc,abd,mrrjjj
abc,abd,rssttt
abc,abd,xyz
abc,abd,ijjkkk
rst,rsu,xyz
abc,abd,xyyzzz
1 abc abd ijk
2 aabc aabd ijkk
3 abc abd kji
4 abc abd mrrjjj
5 abc abd rssttt
6 abc abd xyz
7 abc abd ijjkkk
8 rst rsu xyz
9 abc abd xyyzzz

View File

@ -0,0 +1,4 @@
abc,abd,ijk
aabc,aabd,ijkk
abc,abd,xyz
abc,abd,ijjkkk
1 abc abd ijk
2 aabc aabd ijkk
3 abc abd xyz
4 abc abd ijjkkk

47
main.py
View File

@ -1,21 +1,61 @@
#!/usr/bin/env python3
"""
Main Copycat program.
To run it, type at the terminal:
> python main.py abc abd ppqqrr --iterations 10
The script takes three to five arguments. The first two are a pair of strings
with some change, for example "abc" and "abd". The third is a string which the
script should try to change analogously. The fourth (which defaults to "1") is
a number of iterations. One can also specify a defined seed value for the
random number generator.
This instruction produces output such as:
iiijjjlll: 670 (avg time 1108.5, avg temp 23.6)
iiijjjd: 2 (avg time 1156.0, avg temp 35.0)
iiijjjkkl: 315 (avg time 1194.4, avg temp 35.5)
iiijjjkll: 8 (avg time 2096.8, avg temp 44.1)
iiijjjkkd: 5 (avg time 837.2, avg temp 48.0)
wyz: 5 (avg time 2275.2, avg temp 14.9)
xyd: 982 (avg time 2794.4, avg temp 17.5)
yyz: 7 (avg time 2731.9, avg temp 25.1)
dyz: 2 (avg time 3320.0, avg temp 27.1)
xyy: 2 (avg time 4084.5, avg temp 31.1)
xyz: 2 (avg time 1873.5, avg temp 52.1)
The first number indicates how many times Copycat chose that string as its
answer; higher means "more obvious". The last number indicates the average
final temperature of the workspace; lower means "more elegant".
"""
import argparse
import logging
from copycat import Copycat, Reporter
from copycat import Copycat, Reporter, plot_answers, save_answers
class SimpleReporter(Reporter):
    """Reporter that prints each answer to standard output."""

    def report_answer(self, answer):
        """Print the answer string, run time and final temperature of `answer`."""
        fields = (answer['answer'], answer['time'], answer['temp'])
        print('Answered %s (time %d, final temperature %.1f)' % fields)
def main():
"""Program's main entrance point. Self-explanatory code."""
logging.basicConfig(level=logging.INFO, format='%(message)s', filename='./output/copycat.log', filemode='w')
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int, default=None, help='Provide a deterministic seed for the RNG.')
parser.add_argument('--iterations', type=int, default=1, help='Run the given case this many times.')
parser.add_argument('--plot', action='store_true', help='Plot a bar graph of answer distribution')
parser.add_argument('--noshow', action='store_true', help='Don\'t display bar graph at end of run')
parser.add_argument('initial', type=str, help='A...')
parser.add_argument('modified', type=str, help='...is to B...')
parser.add_argument('target', type=str, help='...as C is to... what?')
@ -27,5 +67,10 @@ def main():
for answer, d in sorted(iter(answers.items()), key=lambda kv: kv[1]['avgtemp']):
print('%s: %d (avg time %.1f, avg temp %.1f)' % (answer, d['count'], d['avgtime'], d['avgtemp']))
if options.plot:
plot_answers(answers, show=not options.noshow)
save_answers(answers, 'output/answers.csv')
# Only run the command-line entry point when executed as a script.
if __name__ == '__main__':
    main()

346
nuke_compare.txt Normal file
View File

@ -0,0 +1,346 @@
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj
Answered mrrjjd (time 90)
{'mrrjjd': {'avgtime': 90.0, 'count': 1}}
mrrjjd: 1 (avg time 90.0)
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj --iterations 100
Answered mrrjjk (time 112)
Answered mrrjjk (time 112)
Answered mrrjjk (time 105)
Answered mrrjjk (time 80)
Answered mrrjjk (time 97)
Answered mrrjjk (time 122)
Answered mrrjjk (time 94)
Answered mrrjjk (time 82)
Answered mrrjjk (time 97)
Answered mrrjjk (time 65)
Answered mrrjjk (time 94)
Answered mrrjjk (time 101)
Answered mrrjjk (time 79)
Answered mrrjjk (time 90)
Answered mrrjjk (time 72)
Answered mrrjjk (time 87)
Answered mrrjjk (time 100)
Answered mrrjjk (time 118)
Answered mrrjjk (time 100)
Answered mrrjjk (time 76)
Answered mrrjjk (time 109)
Answered mrrjjk (time 137)
Answered mrrjjk (time 125)
Answered mrrjjd (time 77)
Answered mrrjjk (time 92)
Answered mrrjjk (time 155)
Answered mrrjjk (time 75)
Answered mrrjjd (time 139)
Answered mrrjjk (time 115)
Answered mrrjjk (time 82)
Answered mrrjjk (time 125)
Answered mrrjjk (time 97)
Answered mrrjjk (time 81)
Answered mrrjjk (time 105)
Answered mrrjjk (time 105)
Answered mrrjjk (time 130)
Answered mrrjjk (time 110)
Answered mrrjjk (time 156)
Answered mrrjjk (time 57)
Answered mrrjjk (time 158)
Answered mrrjjk (time 90)
Answered mrrjjk (time 92)
Answered mrrjjk (time 92)
Answered mrrjjk (time 106)
Answered mrrjjk (time 98)
Answered mrrjjk (time 69)
Answered mrrjjk (time 65)
Answered mrrjjk (time 63)
Answered mrrjjk (time 91)
Answered mrrjjk (time 111)
Answered mrrjjk (time 116)
Answered mrrjjk (time 96)
Answered mrrjjk (time 114)
Answered mrrjjk (time 113)
Answered mrrjjk (time 253)
Answered mrrjjk (time 97)
Answered mrrjjk (time 138)
Answered mrrjjk (time 121)
Answered mrrjjk (time 119)
Answered mrrjjk (time 208)
Answered mrrjjk (time 88)
Answered mrrjjk (time 139)
Answered mrrjjk (time 104)
Answered mrrjjk (time 127)
Answered mrrjjk (time 92)
Answered mrrjjk (time 81)
Answered mrrjjk (time 79)
Answered mrrjjk (time 124)
Answered mrrjjk (time 103)
Answered mrrjjk (time 93)
Answered mrrjjk (time 95)
Answered mrrjjk (time 136)
Answered mrrjjk (time 124)
Answered mrrjjk (time 181)
Answered mrrjjk (time 80)
Answered mrrjjk (time 71)
Answered mrrjjk (time 107)
Answered mrrjjk (time 79)
Answered mrrjjd (time 62)
Answered mrrjjk (time 92)
Answered mrrjjk (time 99)
Answered mrrjjk (time 65)
Answered mrrjjk (time 134)
Answered mrrjjd (time 116)
Answered mrrjjk (time 85)
Answered mrrjjk (time 107)
Answered mrrjjk (time 102)
Answered mrrjjk (time 132)
Answered mrrjjk (time 127)
Answered mrrjjk (time 92)
Answered mrrjjk (time 41)
Answered mrrjjk (time 78)
Answered mrrjjk (time 77)
Answered mrrjjk (time 140)
Answered mrrjjk (time 126)
Answered mrrjjk (time 110)
Answered mrrjjk (time 95)
Answered mrrjjk (time 127)
Answered mrrjjd (time 92)
Answered mrrjjk (time 121)
{'mrrjjd': {'avgtime': 97.2, 'count': 5},
'mrrjjk': {'avgtime': 105.28421052631579, 'count': 95}}
mrrjjd: 5 (avg time 97.2)
mrrjjk: 95 (avg time 105.3)
lsaldyt@shiva:~/projects/farg/copycat$ git checkout feature-temperature-effect-analysis
Switched to branch 'feature-temperature-effect-analysis'
Your branch is ahead of 'origin/feature-temperature-effect-analysis' by 1 commit.
(use "git push" to publish your local commits)
lsaldyt@shiva:~/projects/farg/copycat$ ./main.py abc abd mrrjjj --iterations 100
Changing to adjustment formula original
Answered mrrkkk (time 559, final temperature 47.2)
Answered mrrkkk (time 1115, final temperature 39.7)
Answered mrrkkk (time 1540, final temperature 39.8)
Answered mrrjjk (time 1591, final temperature 40.2)
Answered mrrjjk (time 303, final temperature 54.8)
Answered mrrkkk (time 1418, final temperature 42.4)
Answered mrrkkk (time 610, final temperature 40.3)
Answered mrrkkk (time 2585, final temperature 41.8)
Answered mrrkkk (time 795, final temperature 38.8)
Answered mrrkkk (time 2609, final temperature 38.8)
Answered mrrkkk (time 1569, final temperature 39.3)
Answered mrrkkk (time 771, final temperature 38.5)
Answered mrrkkk (time 3046, final temperature 39.4)
Answered mrrkkk (time 1551, final temperature 41.7)
Answered mrrkkk (time 1120, final temperature 39.7)
Answered mrrkkk (time 1089, final temperature 40.0)
Answered mrrjjk (time 2029, final temperature 41.2)
Answered mrrkkk (time 1729, final temperature 39.5)
Answered mrrjjk (time 1641, final temperature 51.6)
Answered mrrkkk (time 1194, final temperature 39.0)
Answered mrrkkk (time 950, final temperature 39.0)
Answered mrrkkk (time 2388, final temperature 40.0)
Answered mrrjjk (time 547, final temperature 47.0)
Answered mrrjjk (time 3121, final temperature 39.6)
Answered mrrkkk (time 1611, final temperature 48.0)
Answered mrrkkk (time 2819, final temperature 41.7)
Answered mrrkkk (time 1249, final temperature 55.9)
Answered mrrjjk (time 9285, final temperature 40.7)
Answered mrrjjk (time 341, final temperature 45.2)
Answered mrrkkk (time 1193, final temperature 40.4)
Answered mrrjjk (time 2199, final temperature 40.4)
Answered mrrkkk (time 2958, final temperature 39.8)
Answered mrrkkk (time 1463, final temperature 39.3)
Answered mrrjjd (time 1278, final temperature 54.7)
Answered mrrkkk (time 1217, final temperature 39.5)
Answered mrrkkk (time 534, final temperature 45.9)
Answered mrrkkk (time 1032, final temperature 39.7)
Answered mrrkkk (time 1749, final temperature 40.1)
Answered mrrkkk (time 549, final temperature 39.8)
Answered mrrkkk (time 2385, final temperature 41.9)
Answered mrrkkk (time 890, final temperature 38.8)
Answered mrrkkk (time 1997, final temperature 39.6)
Answered mrrkkk (time 1369, final temperature 49.3)
Answered mrrkkk (time 1567, final temperature 42.6)
Answered mrrkkk (time 966, final temperature 39.6)
Answered mrrkkk (time 472, final temperature 44.2)
Answered mrrd (time 627, final temperature 47.4)
Answered mrrkkk (time 526, final temperature 39.9)
Answered mrrkkk (time 2873, final temperature 38.9)
Answered mrrkkk (time 2136, final temperature 41.8)
Answered mrrkkk (time 2479, final temperature 41.1)
Answered mrrkkk (time 943, final temperature 41.8)
Answered mrrjjk (time 115, final temperature 59.4)
Answered mrrkkk (time 2702, final temperature 39.0)
Answered mrrkkk (time 779, final temperature 40.1)
Answered mrrjjk (time 410, final temperature 47.7)
Answered mrrkkk (time 1402, final temperature 39.9)
Answered mrrkkk (time 986, final temperature 39.5)
Answered mrrkkk (time 929, final temperature 39.9)
Answered mrrkkk (time 2139, final temperature 39.6)
Answered mrrjjk (time 580, final temperature 51.4)
Answered mrrkkk (time 685, final temperature 41.2)
Answered mrrkkk (time 1822, final temperature 38.1)
Answered mrrkkk (time 1424, final temperature 39.9)
Answered mrrkkk (time 2572, final temperature 40.1)
Answered mrrkkk (time 1763, final temperature 39.1)
Answered mrrkkk (time 1628, final temperature 47.5)
Answered mrrjjk (time 550, final temperature 48.5)
Answered mrrkkk (time 2478, final temperature 39.8)
Answered mrrkkk (time 3211, final temperature 38.8)
Answered mrrkkk (time 465, final temperature 46.4)
Answered mrrjjk (time 2658, final temperature 41.1)
Answered mrrkkk (time 8031, final temperature 39.1)
Answered mrrjjk (time 656, final temperature 45.8)
Answered mrrkkk (time 544, final temperature 38.9)
Answered mrrkkk (time 1584, final temperature 39.7)
Answered mrrkkk (time 654, final temperature 40.5)
Answered mrrkkk (time 6083, final temperature 39.1)
Answered mrrkkk (time 4492, final temperature 39.8)
Answered mrrkkk (time 902, final temperature 39.8)
Answered mrrkkk (time 758, final temperature 39.1)
Answered mrrkkk (time 1630, final temperature 39.8)
Answered mrrjjk (time 915, final temperature 44.2)
Answered mrrjjk (time 2037, final temperature 46.5)
Answered mrrkkk (time 2473, final temperature 43.2)
Answered mrrkkk (time 1707, final temperature 43.7)
Answered mrrjjk (time 5846, final temperature 41.1)
Answered mrrkkk (time 1140, final temperature 41.1)
Answered mrrkkk (time 1978, final temperature 39.1)
Answered mrrkkk (time 1494, final temperature 39.1)
Answered mrrkkk (time 1488, final temperature 40.5)
Answered mrrjjk (time 2378, final temperature 43.9)
Answered mrrkkk (time 6334, final temperature 40.9)
Answered mrrkkk (time 984, final temperature 39.6)
Answered mrrkkk (time 3887, final temperature 40.5)
Answered mrrkkk (time 1772, final temperature 42.7)
Answered mrrjjk (time 999, final temperature 45.5)
Answered mrrkkk (time 2352, final temperature 39.9)
Answered mrrkkk (time 1065, final temperature 39.3)
Answered mrrjjk (time 905, final temperature 55.2)
The formula original provided:
Average difference: 0.028295162318835276
{'mrrd': {'avgtemp': 47.37583217746938, 'avgtime': 627.0, 'count': 1},
'mrrjjd': {'avgtemp': 54.66020964652063, 'avgtime': 1278.0, 'count': 1},
'mrrjjk': {'avgtemp': 46.23473786693079,
'avgtime': 1862.1904761904761,
'count': 21},
'mrrkkk': {'avgtemp': 40.89543617652126,
'avgtime': 1791.5844155844156,
'count': 77}}
Changing to adjustment formula best
Answered mrrjjk (time 1591, final temperature 42.2)
Answered mrrkkk (time 2596, final temperature 53.7)
Answered mrrjjk (time 2621, final temperature 47.2)
Answered mrrjjk (time 2280, final temperature 50.7)
Answered mrrjjk (time 1044, final temperature 54.1)
Answered mrrkkk (time 2718, final temperature 40.9)
Answered mrrkkk (time 1638, final temperature 43.6)
Answered mrrjjjj (time 2555, final temperature 19.8)
Answered mrrjjk (time 565, final temperature 54.4)
Answered mrrkkk (time 1374, final temperature 39.2)
Answered mrrkkk (time 836, final temperature 49.1)
Answered mrrjjjj (time 3500, final temperature 17.9)
Answered mrrkkk (time 971, final temperature 39.6)
Answered mrrjjjj (time 845, final temperature 12.2)
Answered mrrjjjj (time 856, final temperature 18.3)
Answered nrrjjj (time 1108, final temperature 46.4)
Answered mrrjjk (time 711, final temperature 59.7)
Answered mrrjjjj (time 1769, final temperature 18.1)
Answered mrrkkk (time 986, final temperature 44.0)
Answered mrrjjk (time 2288, final temperature 47.5)
Answered mrrkkk (time 1300, final temperature 40.9)
Answered mrrjjk (time 3549, final temperature 52.5)
Answered mrrkkk (time 1725, final temperature 36.6)
Answered mrrjjk (time 528, final temperature 56.5)
Answered mrrkkk (time 4370, final temperature 45.5)
Answered mrrjjk (time 4833, final temperature 48.0)
Answered mrrjjjj (time 1586, final temperature 16.2)
Answered mrrkkk (time 3567, final temperature 42.3)
Answered mrrkkk (time 2323, final temperature 42.0)
Answered mrrjjjj (time 729, final temperature 19.3)
Answered mrrkkk (time 2556, final temperature 42.7)
Answered mrrkkk (time 2177, final temperature 42.7)
Answered mrrkkk (time 3675, final temperature 43.1)
Answered mrrkkk (time 980, final temperature 41.5)
Answered mrrkkk (time 714, final temperature 51.4)
Answered mrrjjjj (time 4852, final temperature 19.7)
Answered mrrjjk (time 555, final temperature 49.5)
Answered mrrjjk (time 1966, final temperature 40.5)
Answered mrrkkk (time 2063, final temperature 39.0)
Answered mrrkkk (time 3212, final temperature 38.8)
Answered mrrkkk (time 997, final temperature 39.2)
Answered mrrkkk (time 2353, final temperature 43.8)
Answered mrrkkk (time 3841, final temperature 39.5)
Answered mrrjjjj (time 779, final temperature 19.6)
Answered mrrjjk (time 939, final temperature 43.9)
Answered mrrjjd (time 1472, final temperature 45.0)
Answered mrrjjjj (time 790, final temperature 17.7)
Answered mrrjjk (time 3372, final temperature 44.2)
Answered mrrjjjj (time 4746, final temperature 18.3)
Answered mrrkkk (time 2040, final temperature 46.4)
Answered mrrkkk (time 4106, final temperature 42.8)
Answered mrrjjjj (time 960, final temperature 14.2)
Answered mrrkkk (time 1979, final temperature 39.9)
Answered mrrjjk (time 1085, final temperature 48.1)
Answered mrrjjjj (time 693, final temperature 18.5)
Answered mrrkkk (time 1490, final temperature 41.5)
Answered mrrkkk (time 2587, final temperature 41.1)
Answered mrrjjk (time 574, final temperature 47.4)
Answered mrrjjk (time 889, final temperature 50.9)
Answered mrrjjk (time 1170, final temperature 38.0)
Answered mrrjjk (time 603, final temperature 47.9)
Answered mrrkkk (time 595, final temperature 42.8)
Answered mrrkkk (time 3329, final temperature 38.3)
Answered mrrjjjj (time 1589, final temperature 13.3)
Answered mrrjjjj (time 1146, final temperature 16.1)
Answered mrrjjk (time 1300, final temperature 51.3)
Answered mrrjjk (time 699, final temperature 41.6)
Answered mrrjjjj (time 2570, final temperature 18.7)
Answered mrrjjk (time 1412, final temperature 46.2)
Answered mrrjjk (time 1841, final temperature 44.9)
Answered mrrjjjj (time 1354, final temperature 19.0)
Answered mrrjjk (time 956, final temperature 34.7)
Answered mrrjjk (time 5125, final temperature 40.7)
Answered mrrkkk (time 2663, final temperature 49.6)
Answered mrrjjk (time 486, final temperature 47.3)
Answered mrrjjd (time 3135, final temperature 49.7)
Answered mrrjjjj (time 1960, final temperature 16.9)
Answered mrrjjk (time 384, final temperature 49.0)
Answered mrrkkk (time 12938, final temperature 41.4)
Answered mrrjjjj (time 1067, final temperature 18.4)
Answered mrrkkk (time 2759, final temperature 39.2)
Answered mrrjkk (time 573, final temperature 46.8)
Answered mrrkkk (time 799, final temperature 41.2)
Answered mrrjjk (time 671, final temperature 53.8)
Answered mrrkkk (time 851, final temperature 44.0)
Answered mrrkkk (time 3239, final temperature 44.6)
Answered mrrjjk (time 988, final temperature 53.0)
Answered mrrjjjj (time 1114, final temperature 18.8)
Answered mrrjjjj (time 1053, final temperature 20.6)
Answered mrrkkk (time 1292, final temperature 44.9)
Answered mrrjjjj (time 1969, final temperature 18.6)
Answered mrrkkk (time 1831, final temperature 40.4)
Answered mrrkkk (time 2394, final temperature 46.4)
Answered mrrkkk (time 3041, final temperature 47.4)
Answered mrrjjjj (time 1233, final temperature 18.4)
Answered mrrjjjj (time 983, final temperature 19.2)
Answered mrrkkk (time 980, final temperature 49.0)
Answered mrrkkk (time 1325, final temperature 39.2)
Answered mrrjjk (time 585, final temperature 69.1)
Answered mrrkkk (time 562, final temperature 48.7)
The formula best provided:
Average difference: 0.12629281972431122
{'mrrjjd': {'avgtemp': 47.36375177458433, 'avgtime': 2303.5, 'count': 2},
'mrrjjjj': {'avgtemp': 17.82623805422265, 'avgtime': 1695.75, 'count': 24},
'mrrjjk': {'avgtemp': 48.48478317064579,
'avgtime': 1520.3333333333333,
'count': 30},
'mrrjkk': {'avgtemp': 46.8418022350217, 'avgtime': 573.0, 'count': 1},
'mrrkkk': {'avgtemp': 43.04275727525823,
'avgtime': 2327.904761904762,
'count': 42},
'nrrjjj': {'avgtemp': 46.370799897149105, 'avgtime': 1108.0, 'count': 1}}
mrrjjjj: 24 (avg time 1695.8, avg temp 17.8)
mrrkkk: 42 (avg time 2327.9, avg temp 43.0)
nrrjjj: 1 (avg time 1108.0, avg temp 46.4)
mrrjkk: 1 (avg time 573.0, avg temp 46.8)
mrrjjd: 2 (avg time 2303.5, avg temp 47.4)
mrrjjk: 30 (avg time 1520.3, avg temp 48.5)
lsaldyt@shiva:~/projects/farg/copycat$

0
output/.placeholder Normal file
View File

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
matplotlib
numpy

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python
"""Self-explanatory."""
from setuptools import setup
setup(

142
tests.py Normal file
View File

@ -0,0 +1,142 @@
import unittest
from pprint import pprint
from copycat import Copycat
# TODO: update test cases to use entropy
# Chi-squared critical values keyed by degrees of freedom (1 through 10).
# The numbers match the standard upper-tail table at the 0.05 significance level.
_chiSquared_table = dict(enumerate(
    (3.841, 5.991, 7.815, 9.488, 11.071,
     12.592, 14.067, 15.507, 16.919, 18.307),
    start=1,
))
class TestCopycat(unittest.TestCase):
    """Regression tests comparing Copycat's answer distribution with recorded runs.

    Each test runs one problem several times and compares the observed answer
    counts with previously recorded counts using a chi-squared statistic.
    """

    def setUp(self):
        self.longMessage = True  # new in Python 2.7

    def assertProbabilitiesLookRoughlyLike(self, actual, expected, iterations):
        """Fail when observed answer counts differ significantly from expected ones.

        Args:
            actual: mapping of answer -> stats dict containing a 'count' entry.
            expected: mapping of the same shape holding the reference counts.
            iterations: number of runs that produced `actual`. Expected counts
                are rescaled to this total when their sum differs from it, so
                references recorded at another sample size remain comparable.

        Raises:
            AssertionError: (via ``self.fail``) if the chi-squared statistic
                reaches the tabulated critical value, or if no critical value
                is tabulated for the number of distinct answers.
        """
        def get_count(key, table):
            return table[key]['count'] if key in table else 0

        # Sorted so warnings and floating-point summation order are reproducible.
        answerKeys = sorted(set(actual) | set(expected))
        # NOTE(review): a textbook goodness-of-fit test uses
        # len(answerKeys) - 1 degrees of freedom; the value below makes the
        # threshold more lenient. Left unchanged to keep existing outcomes.
        degreesFreedom = len(answerKeys)
        if degreesFreedom == 0:
            return  # Nothing expected and nothing observed.

        # Bring the expected counts to the same total as the observed sample.
        expectedTotal = sum(get_count(k, expected) for k in answerKeys)
        if expectedTotal and expectedTotal != iterations:
            scale = iterations / expectedTotal
        else:
            scale = 1

        chiSquared = 0
        for k in answerKeys:
            E = get_count(k, expected) * scale
            O = get_count(k, actual)
            if E == 0:
                # Unexpected answers cannot enter the statistic (division by
                # zero), so they are only reported.
                print('Warning! Expected 0 counts of {}, but got {}'.format(k, O))
            else:
                chiSquared += (O - E) ** 2 / E

        if degreesFreedom not in _chiSquared_table:
            # Previously this surfaced as a bare KeyError.
            self.fail('No chi-squared critical value tabulated for {} degrees of freedom '
                      '(Chi2 value: {})'.format(degreesFreedom, chiSquared))

        if chiSquared >= _chiSquared_table[degreesFreedom]:
            self.fail('Significant difference between expected and actual answer distributions: \n' +
                      'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))

    def run_testcase(self, initial, modified, target, iterations, expected):
        """Run one problem `iterations` times and compare the answers with `expected`."""
        print('expected:')
        pprint(expected)
        actual = Copycat().run(initial, modified, target, iterations)
        print('actual:')
        pprint(actual)
        # The per-answer counts must add up to the number of iterations.
        self.assertEqual(sum(a['count'] for a in actual.values()), iterations)
        self.assertProbabilitiesLookRoughlyLike(actual, expected, iterations)

    def test_simple_cases(self):
        self.run_testcase('abc', 'abd', 'efg', 30,
                          {'dfg': {'avgtemp': 72.37092377767368, 'avgtime': 475.0, 'count': 1},
                           'efd': {'avgtemp': 49.421147725239024, 'avgtime': 410.5, 'count': 2},
                           'efh': {'avgtemp': 19.381658717913258,
                                   'avgtime': 757.1851851851852,
                                   'count': 27}})
        self.run_testcase('abc', 'abd', 'ijk', 30,
                          {'ijd': {'avgtemp': 14.691978036611559, 'avgtime': 453.0, 'count': 1},
                           'ijl': {'avgtemp': 22.344023091153964,
                                   'avgtime': 742.1428571428571,
                                   'count': 28},
                           'jjk': {'avgtemp': 11.233344554288019, 'avgtime': 595.0, 'count': 1}})

    def test_abc_xyz(self):
        self.run_testcase('abc', 'abd', 'xyz', 100,
                          {'dyz': {'avgtemp': 16.78130739435325, 'avgtime': 393.0, 'count': 1},
                           'wyz': {'avgtemp': 26.100450643627426, 'avgtime': 4040.0, 'count': 2},
                           'xyd': {'avgtemp': 21.310415433987586,
                                   'avgtime': 5592.277777777777,
                                   'count': 90},
                           'xyz': {'avgtemp': 23.798124933747882, 'avgtime': 3992.0, 'count': 1},
                           'yyz': {'avgtemp': 27.137975077133788, 'avgtime': 4018.5, 'count': 6}})

    def test_ambiguous_case(self):
        self.run_testcase('abc', 'abd', 'ijkk', 100,
                          {'ijd': {'avgtemp': 55.6767488926397, 'avgtime': 948.0, 'count': 1},
                           'ijkd': {'avgtemp': 78.09357723857647, 'avgtime': 424.5, 'count': 2},
                           'ijkk': {'avgtemp': 68.54252699118226, 'avgtime': 905.5, 'count': 2},
                           'ijkkk': {'avgtemp': 21.75444235750483,
                                     'avgtime': 2250.3333333333335,
                                     'count': 3},
                           'ijkl': {'avgtemp': 38.079858245918466,
                                    'avgtime': 1410.2391304347825,
                                    'count': 46},
                           'ijll': {'avgtemp': 27.53845719945872,
                                    'avgtime': 1711.8863636363637,
                                    'count': 44},
                           'jjkk': {'avgtemp': 75.76606718990365, 'avgtime': 925.0, 'count': 2}})

    def test_mrrjjj(self):
        self.run_testcase('abc', 'abd', 'mrrjjj', 30,
                          {'mrrjjd': {'avgtemp': 44.46354725386579, 'avgtime': 1262.0, 'count': 1},
                           'mrrjjjj': {'avgtemp': 17.50702440140412, 'avgtime': 1038.375, 'count': 8},
                           'mrrjjk': {'avgtemp': 55.189156978290264,
                                      'avgtime': 1170.6363636363637,
                                      'count': 11},
                           'mrrkkk': {'avgtemp': 43.709349775080746, 'avgtime': 1376.2, 'count': 10}})

    '''
    Below are examples of improvements that could be made to copycat.

    def test_elongation(self):
        # This isn't remotely what a human would say.
        self.run_testcase('abc', 'aabbcc', 'milk', 30,
                {'lilk': {'avgtemp': 68.18128407669258,
                          'avgtime': 1200.6666666666667,
                          'count': 3},
                 'mikj': {'avgtemp': 57.96973195905564,
                          'avgtime': 1236.888888888889,
                          'count': 9},
                 'milb': {'avgtemp': 79.98413990245763, 'avgtime': 255.0, 'count': 1},
                 'milj': {'avgtemp': 64.95289549955349, 'avgtime': 1192.4, 'count': 15},
                 'milk': {'avgtemp': 66.11387816293755, 'avgtime': 1891.5, 'count': 2}})

    def test_repairing_successor_sequence(self):
        # This isn't remotely what a human would say.
        self.run_testcase('aba', 'abc', 'xyx', 30,
                {'cyx': {'avgtemp': 82.10555880340601, 'avgtime': 2637.0, 'count': 2},
                 'xc': {'avgtemp': 73.98845045179358, 'avgtime': 5459.5, 'count': 2},
                 'xyc': {'avgtemp': 77.1384941639991,
                         'avgtime': 4617.434782608696,
                         'count': 23},
                 'xyx': {'avgtemp': 74.39287653046891, 'avgtime': 3420.0, 'count': 3}})

    def test_nonsense(self):
        self.run_testcase('cat', 'dog', 'cake', 10, {
            'cakg': {'count': 99, 'avgtemp': 70},
            'gake': {'count': 1, 'avgtemp': 59},
        })
        self.run_testcase('cat', 'dog', 'kitten', 10, {
            'kitteg': {'count': 96, 'avgtemp': 66},
            'kitten': {'count': 4, 'avgtemp': 68},
        })
    '''
# Run the test suite when this file is executed directly (e.g. `python3 tests.py`).
if __name__ == '__main__':
    unittest.main()