Merge branch 'revision-2.0' into develop

This commit is contained in:
LSaldyt
2018-01-12 15:33:51 -07:00
40 changed files with 2702 additions and 163076 deletions

11
.gitignore vendored
View File

@ -20,6 +20,7 @@ pip-log.txt
.coverage
.tox
.log
copycat.log
# Other filesystems
.svn
@ -30,3 +31,13 @@ pip-log.txt
# Output
output/*
<<<<<<< HEAD
copycat.log
papers/*.log
papers/*.pdf
papers/*.out
papers/*.aux
papers/words
*.txt
=======
>>>>>>> develop

BIN
.old_distributions Normal file

Binary file not shown.

View File

@ -1,13 +1,8 @@
co.py.cat
=========
I am planning to use this codebase, or Joseph A. Hager's, to implement a variation of Copycat that uses *Entropy* instead of *Temperature*, while still preserving the parallel terraced scan in full form. If the change is viable, I plan to write a paper on that (if anyone is interested in co-authoring, let me know). For the general idea, please see pages 41 and 42 of the [*Information Sciences*](https://github.com/Alex-Linhares/FARGlexandria/blob/master/Literature/Chess-Capyblanca-2014-Linhares-Information%20Sciences.pdf) paper on [Capyblanca](https://github.com/Alex-Linhares/FARGlexandria).
![GUI](https://i.imgur.com/7pb20g0.png)
**If you would like to help research and publish a paper, please let me know.**
Please see also [FARGlexandria](https://github.com/Alex-Linhares/FARGlexandria), a repository with all FARG projects (and help if you have some of the missing info there, especially about Letter Spirit and George!)
-------------------------------
An implementation of [Douglas Hofstadter](http://prelectur.stanford.edu/lecturers/hofstadter/)'s Copycat algorithm.
The Copycat algorithm is explained [on Wikipedia](https://en.wikipedia.org/wiki/Copycat_%28software%29), and that page has many links for deeper reading. See also [Farglexandria](https://github.com/Alex-Linhares/Farglexandria).

View File

@ -1,3 +1,4 @@
from .copycat import Copycat, Reporter # noqa
from .problem import Problem
from .plot import plot_answers
from .io import save_answers

View File

@ -63,9 +63,9 @@ class Copycat(object):
def mainLoop(self):
currentTime = self.coderack.codeletsRun
self.temperature.tryUnclamp(currentTime) # TODO: use entropy
# Every 15 codelets, we update the workspace.
if currentTime >= self.lastUpdate + 15:
self.temperature.tryUnclamp(currentTime)
# Every 5 codelets, we update the workspace.
if currentTime >= self.lastUpdate + 5:
self.update_workspace(currentTime)
self.step()
@ -116,29 +116,26 @@ class Copycat(object):
return answers
def run(self, initial, modified, target, iterations):
self.temperature.useAdj('best')
self.workspace.resetWithStrings(initial, modified, target)
answers = {}
for formula in ['original', 'best', 'sbest', 'pbest']:
self.temperature.useAdj(formula)
answers = {}
for i in range(iterations):
answer = self.runTrial()
d = answers.setdefault(answer['answer'], {
'count': 0,
'sumtemp': 0, # TODO: use entropy
'sumtime': 0
})
d['count'] += 1
d['sumtemp'] += answer['temp'] # TODO: use entropy
d['sumtime'] += answer['time']
for answer, d in answers.items():
d['avgtemp'] = d.pop('sumtemp') / d['count']
d['avgtime'] = d.pop('sumtime') / d['count']
print('The formula {} provided:'.format(formula))
pprint(answers)
formula = 'pbest'
self.temperature.useAdj(formula)
for i in range(iterations):
answer = self.runTrial()
d = answers.setdefault(answer['answer'], {
'count': 0,
'sumtemp': 0, # TODO: use entropy
'sumtime': 0
})
d['count'] += 1
d['sumtemp'] += answer['temp'] # TODO: use entropy
d['sumtime'] += answer['time']
for answer, d in answers.items():
d['avgtemp'] = d.pop('sumtemp') / d['count']
d['avgtime'] = d.pop('sumtime') / d['count']
print('The formula {} provided:'.format(formula))
print('Average difference: {}'.format(self.temperature.getAverageDifference()))
return answers
def run_forever(self, initial, modified, target):

View File

@ -54,3 +54,6 @@ class Control(GridFrame):
def get_vars(self):
return self.entry.a.get(), self.entry.b.get(), self.entry.c.get()
def reset(self):
self.go = False

View File

@ -16,7 +16,7 @@ from .primary import Primary
from .list import List
from .style import configure_style
from .plot import plot_imbedded
from .plot import plot_answers, plot_temp
plt.style.use('dark_background')
@ -40,10 +40,13 @@ class MainApplication(GridFrame):
self.add(self.codeletList, 1, 1)
self.objectList = List(self, columns)
self.add(self.objectList, 2, 1)
self.add(self.objectList, 2, 1, xspan=2)
self.graph1 = Plot(self, 'Temperature history')
self.add(self.graph1, 2, 0)
self.graph2 = Plot(self, 'Answer Distribution')
self.add(self.graph2, 2, 0)
self.add(self.graph2, 3, 0)
def update(self, copycat):
self.primary.update(copycat)
@ -57,16 +60,11 @@ class MainApplication(GridFrame):
self.codeletList.update(codelets, key=lambda c:c.urgency, formatter= lambda s : '{}: {}'.format(s.name, round(s.urgency, 2)))
get_descriptors = lambda s : ', '.join('({}={})'.format(d.descriptionType.name, d.descriptor.name) for d in s.descriptions)
self.objectList.update(objects, formatter=lambda s : '{}: {}'.format(s, get_descriptors(s)))
'''
if len(objects) > 0:
print('Descriptions:')
for obj in objects:
print(obj)
for description in obj.descriptions:
print(' {}:'.format(description))
print(' {}'.format(description.descriptionType.name))
print(' {}'.format(description.descriptor.name))
'''
def modifier(status):
with plt.style.context(('dark_background')):
plot_temp(copycat.temperature, status)
self.graph1.status.modifier = modifier
def reset_with_strings(self, initial, modified, target):
self.primary.reset_with_strings(initial, modified, target)
@ -79,13 +77,12 @@ class GUI(object):
tk.Grid.columnconfigure(self.root, 0, weight=1)
self.app = MainApplication(self.root)
self.app.grid(row=0, column=0, sticky=tk.N+tk.S+tk.E+tk.W)
configure_style(ttk.Style())
def add_answers(self, answers):
def modifier(status):
with plt.style.context(('dark_background')):
plot_imbedded(answers, status)
plot_answers(answers, status)
self.app.graph2.status.modifier = modifier
def refresh(self):

View File

@ -2,9 +2,14 @@ import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('dark_background')
def plot_temp(temperature, status):
status.subplot.clear()
status.subplot.plot(temperature.history)
status.subplot.set_ylabel('Temperature')
status.subplot.set_xlabel('Time')
status.subplot.set_title('Temperature History')
def plot_imbedded(answers, status):
def plot_answers(answers, status):
answers = sorted(answers.items(), key=lambda kv : kv[1]['count'])
objects = [t[0] for t in answers]
yvalues = [t[1]['count'] for t in answers]

View File

@ -27,3 +27,4 @@ class Primary(GridFrame):
def reset_with_strings(self, initial, modified, target):
self.canvas.reset_with_strings(initial, modified, target)
self.control.reset()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@ class WorkspaceCanvas(GridFrame):
self.changed = False
self.canvas = tk.Canvas(self, background='black')
#self.canvas['width'] = 1600
self.add(self.canvas, 0, 0)
GridFrame.configure(self)

69
copycat/problem.py Normal file
View File

@ -0,0 +1,69 @@
from .copycat import Copycat
from pprint import pprint
# One copycat analogy problem "initial : modified :: target : _", plus the
# number of trials to run and (optionally) precomputed answer distributions
# and the set of temperature-adjustment formulas to evaluate.
class Problem:
def __init__(self, initial, modified, target, iterations, distributions=None, formulas=None):
# NOTE(review): Copycat() is constructed up to twice below purely to probe
# hasattr(..., 'temperature') — consider building one instance and reusing it.
self.formulas = formulas
if formulas is not None:
assert hasattr(Copycat(), 'temperature')
else:
# No formulas supplied: default to every adjustment formula the
# temperature object knows about (when a temperature exists at all).
if hasattr(Copycat(), 'temperature'):
self.formulas = set(Copycat().temperature.adj_formulas())
print(self.formulas)
self.initial = initial
self.modified = modified
self.target = target
self.iterations = iterations
# Without precomputed distributions, solve the problem now to create them.
if distributions is None:
self.distributions = self.solve()
else:
self.distributions = distributions
print(self.formulas)
def test(self, comparison, expected=None):
# Re-solve this problem and pass (actual, expected) to the supplied
# `comparison` callable (e.g. a statistical test). `expected` defaults
# to the distributions stored on this Problem.
print('-' * 120)
print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
self.modified,
self.target))
print('expected:')
if expected is None:
expected = self.distributions
pprint(expected)
actual = self.solve()
print('actual:')
pprint(actual)
comparison(actual, expected)
print('-' * 120)
def solve(self):
# Run copycat on this problem once per formula (or once with the current
# formula when none were requested) and return {formula: answers}.
print('-' * 120)
print('Testing copycat problem: {} : {} :: {} : _'.format(self.initial,
self.modified,
self.target))
copycat = Copycat()
answers = dict()
# NOTE(review): prefer `is None` over `== None` for identity comparison.
if self.formulas == None:
if hasattr(copycat, 'temperature'):
formula = copycat.temperature.getAdj()
else:
formula = None
answers[formula] = copycat.run(self.initial,
self.modified,
self.target,
self.iterations)
else:
print(self.formulas)
for formula in self.formulas:
copycat.temperature.useAdj(formula)
answers[formula] = copycat.run(self.initial,
self.modified,
self.target,
self.iterations)
print('Done with {}'.format(formula))
return answers
def generate(self):
# Recompute and store the answer distributions for this problem.
self.distributions = self.solve()

View File

@ -30,7 +30,7 @@ class Rule(WorkspaceStructure):
return
averageDepth = (self.descriptor.conceptualDepth +
self.relation.conceptualDepth) / 2.0
averageDepth **= 1.1
averageDepth **= 1.1 # LSaldyt: This value (1.1) seems 100% contrived.
# see if the object corresponds to an object
# if so, see if the descriptor is present (modulo slippages) in the
# corresponding object
@ -45,15 +45,15 @@ class Rule(WorkspaceStructure):
self.internalStrength = 0.0
return
sharedDescriptorTerm = 100.0
conceptual_height = (100.0 - self.descriptor.conceptualDepth) / 10.0
sharedDescriptorWeight = conceptual_height ** 1.4
conceptual_height = (100.0 - self.descriptor.conceptualDepth) / 10.0 # LSaldyt: 10?
sharedDescriptorWeight = conceptual_height ** 1.4 # LSaldyt: 1.4 is also seemingly contrived
depthDifference = 100.0 - abs(self.descriptor.conceptualDepth -
self.relation.conceptualDepth)
weights = ((depthDifference, 12),
(averageDepth, 18),
(sharedDescriptorTerm, sharedDescriptorWeight))
weights = ((depthDifference, 12), # LSaldyt: ???
(averageDepth, 18), # ????
(sharedDescriptorTerm, sharedDescriptorWeight)) # 12 and 18 can be reduced to 2 and 3, depending on sharedDescriptorWeight
self.internalStrength = formulas.weightedAverage(weights)
if self.internalStrength > 100.0:
if self.internalStrength > 100.0: # LSaldyt: A better formula wouldn't need to do this.
self.internalStrength = 100.0
def ruleEqual(self, other):

128
copycat/statistics.py Normal file
View File

@ -0,0 +1,128 @@
from collections import defaultdict
from pprint import pprint
from math import log
# comparison values for n degrees freedom
# These values are usable for both the chi^2 and G tests
# Upper-tail chi-squared critical values at significance level 0.05, keyed by
# degrees of freedom. NOTE(review): entries for 11..16 are rounded to fewer
# decimal places than 1..10 — confirm the intended precision.
_ptable = {
1:3.841,
2:5.991,
3:7.815,
4:9.488,
5:11.071,
6:12.592,
7:14.067,
8:15.507,
9:16.919,
10:18.307,
11:19.7,
12:21,
13:22.4,
14:23.7,
15:25,
16:26.3
}
# Count of answer `k` in distribution dict `d`; 0 when the answer is absent.
_get_count = lambda k, d : d[k]['count'] if k in d else 0
def g_value(actual, expected):
    """Compute the G-test statistic between two answer distributions.

    Both arguments map answer -> {'count': int, ...}. Returns a tuple
    (degrees_of_freedom, G) where G = 2 * sum(O_i * ln(O_i / E_i)) over every
    answer seen in either distribution. Terms with a zero observed or zero
    expected count are skipped with a printed warning.
    """
    keys = set(list(actual.keys()) + list(expected.keys()))
    degrees_freedom = len(keys)
    total = 0
    for key in keys:
        expected_count = _get_count(key, expected)
        observed_count = _get_count(key, actual)
        if expected_count == 0:
            print(' Warning! Expected 0 counts of {}, but got {}'.format(key, observed_count))
        elif observed_count == 0:
            print(' Warning! O = {}'.format(observed_count))
        else:
            total += observed_count * log(observed_count / expected_count)
    return degrees_freedom, 2 * total
def chi_value(actual, expected):
    """Compute Pearson's chi-squared statistic between two answer distributions.

    Both arguments map answer -> {'count': int, ...}. Returns a tuple
    (degrees_of_freedom, chi_squared) summing (O - E)^2 / E over every answer
    seen in either distribution; answers with a zero expected count are
    skipped with a printed warning.
    """
    keys = set(list(actual.keys()) + list(expected.keys()))
    degrees_freedom = len(keys)
    statistic = 0
    for key in keys:
        expected_count = _get_count(key, expected)
        observed_count = _get_count(key, actual)
        if expected_count == 0:
            print(' Warning! Expected 0 counts of {}, but got {}'.format(key, observed_count))
        else:
            statistic += (observed_count - expected_count) ** 2 / expected_count
    return degrees_freedom, statistic
def probability_difference(actual, expected):
    """Return half the total variation distance between two answer distributions.

    Both arguments map answer -> {'count': int, ...}. The result is 0.0 for
    identical distributions and 1.0 for completely disjoint ones.

    NOTE(review): raises ZeroDivisionError when either distribution has a
    total count of 0 — confirm callers never pass empty distributions.
    """
    # Build the union of answer keys once (the original rebuilt it per loop).
    answer_keys = set(list(actual.keys()) + list(expected.keys()))
    actualC = 0
    expectedC = 0
    for k in answer_keys:
        expectedC += _get_count(k, expected)
        actualC += _get_count(k, actual)
    # (Removed the unused accumulators Et / Ot from the original.)
    p = 0
    for k in answer_keys:
        Ep = _get_count(k, expected) / expectedC
        Op = _get_count(k, actual) / actualC
        p += abs(Ep - Op)
    p /= 2  # p lies in [0, 2] -> scale to [0, 1]
    return p
def dist_test(actual, expected, calculation):
    """Return True when `actual` and `expected` are statistically
    indistinguishable at the 0.05 level.

    `calculation` is a function (e.g. g_value or chi_value) returning
    (degrees_of_freedom, statistic); the statistic is compared against the
    critical value in _ptable for that many degrees of freedom.

    Raises:
        Exception: when _ptable has no entry for the degrees of freedom.
    """
    df, p = calculation(actual, expected)
    if df not in _ptable:
        # Bug fix: previously `.format(df)` bound only to the second string
        # literal of the `+` concatenation, so the '{}' placeholder in the
        # first literal was never substituted.
        raise Exception(('{} degrees of freedom does not have a corresponding chi squared value.'
                         ' Please look up the value and add it to the table in'
                         ' copycat/statistics.py').format(df))
    return (p < _ptable[df])
def cross_formula_table(actualDict, expectedDict, calculation, probs=False):
    """Compare every formula's distribution in `actualDict` against every one
    in `expectedDict`.

    Returns a dict keyed by (actual_formula, expected_formula). Each value is
    either the probability difference (when `probs` is True) or the boolean
    result of the statistical `calculation` via dist_test.
    """
    if probs:
        compare = probability_difference
    else:
        compare = lambda a, e: dist_test(a, e, calculation)
    return {(ka, ke): compare(actual, expected)
            for ka, actual in actualDict.items()
            for ke, expected in expectedDict.items()}
def cross_table(problemSets, calculation=g_value, probs=False):
    """Cross-compare answer distributions between every pair of problem sets.

    `problemSets` is a sequence of (name, problems) pairs. For each unordered
    pair of sets and each pair of matching problems (same initial/modified/
    target strings), a formula-by-formula comparison table is built with
    cross_formula_table. Returns {problem_key: {(nameA, nameB): table}}.
    """
    table = defaultdict(dict)
    for i, (nameA, setA) in enumerate(problemSets):
        # Only pairs with j > i, so each set pairing is compared once.
        for nameB, setB in problemSets[i + 1:]:
            for pA in setA:
                for pB in setB:
                    same_problem = (pA.initial == pB.initial
                                    and pA.modified == pB.modified
                                    and pA.target == pB.target)
                    if not same_problem:
                        continue
                    problem_key = (pA.initial, pA.modified, pA.target)
                    table[problem_key][(nameA, nameB)] = cross_formula_table(
                        pA.distributions, pB.distributions, calculation, probs)
    return table
def iso_chi_squared(actualDict, expectedDict):
    """Assert that every expected distribution is matched by its actual one.

    For each key in `expectedDict` the corresponding entry must exist in
    `actualDict` and pass the G-test via dist_test; otherwise an exception is
    raised. (Despite the name, this uses the G statistic, not chi-squared.)
    """
    for key, expected in expectedDict.items():
        assert key in actualDict, 'The key {} was not tested'.format(key)
        if not dist_test(actualDict[key], expected, g_value):
            raise Exception('Value of G higher than expected')

View File

@ -24,19 +24,19 @@ def _entropy(temp, prob):
f = (c + 1) * prob
return -f * math.log2(f)
def _weighted(temp, prob, s, u, alpha=1, beta=1):
def _weighted(temp, s, u):
weighted = (temp / 100) * s + ((100 - temp) / 100) * u
return weighted
def _weighted_inverse(temp, prob):
iprob = 1 - prob
return _weighted(temp, prob, iprob, prob)
return _weighted(temp, iprob, prob)
def _fifty_converge(temp, prob): # Uses .5 instead of 1-prob
return _weighted(temp, prob, .5, prob)
return _weighted(temp, .5, prob)
def _soft_curve(temp, prob): # Curves to the average of the (1-p) and .5
return min(1, _weighted(temp, prob, (1.5-prob)/2, prob))
return min(1, _weighted(temp, (1.5-prob)/2, prob))
def _weighted_soft_curve(temp, prob): # Curves to the weighted average of the (1-p) and .5
weight = 100
@ -49,25 +49,25 @@ def _weighted_soft_curve(temp, prob): # Curves to the weighted average of the (1
def _alt_fifty(temp, prob):
s = .5
u = prob ** 2 if prob < .5 else math.sqrt(prob)
return _weighted(temp, prob, s, u)
return _weighted(temp, s, u)
def _averaged_alt(temp, prob):
s = (1.5 - prob)/2
u = prob ** 2 if prob < .5 else math.sqrt(prob)
return _weighted(temp, prob, s, u)
return _weighted(temp, s, u)
def _working_best(temp, prob):
s = .5 # convergence
r = 2 # power
r = 1.05 # power
u = prob ** r if prob < .5 else prob ** (1/r)
return _weighted(temp, prob, s, u)
return _weighted(temp, s, u)
def _soft_best(temp, prob):
s = .5 # convergence
r = 1.05 # power
u = prob ** r if prob < .5 else prob ** (1/r)
return _weighted(temp, prob, s, u)
return _weighted(temp, s, u)
def _parameterized_best(temp, prob):
# (D$66/100)*($E$64*$B68 + $G$64*$F$64)/($E$64 + $G$64)+((100-D$66)/100)*IF($B68 > 0.5, $B68^(1/$H$64), $B68^$H$64)
@ -78,8 +78,27 @@ def _parameterized_best(temp, prob):
s = (alpha * prob + beta * s) / (alpha + beta)
r = 1.05
u = prob ** r if prob < .5 else prob ** (1/r)
return _weighted(temp, prob, s, u)
return _weighted(temp, s, u)
def _meta(temp, prob):
r = _weighted(temp, 1, 2) # Make r a function of temperature
s = .5
u = prob ** r if prob < .5 else prob ** (1/r)
return _weighted(temp, s, u)
def _meta_parameterized(temp, prob):
r = _weighted(temp, 1, 2) # Make r a function of temperature
alpha = 5
beta = 1
s = .5
s = (alpha * prob + beta * s) / (alpha + beta)
u = prob ** r if prob < .5 else prob ** (1/r)
return _weighted(temp, s, u)
def _none(temp, prob):
return prob
class Temperature(object):
def __init__(self):
@ -96,9 +115,15 @@ class Temperature(object):
'average_alt' : _averaged_alt,
'best' : _working_best,
'sbest' : _soft_best,
'pbest' : _parameterized_best}
'pbest' : _parameterized_best,
'meta' : _meta,
'pmeta' : _meta_parameterized,
'none' : _none}
self.diffs = 0
self.ndiffs = 0
def reset(self):
self.history = [100.0]
self.actual_value = 100.0
self.last_unclamped_value = 100.0
self.clamped = True
@ -109,6 +134,7 @@ class Temperature(object):
if self.clamped:
self.actual_value = 100.0
else:
self.history.append(value)
self.actual_value = value
def clampUntil(self, when):
@ -129,11 +155,21 @@ class Temperature(object):
def getAdjustedProbability(self, value):
temp = self.value()
prob = value
return self._adjustmentFormulas[self.adjustmentType](temp, prob)
adjusted = self._adjustmentFormulas[self.adjustmentType](temp, prob)
self.diffs += abs(adjusted - prob)
self.ndiffs += 1
return adjusted
def getAverageDifference(self):
return self.diffs / self.ndiffs
def useAdj(self, adj):
print('Changing to adjustment formula {}'.format(adj))
self.adjustmentType = adj
def getAdj(self):
return self.adjustmentType
def adj_formulas(self):
return self._adjustmentFormulas.keys()

View File

@ -111,8 +111,6 @@ class Workspace(object):
def getUpdatedTemperature(self):
'''
Calculation of global tolerance towards irrelevance
temp = weightedAverage(totalUnhappiness(.8), ruleWeakness(.2))
'''
self.calculateIntraStringUnhappiness()
self.calculateInterStringUnhappiness()

53
cross_compare.py Executable file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python3
import sys
import pickle
from pprint import pprint
from collections import defaultdict
from copycat import Problem
from copycat.statistics import cross_table
def compare_sets():
    """Placeholder for branch-set comparison; intentionally does nothing yet."""
    pass
def main(args):
# Load one pickled problem set per filename, cross-compare all of them, and
# write the results table to output/cross_compare.csv. Returns 0 on success.
# SECURITY NOTE(review): pickle.load executes arbitrary code from the file —
# only run this on trusted distribution files.
branchProblemSets = dict()
problemSets = []
for filename in args:
with open(filename, 'rb') as infile:
pSet = pickle.load(infile)
branchProblemSets[filename] = pSet
problemSets.append((filename, pSet))
# NOTE(review): branchProblemSets is populated but never read below.
crossTable = cross_table(problemSets, probs=True)
key_sorted_items = lambda d : sorted(d.items(), key=lambda t:t[0])
tableItems = key_sorted_items(crossTable)
assert len(tableItems) > 0, 'Empty table'
# NOTE(review): headKey / headSubDict are unused after this line.
headKey, headSubDict = tableItems[0]
# Create table and add headers
table = [['source', 'compare', 'source formula', 'compare formula']]
for key, _ in tableItems:
problem = '{}:{}::{}:_'.format(*key)
table[-1].append(problem)
# Arranged results in terms of copycat variants and formulas
arranged = defaultdict(list)
for key, subdict in tableItems:
for subkey, subsubdict in key_sorted_items(subdict):
for subsubkey, result in key_sorted_items(subsubdict):
arranged[subkey + subsubkey].append((key, result))
# Add test results to table
for key, results in arranged.items():
table.append(list(map(str, [*key])))
for _, result in results:
table[-1].append(str(result))
with open('output/cross_compare.csv', 'w') as outfile:
outfile.write('\n'.join(','.join(row) for row in table) + '\n')
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))

BIN
distributions/.adj-tests Normal file

Binary file not shown.

BIN
distributions/.legacy Normal file

Binary file not shown.

BIN
distributions/.no-adj Normal file

Binary file not shown.

BIN
distributions/.no-fizzle Normal file

Binary file not shown.

BIN
distributions/.no-prob-adj Normal file

Binary file not shown.

BIN
distributions/.nuke-temp Normal file

Binary file not shown.

BIN
distributions/.soft-remove Normal file

Binary file not shown.

View File

@ -71,6 +71,5 @@ def main():
plot_answers(answers, show=not options.noshow)
save_answers(answers, 'output/answers.csv')
if __name__ == '__main__':
main()

12
papers/Makefile Normal file
View File

@ -0,0 +1,12 @@
# Default target: build the paper draft, then remove intermediate files.
all:
make draft
make clean
# Two pdflatex passes around biber resolve citations and cross-references.
draft:
pdflatex draft.tex
biber draft
pdflatex draft.tex
# Remove LaTeX/biber intermediates (the generated PDF is kept).
clean:
rm *.out *.log *.xml *.bbl *.bcf *.blg *.aux

356
papers/draft.tex Normal file
View File

@ -0,0 +1,356 @@
\documentclass[a4paper]{article}
%% Sets page size and margins
\usepackage[a4paper,top=3cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry}
%% Useful packages
\usepackage{listings}
\usepackage{amsmath}
\usepackage{pdfpages}
\usepackage{graphicx}
\usepackage{indentfirst} %% Personal taste of LSaldyt
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage[backend=biber]{biblatex}
\addbibresource{sources.bib}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage[colorlinks=true, allcolors=blue]{hyperref}
\definecolor{lightgrey}{rgb}{0.9, 0.9, 0.9}
\lstset{ %
backgroundcolor=\color{lightgrey}}
\title{Distributed Behavior in a Fluid Analogy Architecture}
\author{Lucas Saldyt, Alexandre Linhares}
\begin{document}
\maketitle
\begin{abstract}
This project focuses on effectively simulating intelligent processes behind fluid analogy-making through increasingly distributed decision-making.
In the process, it discusses creating an effective scientific framework for fluid analogy architectures.
This draft assumes extensive knowledge of the Copycat software, which was pioneered by Melanie Mitchell \cite{analogyasperception}.
A humanistic search algorithm, the Parallel Terraced Scan, is altered and tested.
Originally, this search algorithm contains a centralizing variable, called \emph{temperature}.
This paper investigates the influence of this centralizing variable by modifying, testing, and eventually removing all code related to it.
In this process, several variants of the copycat software are created.
The produced answer distributions of each resulting branch of the copycat software were then cross-compared with a Pearson's $\chi^2$ distribution test.
This paper draft explores tests done on five novel copycat problems with thirty answers given per cross comparison.
[For now, it is safest to say that the results of this paper are inconclusive: See Results section]
%% Based on this cross-comparison, the original adjustment formulas have no significant effect (But these results are preliminary, see Results section for more detail).
\end{abstract}
\section{Introduction}
This paper stems from Melanie Mitchell's \cite{analogyasperception} and Douglas Hofstadter's \& FARG's \cite{fluidconcepts} work on the copycat program.
It is also based on work from a previous paper by Alexandre Linhares \cite{linhares}.
This project focuses on effectively simulating intelligent processes through increasingly distributed decision-making.
In the process of evaluating the distributed nature of copycat, this paper also proposes a "Normal Science" framework.
Copycat's behavior is based on the "Parallel Terraced Scan," a humanistic-inspired search algorithm.
The Parallel Terraced Scan is, roughly, a mix between a depth-first and breadth-first search.
To switch between modes of search, FARG models use the global variable \emph{temperature}.
\emph{Temperature} is ultimately a function of the workspace rule strength and of the importance and happiness of each workspace structure.
Therefore, \emph{temperature} is a global metric, but is sometimes used to make local decisions.
Since copycat means to simulate intelligence in a distributed nature, it should make use of local metrics for local decisions.
This paper explores the extent to which copycat's behavior can be improved through distributing decision making.
Specifically, the effects of temperature are first tested.
Then, once the statistically significant effects of temperature are understood, work is done to replace temperature with a distributed metric.
Initially, temperature is removed destructively, essentially removing any lines of code that mention it, simply to see what effect it has.
Then, a surgical removal of temperature is attempted, leaving affected structures intact or replacing them with effective distributed mechanisms.
To evaluate the distributed nature of copycat, this paper focuses on the creation of a `normal science' framework.
By `Normal science,' this paper means the term created by Thomas Kuhn--the collaborative enterprise of furthering understanding within a paradigm.
Today, "normal science" is simply not done on FARG architectures (and on most computational cognitive architectures too... see Addyman \& French \cite{compmodeling}).
Unlike mathematical theories or experiments, which can be replicated by following the materials and methods, computational models generally have dozens of particularly tuned variables, undocumented procedures, multiple assumptions about the user's computational environment, etc.
It then becomes close to impossible to reproduce a result, or to test some new idea scientifically.
This paper focuses on the introduction of statistical techniques, reduction of "magic numbers", improvement and documentation of formulas, and proposals for statistical human comparison.
Each of these methods will reduce the issues with scientific inquiry in the copycat architecture.
To evaluate two different versions of copycat, the resulting answer distributions from a problem are compared with a Pearson's $\chi^2$ test.
Using this, the degree of difference between distributions can be calculated.
Then, desirability of answer distributions can be found as well, and the following hypotheses can be tested:
\begin{enumerate}
\item $H_i$ Centralized, global variables constrict copycat's ability.
\item $H_0$ Centralized, global variables either improve or have no effect on copycat's ability.
\end{enumerate}
\subsection{Objective}
The aim of this paper is to create and test a new version of the copycat software that makes effective use of a multiple level description.
Until now, copycat has made many of its decisions, even local ones, based on a global variable, \emph{temperature}.
Two approaches will be taken toward improving copycat.
First, small portions of copycat will be removed and then tested individually.
If they do not significantly change the answer distributions given by copycat, they will be collectively removed from a working version of copycat.
Then, alternate, distributed versions of copycat will be compared to the original copycat software to effectively decide on which design choices to make.
\subsection{Theory}
\subsubsection{Centralized Structures}
Since computers are universal and have vastly improved in the past five decades, it is clear that computers are capable of simulating intelligent processes \cite{computerandthebrain}.
The primary obstacle blocking strong A.I. is \emph{comprehension} of intelligent processes.
Once the brain is truly understood, writing software that emulates intelligence will be a (relatively) simple engineering task when compared to understanding the brain.
In making progress towards understanding the brain fully, models must remain true to what is already known about intelligent processes.
Outside of speed, the largest difference between the computer and the brain is the distributed nature of computation.
Specifically, our computers as they exist today have central processing units, where literally all of computation happens.
Brains have some centralized structures, but certainly no single central location where all processing happens.
Luckily, the difference in speed between brains and computers allows computers to simulate brains even when they are running serial code.
From a design perspective, however, software should take the distributed nature of the brain into consideration, because it is most likely that distributed computation plays a large role in the brain's functionality.
For example, codelets should behave more like ants in an anthill, as described in \emph{Gödel, Escher, Bach} \cite{geb}.
Instead of querying a global structure (i.e. the queen), ants might query each other, and each carry information about what they've last seen.
In this way, distributed computation can be carried out through many truly parallel (non-blocking) agents.
It is clear from basic classical psychology that the brain contains some centralized structures.
For example, Broca's area and Wernicke's area are specialized for linguistic input and output.
Another great example is the hippocampi.
If any of these specialized chunks of brain are surgically removed, for instance, then the ability to perform certain tasks is greatly impacted.
To some extent, the same is true for copycat.
For example, removing the ability to update the workspace would be \emph{*roughly*} equivalent to removing both hippocampi from a human.
This paper means to first test the impact of centralized structures, like \emph{temperature}, by removing or altering them and then performing tests.
Then, distributed structures will be proposed and tested in place of centralized ones.
However: “How gullible are you? Is your gullibility located in some "gullibility center" in your brain? Could a neurosurgeon reach in and perform some delicate operation to lower your gullibility, otherwise leaving you alone? If you believe this, you are pretty gullible, and should perhaps consider such an operation.”
― Douglas R. Hofstadter, Gödel, Escher, Bach: An Eternal Golden Braid
Outside of \emph{temperature}, other structures in copycat, like the workspace itself, or the coderack, are also centralized.
Hopefully, these centralized structures are not constraining, but it is possible that they are.
If they are, their unifying effect should be taken into account.
For example, the workspace is atomic, just like centralized structures in the brain, like the hippocampi, are also atomic.
If copycat can be run such that -- during the majority of the program's runtime -- codelets may actually execute at the same time (without pausing to access globals), then it will much better replicate the human brain.
A good model for this is the functional-programming \emph{map} procedure.
From this perspective, the brain would simply be carrying out the same function in many locations (i.e. \emph{map}ping neuron.process() across each of its neurons)
Note that this is more similar to the behavior of a GPU than a CPU.
This model doesn't work when code has to synchronize to access global variables.
Notably, however, functional distributed code is Turing complete just like imperative centralized code is Turing complete.
Especially given the speed of modern computers, functional code cannot do anything that imperative code can't.
However, working in a mental framework that models the functionality of the human brain may assist in actually modeling its processes.
\subsubsection{Local Descriptions}
A global description of the system (\emph{temperature}) is, at times, potentially useful.
However, in summing together the values of each workspace object, information is lost regarding which workspace objects are offending.
In general, the changes that occur will eventually be object-specific.
So, it seems to me that going from object-specific descriptions to a global description back to an object-specific action is a waste of time, at least when the end action is an object-specific action.
A global description shouldn't be \emph{obliterated} (removed 100\%).
Maybe a global description should be reserved for \emph{only} when global actions are taking place.
For example, when deciding that copycat has found a satisfactory answer, a global description should be used, because deciding to stop copycat is a global action.
However, when deciding to remove a particular structure, a global description should not be used, because removing a particular offending structure is NOT a global action.
Of course, global description has some benefits even when it is being used to change local information.
For example, the global formula for temperature converts the raw importance value for each object into a relative importance value for each object.
If a distributed metric was used, this importance value would have to be left in its raw form.
\section{Methods}
\subsection{Formula Documentation}
Many of copycat's formulas use magic numbers and marginally documented formulas.
This is less of a problem in the original LISP code, and more of a problem in the twice-translated Python3 version of copycat.
However, even in copycat's LISP implementation, formulas have redundant parameters.
For example, if given two formulas: $f(x) = x^2$ and $g(x) = 2x$, a single formula can be written $h(x) = 4x^2$ (The composed and then simplified formula).
Ideally, the adjustment formulas within copycat could be reduced in the same way, so that much of copycat's behavior rested on a handful of parameters in a single location, as opposed to more than ten parameters scattered throughout the repository.
Also, often parameters in copycat have little statistically significant effect.
As will be discussed in the $\chi^2$ distribution testing section, any copycat formulas without a significant effect will be hard-removed.
\subsection{Testing the Effect of Temperature}
To begin with, the existing effect of the centralizing variable, temperature, will be analyzed.
The probability adjustment formulas, as used by default, have very little effect.
To evaluate the effect of temperature-based probability adjustment formulas, a spreadsheet was created that showed a color gradient based on each formula.
View the spreadsheets \href{https://docs.google.com/spreadsheets/d/1JT2yCBUAsFzMcbKsQUcH1DhcBbuWDKTgPvUwD9EqyTY/edit?usp=sharing}{here}.
Then, to evaluate the effect of different temperature usages, separate usages of temperature were individually removed and answer distributions were compared statistically (See section: $\chi^2$ Distribution Testing).
\subsection{Temperature Probability Adjustment}
Once the effect of temperature was evaluated, new temperature-based probability adjustment formulas were proposed that each had a significant effect on the answer distributions produced by copycat.
Instead of representing a temperature-less, decentralized version of copycat, these formulas are meant to represent the centralized branch of copycat.
These formulas curve probabilities, making unlikely events more likely and likely events less likely as a function of the global \emph{temperature} variable.
The desired (LISP documented) behavior is as follows:
At high temperatures, the system should explore options that would otherwise be unlikely.
So, at temperatures above half of the maximum temperature, probabilities with a base value less than fifty percent will be curved higher, to some threshold.
At temperatures below half of the maximum temperature, probabilities with a base value above fifty percent will be curved lower, to some threshold.
The original formulas being used to do this were overly complicated.
In summary, many formulas were tested in a spreadsheet, and an optimal one was chosen that replicated the desired behavior.
The remainder of the section discusses different formulas and their advantages/disadvantages.
Also, as a general rule, changing these formulas causes copycat to produce statistically significantly different answer distributions.
The original formula for curving probabilities in copycat:
\lstinputlisting[language=Python]{resources/original.py}
An alternative that seems to improve performance on the "abd:abd::xyz:\_" problem:
This formula produces probabilities that are not bounded between 0 and 1. These are generally truncated.
\lstinputlisting[language=Python]{resources/entropy.py}
However, this formula worsens performance on non "xyz" problems.
Likely, because of how novel the "xyz" problem is, it will require more advanced architecture changes.
For instance, MetaCat claims to assist in solving the "xyz" problem.
The entropy formula is an improvement, but other formulas are possible too.
Below are variations on a "weighted" formula.
The general structure is:
\[p' = \frac{T}{100} \cdot S + \frac{100-T}{100} \cdot U\]
Where: $S$ is the convergence value for when $T = 100$ and
$U$ is the convergence value for when $T = 0$.
The below formulas simply experiment with different values for $S$ and $U$
\lstinputlisting[language=Python]{resources/weighted.py}
After some experimentation and reading the original copycat documentation, it was clear that $S$ should be chosen to be $0.5$ (All events are equally likely at high temperature) and that $U$ should implement the probability curving desired at low temperatures.
The following formulas let $U = p^r$ if $p < 0.5$ and let $U = p^\frac{1}{r}$ if $p >= 0.5$.
This controls whether/when curving happens.
Now, the \emph{single} parameter $r$ simply controls the degree to which curving happens.
Different values of $r$ were experimented with (values between $10$ and $1$ were experimented with at increasingly smaller step sizes).
$2$ and $1.05$ are both good choices at opposite "extremes".
$2$ works because it is large enough to produce novel changes in behavior at extreme temperatures without totally disregarding the original probabilities.
Values above $2$ do not work because they make probabilities too uniform.
Values below $2$ (and above $1.05$) are feasible, but produce less curving and therefore less unique behavior.
$1.05$ works because it very closely replicates the original copycat formulas, providing a very smooth curving.
Values beneath $1.05$ essentially leave probabilities unaffected, producing no significant unique behavior dependent on temperature.
\lstinputlisting[language=Python]{resources/best.py}
All of these separate formulas will later be cross-compared to other variants of the copycat software using a Pearson's $\chi^2$ test.
\subsection{Temperature Usage Adjustment}
Once the behavior based on temperature was well understood, experimentation was made with hard and soft removals of temperature and features that depend on it.
For example, first probability adjustments based on temperature were removed.
Then, the new branch of copycat was $\chi^2$ compared against the original branch.
Then, breaker-fizzling, an independent temperature-related feature was removed from the original branch and another $\chi^2$ comparison was made.
The same process was repeated for non-probability temperature-based adjustments, and then for the copycat stopping decision.
Then, a temperature-less branch of the repository was created and tested.
Then, a branch of the repository was created that removed probability adjustments, value adjustments, and fizzling, and made all other temperature-related operations use a dynamic temperature calculation.
All repository branches were then cross compared using a $\chi^2$ distribution test.
\subsection{$\chi^2$ Distribution Testing}
To test each different branch of the repository, a scientific framework was created.
Each run of copycat on a particular problem produces a distribution of answers.
Distributions of answers can be compared against one another with a (Pearson's) $\chi^2$ distribution test.
$$\chi^2 = \sum_{i=1}^{n} \frac{(O_i - E_i)^2}{E_i}$$
Where:
\newline\indent
$O_i = $ The number of observations of a particular answer
\newline\indent
$E_i = $ The number of expected observations of a particular answer
\newline
\newline\indent
Then, $\chi^2$ is calculated, using one copycat variant as a source for expected observations, and another copycat variant as a source for novel observations.
If the $\chi^2$ value is above some threshold (dependent on degrees of freedom and confidence level), then the two copycat variants are significantly different.
A standard confidence level of $95\%$ is used, and degrees of freedom is calculated as the number of different answers given from the source-variant of copycat.
Because of this, comparing copycat variants like this is \emph{not} always commutative.
\subsection{Effectiveness Definition}
Quantitatively evaluating the effectiveness of a cognitive architecture is difficult.
However, for copycat specifically, effectiveness can be defined as a function of the frequency of desirable answers and equivalently as the inverse frequency of undesirable answers.
Since answers are desirable to the extent that they respect the original transformation of letter sequences, desirability can also be approximated by a concrete metric.
A simple metric for desirability is simply the existing temperature formula.
So, one metric for effectiveness of a copycat variant is the frequency of low-temperature answers.
$$e = \frac{\sum_{i=1}^{n} \frac{O_i}{T_i}}{n}$$
For simplicity, only this metric will be used.
However, this metric could be extended relatively easily.
For example, the unique variants in copycat answers could be taken into account ($n$).
\section{Results}
\subsection{Cross $\chi^2$ Table}
The Cross$\chi^2$ table summarizes the results of comparing each copycat-variant's distribution with each other copycat-variant and with different internal formulas.
For the table, please see \href{"https://docs.google.com/spreadsheets/d/1d4EyEbWLJpLYlE7qSPPb8e1SqCAZUvtqVCd0Ns88E-8/edit?usp=sharing"}{google sheets}.
This table contains a lot of information, but most importantly it shows which copycat variants produce novel changes and which do not.
The following variants of copycat were created:
\begin{enumerate}
\item The original copycat (legacy)
\item Copycat with no probability adjustment formulas (no-prob-adj)
\item Copycat with no fizzling (no-fizzle)
\item Copycat with no adjustment formulas at all (no-adj)
\item Copycat with several different internal adjustment formulas (adj-tests)
\begin{enumerate}
\item alt\_fifty
\item average\_alt
\item best
\item entropy
\item fifty\_converge
\item inverse
\item meta
\item none
\item original
\item pbest
\item pmeta
\item sbest
\item soft
\item weighted\_soft
\end{enumerate}
\item Copycat with temperature 100\% removed (nuke-temp)
\item Copycat with a surgically removed temperature (soft-remove)
\end{enumerate}
Each variant was cross-compared with each other variant on this set of problems (from \cite{fluidconcepts}).
\begin{enumerate}
\item abc:abd::efg:\_
\item abc:abd::ijk:\_
\item abc:abd::ijkk:\_
\item abc:abd::mrrjjj:\_
\item abc:abd::xyz:\_
\end{enumerate}
On a trial run with thirty iterations each, the following cross-comparisons showed \emph{no} difference in answer distributions:
\begin{enumerate}
\item .no-adj x .adj-tests(none)
\item .no-adj x .adj-tests(original)
\item .no-adj x .no-prob-adj
\item .no-prob-adj x .adj-tests(original)
\item .no-prob-adj x .adj-tests(pbest)
\item .no-prob-adj x .adj-tests(weighted\_soft)
\item .nuke-temp x .adj-tests(entropy)
\item .soft-remove x .adj-tests(best)
\item .soft-remove x .no-prob-adj
\end{enumerate}
There are also several variant comparisons that only vary on one or two problems.
As discussed below, it will be easier to evaluate them with more data.
Before the final draft of this paper, a trial will be conducted with a larger number of iterations and a variant of the Pearson's $\chi^2$ test that accounts for zero-count answer frequencies.
Also, because the comparison test is non-commutative, "backwards" tests will be conducted.
Additionally, more problems will be added to the problem set, even if they are reducible.
This will provide additional data points for comparison (If two copycat variants are indistinguishable on some novel problem, they should be indistinguishable on some structurally identical variant of the novel problem).
It is also possible that additional versions of copycat will be tested (I plan on testing small features of copycat, like parameters and so on, and removing them bit by bit).
\section{Discussion}
\subsection{Interpretation of table}
It is clear that the original copycat probability adjustment formula had no statistically significant effects.
Additionally, new formulas that emulate the performance of the original formula also have no significant effects.
However, novel adjustment formulas, like the "best" formula, provide the same results as soft-removing temperature.
Soft-removing temperature is also identical to running copycat with no probability adjustments.
\subsection{Distributed Computation Accuracy}
[Summary of introduction, elaboration based on results]
\subsection{Prediction??}
Even though imperative, serial, centralized code is Turing complete just like functional, parallel, distributed code, I predict that the most progressive cognitive architectures of the future will be created using functional programming languages that run distributively and are at least capable of running in true, CPU-bound parallel.
\printbibliography
\end{document}

336
papers/legacy/draft.tex Normal file
View File

@ -0,0 +1,336 @@
\documentclass[a4paper]{article}
%% Language and font encodings
\usepackage[english]{babel}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
%% Sets page size and margins
\usepackage[a4paper,top=3cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry}
%% Useful packages
\usepackage{listings}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage[colorlinks=true, allcolors=blue]{hyperref}
\definecolor{lightgrey}{rgb}{0.9, 0.9, 0.9}
\lstset{ %
backgroundcolor=\color{lightgrey}}
\title{Distributed Behavior in a Fluid Analogy Architecture}
\author{Lucas Saldyt, Alexandre Linhares}
\begin{document}
\maketitle
\begin{abstract}
We investigate the distributed nature of computation in a FARG architecture, Copycat.
One of the foundations of those models is the \emph{Parallel Terraced Scan}--a psychologically-plausible model that enables a system to fluidly move between different modes of processing.
Previous work has modeled decision-making under Parallel Terraced Scan by using a central variable of \emph{Temperature}.
However, it is unlikely that this design decision accurately replicates the processes in the human brain.
This paper proposes several changes to copycat architectures that will increase their modeling accuracy.
\end{abstract}
\section{Introduction}
This paper stems from Mitchell's (1993) and Hofstadter's \& FARG's (1995) work on the copycat program.
This project focuses on effectively simulating intelligent processes through increasingly distributed decision-making.
In the process of evaluating the distributed nature of copycat, this paper also proposes a "Normal Science" framework.
First, copycat uses a "Parallel Terraced Scan" as a humanistic inspired search algorithm.
The Parallel Terraced Scan corresponds to the psychologically-plausible behavior of briefly browsing, say, a book, and delving deeper whenever something sparks one's interest.
In a way, it is a mix between a depth-first and breadth-first search.
This type of behavior seems to very fluidly change the intensity of an activity based on local, contextual cues.
Previous FARG models use centralized structures, like the global temperature value, to control the behavior of the Parallel Terraced Scan.
This paper explores how to maintain the same behavior while distributing decision-making throughout the system.
Specifically, this paper attempts different refactors of the copycat architecture.
First, the probability adjustment formulas based on temperature are changed.
Then, we experiment with two methods for replacing temperature with a distributed metric.
Initially, temperature is removed destructively, essentially removing any lines of code that mention it, simply to see what effect it has.
Then, a surgical removal of temperature is attempted, leaving intact affected structures or replacing them by effective distributed mechanisms.
To evaluate the distributed nature of copycat, this paper focuses on the creation of a `normal science' framework.
By `Normal science,' this paper means the term created by Thomas Kuhn--the collaborative enterprise of furthering understanding within a paradigm.
Today, "normal science" is simply not done on FARG architectures (and on most computational cognitive architectures too... see Addyman \& French 2012).
Unlike mathematical theories or experiments, which can be replicated by following the materials and methods, computational models generally have dozens of particularly tuned variables, undocumented procedures, multiple assumptions about the user's computational environment, etc.
It then becomes close to impossible to reproduce a result, or to test some new idea scientifically.
This paper focuses on the introduction of statistical techniques, reduction of "magic numbers", improvement and documentation of formulas, and proposals for statistical human comparison.
We also discuss, in general, the nature of the brain as a distributed system.
While the removal of a single global variable may initially seem trivial, one must realize that copycat and other cognitive architectures have many central structures.
This paper explores the justification of these central structures in general.
Is it possible to model intelligence with them, or are they harmful?
\section{Theory}
\subsection{Notes}
Given the differences we can enumerate between brains and computers, it is clear — since computers are universal and have vastly improved in the past five decades — that computers are capable of simulating intelligent processes.
[Cite Von Neumann].
Primarily, the main obstacle now lies in our comprehension of intelligent processes.
Once we truly understand the brain, writing software that emulates intelligence will be a relatively simple software engineering task.
However, we must be careful to remain true to what we already know about intelligent processes so that we may come closer to learning more about them and eventually replicating them in full.
The largest difference between the computer and the brain is the distributed nature of computation.
Specifically, our computers as they exist today have central processing units, where literally all of computation happens.
On the other hand, our brains have no central location where all processing happens.
Luckily, the speed advantage and universality of computers makes it possible to simulate the distributed behavior of the brain.
However, this simulation is only possible if computers are programmed with concern for the distributed nature of the brain.
[Actually, I go back and forth on this: global variables might be plausible, but likely aren't]
Also, even though the brain is distributed, some clustered processes must take place.
In general, centralized structures should be removed from the copycat software, because they will likely improve the accuracy of simulating intelligent processes.
It isn't clear to what degree this refactor should take place.
The easiest target is the central variable, temperature, but other central structures exist.
This paper focuses primarily on temperature, and the unwanted global unification associated with it.
Even though copycat uses simulated parallel code, if copycat were actually parallelized, the global variable of temperature would actually prevent most copycat codelets from running at the same time.
If this global variable and other constricting centralized structures were removed, copycat's code would more closely replicate intelligent processes and would be able to be run much faster.
From a functional-programming-like perspective (i.e. LISP, the original language of copycat), the brain should simply be carrying out the same function in many locations (i.e. mapping neuron.process() across each of its neurons, if you will...)
Note that this is more similar to the behavior of a GPU than a CPU....?
However, in violating this model with the introduction of global variables......
Global variables seem like a construct that people use to model the real world.
...
It is entirely possible that at the level of abstraction that copycat uses, global variables are perfectly acceptable.
For example, a quick grep-search of copycat shows that the workspace singleton also exists as a global variable.
Making all of copycat distributed clearly would require a full rewrite of the software....
If copycat can be run such that codelets may actually execute at the same time (without pausing to access globals), then it will much better replicate the human brain.
However, I question the assumption that the human brain has absolutely no centralized processing.
For example, input and output channels (i.e. speech mechanisms) are not accessible from the entire brain.
Also, brain-region science leads me to believe that some (for example, research concerning Wernicke's or Broca's areas) brain regions truly are "specialized," and thus lend some support to the existence of centralized structures in a computer model of the brain.
However, these centralized structures may be emergent?
So, to re-iterate: Two possibilities exist (hypotheses)
A computer model of the brain can contain centralized structures and still be effective in its modeling.
A computer model cannot have any centralized structures if it is going to be effective in its modeling.
Another important problem is defining the word "effective".
I suppose that "effective" would mean capable of solving fluid analogy problems, producing similar answers to an identically biased human.
However, it isn't clear to me that removing temperature increases the ability to solve problems effectively.
Is this because models are allowed to have centralized structures, or because temperature isn't the only centralized structure?
Clearly, creating a model of copycat that doesn't have centralized structures will take an excessive amount of effort.
\break
The calculation for temperature in the first place is extremely convoluted (in the Python version of copycat).
It lacks any documentation, is full of magic numbers, and contains seemingly arbitrary conditionals.
(If I submitted this as a homework assignment, I would probably get a C. Lol)
Edit: Actually, the lisp version of copycat does a very good job of documenting magic numbers and procedures.
My main complaint is that this hasn't been translated into the Python version of copycat.
However, the Python version is translated from the Java version..
Lost in translation.
My goal isn't to roast copycat's code, however.
Instead, what I see is that all this convolution is \emph{unnecessary}.
Ideally, a future version of copycat, or an underlying FARG architecture, will remove this convolution, and make temperature calculation simpler, streamlined, documented, understandable.
How will this happen, though?
A global description of the system is, at times, potentially useful.
However, in summing together the values of each workspace object, information is lost regarding which workspace objects are offending.
In general, the changes that occur will eventually be object-specific.
So, it seems to me that going from object-specific descriptions to a global description back to an object-specific action is a waste of time.
I don't think that a global description should be \emph{obliterated} (removed 100\%).
I just think that a global description should be reserved for when global actions are taking place.
For example, when deciding that copycat has found a satisfactory answer, a global description should be used, because deciding to stop copycat is a global action.
However, when deciding to remove a particular structure, a global description should not be used, because removing a particular offending structure is NOT a global action.
Summary: it is silly to use global information to make local decisions that would be better made using local information (self-evident).
Benefits of using local information to make local decisions:
Code can be truly distributed, running in true parallel, CPU-bound.
This means that copycat would be faster and more like a human brain.
Specific structures would be removed based on their own offenses.
This means that relevant structures would remain untouched, which would be great!
Likely, this change to copycat would produce better answer distributions testable through the normal science framework.
On the other hand (I've never met a one-handed researcher), global description has some benefits.
For example, the global formula for temperature converts the raw importance value for each object into a relative importance value for each object.
If a distributed metric was used, this importance value would have to be left in its raw form.
\break
The original copycat was written in LISP, a mixed-paradigm language.
Because of LISP's preference for functional code, global variables must be explicitly marked with surrounding asterisks.
Temperature, the workspace, and final answers are all marked global variables as discussed in this paper.
These aspects of copycat are all - by definition - impure, and therefore imperative code that relies on central state changes.
It is clear that, since imperative, mutation-focused languages (like Python) are Turing complete in the same way that functional, purity-focused languages (like Haskell) are Turing complete, each method is clearly capable of modeling the human brain.
However, the algorithm run by the brain is more similar to distributed, parallel functional code than it is to centralized, serial imperative code.
While there is some centralization in the brain, and evidently some state changes, it is clear that 100\% centralized 100\% serial code is not a good model of the brain.
Also, temperature is, ultimately, just a function of objects in the global workspace.
The git branch soft-temp-removal hard-removes most usages of temperature, but continues to use a functional version of the temperature calculation for certain processes, like determining if the given answer is satisfactory or not.
So, all mentions of temperature could theoretically be removed and replaced with a dynamic calculation of temperature instead.
It is clear that in this case, this change is unnecessary.
With the goal of creating a distributed model in mind, what actually bothers me more is the global nature of the workspace, coderack, and other singleton copycat structures.
Really, when temperature is removed and replaced with some distributed metric, it is clear that the true "offending" global is the workspace/coderack.
Alternatively, codelets could be equated to ants in an anthill (see anthill analogy in GEB).
Instead of querying a global structure, codelets could query their neighbors, the same way that ants query their neighbors (rather than, say, relying on instructions from their queen).
Biological or psychological plausibility only matters if it actually affects the presence of intelligent processes. For example, neurons don't exist in copycat because we feel that they aren't required to simulate the processes being studied. Instead, copycat uses higher-level structures to simulate the same emergent processes that neurons do. However, codelets and the control of them relies on a global function representing tolerance to irrelevant structures. Other higher level structures in copycat likely rely on globals as well. Another central variable in copycat is the "rule" structure, of which there is only one. While some global variables might be viable, others may actually obstruct the ability to model intelligent processes. For example, a distributed notion of temperature will not only increase biological and psychological plausibility, but increase copycat's effectiveness at producing acceptable answer distributions.
We must also realize that copycat is only a model, so even if we take goals (level of abstraction) and biological plausibility into account...
It is only worth changing temperature if it affects the model.
Arguably, it does affect the model. (Or, rather, we hypothesize that it does. There is only one way to find out for sure, and that's the point of this paper)
So, maybe this is a paper about goals, model accuracy, and an attempt to find which cognitive details matter and which don't. It also might provide some insight into making a "Normal Science" framework.
Copycat is full of random uncommented parameters and formulas. Personally, I would advocate for removing or at least documenting as many of these as possible. In an ideal model, all of the numbers present might be either from existing mathematical formulas, or present for a very good (emergent and explainable - so that no other number would make sense in the same place) reason. However, settling on so called "magic" numbers because the authors of the program believed that their parameterizations were correct is very dangerous. If we removed random magic numbers, we would gain confidence in our model, progress towards a normal science, and gain a better understanding of cognitive processes.
Similarly, a lot of the testing of copycat is based on human perception of answer distributions. However, I suggest that we move to a more statistical approach. For example, deciding on some arbitrary baseline answer distribution and then modifying copycat to obtain other answer distributions and then comparing distributions with a statistical significance test would actually be indicative of what effect each change had. This paper will include code changes and proposals that lead copycat (and FARG projects in general) to a more statistical and verifiable approach.
While there is a good argument about copycat representing an individual with biases and therefore being incomparable to a distributed group of individuals, I believe that additional effort should be made to test copycat against human subjects. I may include in this paper a concrete proposal on how such an experiment might be done.
Let's simply test the hypothesis: \[H_i\] Copycat will have an improved (significantly different with increased frequencies of more desirable answers and decreased frequencies of less desirable answers: desirability will be determined by some concrete metric, such as the number of relationships that are preserved or mirrored) answer distribution if temperature is turned to a set of distributed metrics. \[H_0\] Copycat's answer distribution will be unaffected by changing temperature to a set of distributed metrics.
\subsection{Normal Science}
\subsubsection{Scientific Style}
The Python3 version of copycat contains many undocumented formulas and magic numbers.
Also, because of the random nature of copycat, sometimes answer distributions can be affected by the computer architecture that the software is being executed on.
To avoid this, this paper suggests documentation of formulas, removal or clear justification of magic numbers, and the use of seeding to get around random processes.
Additionally, I might discuss how randomness doesn't *really* exist.
Because of this, maybe the explicit pseudo-random nature of Copycat shouldn't exist?
Because of this, maybe the explicit psuedo-random nature of Copycat shouldn't exist?
Instead, the distributed nature of computation might act as a pseudo-random process in and of itself.
\subsubsection{Scientific Testing}
Previously, no statistical tests have been done with the copycat software.
Copycat can be treated like a black box, where, when given a particular problem, copycat produces a distribution of answers as output.
In this perspective, copycat can be tweaked, and then output distributions on the same problem can be compared with a statistical test, like a $\chi^2$ test.
The $\chi^2$ value indicates the degree to which a new copycat distribution differs from an old one.
So, a $\chi^2$ test is useful both as a unit test and as a form of scientific inquiry.
For example, if a new feature is added to copycat (say, the features included in the Metacat software), then the new distributions can be compared to the distributions produced by the original version of copycat.
Ideally, these distributions will differ, giving us a binary indication of whether the changes to the software actually had any effect.
Then, once we know that a distribution is statistically novel, we can decide on metrics that evaluate its effectiveness in solving the given problem.
For example, since Metacat claims to solve the "xyz" problem, and "wyz" is generally seen as the best answer to the "xyz" problem, a metric that evaluates the health of a distribution might simply be the percentage of "wyz" answers.
This can be generalized to the percentage of desirable answers given by some copycat variant in general.
Another metric might be the inverse percentage of undesirable answers.
For example, "xyd" is an undesirable answer to the "xyz" problem.
So, if Metacat produced large quantities of "xyd," it would be worse than the legacy copycat.
However, the legacy copycat produces large quantities of "xyd" and small quantities of "wyz".
Given these two discussed metrics, it would be clear that, through our normal science framework, Metacat is superior at solving the "xyz" problem.
Ideally, this framework can be applied to other copycat variants and on other problems.
Through the lens of this framework, copycat can be evaluated scientifically.
\subsection{Distribution}
\subsubsection{Von Neumann Discussion}
An objective, scientifically oriented framework is essential to making progress in the domain of cognitive science.
[John Von Neumann: The Computer and the Brain?
He pointed out that there were good grounds merely in terms of electrical analysis to show that the mind, the brain itself, could not be working on a digital system. It did not have enough accuracy; or... it did not have enough memory. ...And he wrote some classical sentences saying there is a statistical language in the brain... different from any other statistical language that we use... this is what we have to discover. ...I think we shall make some progress along the lines of looking for what kind of statistical language would work.]
Notion that the brain obeys statistical, entropical mathematics
\subsubsection{Turing Completeness}
In a nutshell, because computers are turing complete, it is clear that they can simulate the human brain, given enough power/time.
\subsubsection{Simulation of Distributed Processes}
Despite the ability of computers to simulate the human brain, simulation may not always be accurate unless programmed to be accurate...
\subsubsection{Efficiency of True Distribution}
\subsubsection{Temperature in Copycat}
\subsubsection{Other Centralizers in Copycat}
\subsubsection{The Motivation for Removing Centralizers in Copycat}
\section{Methods}
\subsection{Formula Adjustments}
\subsubsection{Temperature Probability Adjustment}
This research began with adjustments to probability weighting formulas.
In copycat, temperature affects the simulation in multiple ways:
\begin{enumerate}
\item Certain codelets are probabilistically chosen to run
\item Certain structures are probabilistically chosen to be destroyed
\item ...
\end{enumerate}
In many cases, the formulas "get-adjusted-probability" and "get-adjusted-value" are used.
Each curves a probability as a function of temperature.
The desired behavior is as follows:
At high temperatures, the system should explore options that would otherwise be unlikely.
So, at temperatures above half of the maximum temperature, probabilities with a base value less than fifty percent will be curved higher, to some threshold.
At temperatures below half of the maximum temperature, probabilities with a base value above fifty percent will be curved lower, to some threshold.
The original formulas being used to do this were overly complicated.
In summary, many formulas were tested in a spreadsheet, and an optimal one was chosen that replicated the desired behavior.
The original formula for curving probabilities in copycat:
\lstinputlisting[language=Python]{formulas/original.py}
An alternative that seems to improve performance on the abd->abd xyz->? problem:
This formula produces probabilities that are not bounded between 0 and 1. These are generally truncated.
\lstinputlisting[language=Python]{formulas/entropy.py}
Ultimately, it wasn't clear to me that the so-called "xyz" problem should even be considered.
As discussed in [the literature], the "xyz" problem is a novel example of a cognitive obstacle.
Generally, the best techniques for solving the "xyz" problem are discussed in the publications around the "Metacat" project, which gives copycat a temporary memory and levels of reflection upon its actions.
However, it is possible that the formula changes that target improvement in other problems may produce better results for the "xyz" problem.
Focusing on the "xyz" problem, however, will likely be harmful to the improvement of performance on other problems.
So, the original copycat formula is overly complicated, and doesn't perform optimally on several problems.
The entropy formula is an improvement, but other formulas are possible too.
Below are variations on a "weighted" formula.
The general structure is:
\[\emph{p'} = \frac{T}{100} * S + \frac{100-T}{100} * U\]
Where: $S$ is the convergence value for when $T = 0$ and
$U$ is the convergence value for when $T = 100$.
The below formulas simply experiment with different values for $S$ and $U$
The values of $\alpha$ and $\beta$ can be used to provide additional weighting for the formula, but are not used in this section.
\lstinputlisting[language=Python]{formulas/weighted.py}
[Discuss inverse formula and why $S$ was chosen to be constant]
After some experimentation and reading the original copycat documentation, it was clear that $S$ should be chosen to be $0.5$ and that $U$ should implement the probability curving desired at high temperatures.
The following formulas let $U = p^r$ if $p < 0.5$ and let $U = p^\frac{1}{r}$ if $p >= 0.5$.
This controls whether/when curving happens.
Now, the parameter $r$ simply controls the degree to which curving happens.
Different values of $r$ between $10$ and $1$ were tested, at increasingly smaller step sizes.
$2$ and $1.05$ are both good choices at opposite "extremes".
$2$ works because it is large enough to produce novel changes in behavior at extreme temperatures without totally disregarding the original probabilities.
Values above $2$ do not work because they make probabilities too uniform.
Values below $2$ (and above $1.05$) are feasible, but produce less curving and therefore less unique behavior.
$1.05$ works because it very closely replicates the original copycat formulas, providing a very smooth curving.
Values beneath $1.05$ essentially leave probabilities unaffected, producing no significant unique behavior dependent on temperature.
\lstinputlisting[language=Python]{formulas/best.py}
Random thought:
It would be interesting to not hardcode the value of $r$, but to instead leave it as a variable between $0$ and $2$ that changes depending on frustration.
However, this would be much like temperature in the first place....?
$r$ could itself be a function of temperature. That would be.... meta.... lol.
\break
...
\break
And ten minutes later, it was done.
The "meta" formula performs as well as the "best" formula on the "ijjkkk" problem, which I consider the most novel.
Interestingly, I noticed that the parameterized formulas aren't as good on this problem. What did I parameterize them for? Was it well justified?
(Probably not)
At this point, I plan on using the git branch "feature-normal-science-framework" to implement a system that takes in a problem set and provides several answer distributions as output.
Then, I'll do a massive cross-formula answer distribution comparison with $\chi^2$ tests. This will give me an idea about which formula and which changes are best.
I'll also be able to compare all of these answer distributions to the frequencies obtained in temperature removal branches of the repository.
\subsubsection{Temperature Calculation Adjustment}
\subsubsection{Temperature Usage Adjustment}
\subsection{$\chi^2$ Distribution Testing}
\section{Results}
\subsection{$\chi^2$ Table}
\section{Discussion}
\subsection{Distributed Computation Accuracy}
\subsection{Prediction}
\bibliographystyle{alpha}
\bibliography{sample}
\end{document}

292
papers/legacy/legacy.tex Normal file
View File

@ -0,0 +1,292 @@
\section{LSaldyt: Brainstorm, Planning, and Outline}
\subsection{Steps/plan}
Normal Science:
\begin{enumerate}
\item Introduce statistical techniques
\item Reduce magic number usage, document reasoning and math
\item Propose effective human subject comparison
\end{enumerate}
Temperature:
\begin{enumerate}
\item Propose formula improvements
\item Experiment with a destructive removal of temperature
\item Experiment with a "surgical" removal of temperature
\item Assess different copycat versions with/without temperature
\end{enumerate}
\subsection{Semi-structured Notes}
Biological or psychological plausibility only matters if it actually affects the presence of intelligent processes. For example, neurons don't exist in copycat because we feel that they aren't required to simulate the processes being studied. Instead, copycat uses higher-level structures to simulate the same emergent processes that neurons do. However, codelets and the control of them relies on a global function representing tolerance to irrelevant structures. Other higher level structures in copycat likely rely on globals as well. Another central variable in copycat is the "rule" structure, of which there is only one. While some global variables might be viable, others may actually obstruct the ability to model intelligent processes. For example, a distributed notion of temperature will not only increase biological and psychological plausibility, but also increase copycat's effectiveness at producing acceptable answer distributions.
We must also realize that copycat is only a model, so even if we take goals (level of abstraction) and biological plausibility into account...
It is only worth changing temperature if it affects the model.
Arguably, it does affect the model. (Or, rather, we hypothesize that it does. There is only one way to find out for sure, and that's the point of this paper)
So, maybe this is a paper about goals, model accuracy, and an attempt to find which cognitive details matter and which don't. It also might provide some insight into making a "Normal Science" framework.
Copycat is full of random uncommented parameters and formulas. Personally, I would advocate for removing or at least documenting as many of these as possible. In an ideal model, all of the numbers present might be either from existing mathematical formulas, or present for a very good (emergent and explainable - so that no other number would make sense in the same place) reason. However, settling on so called "magic" numbers because the authors of the program believed that their parameterizations were correct is very dangerous. If we removed random magic numbers, we would gain confidence in our model, progress towards a normal science, and gain a better understanding of cognitive processes.
Similarly, a lot of the testing of copycat is based on human perception of answer distributions. However, I suggest that we move to a more statistical approach. For example, deciding on some arbitrary baseline answer distribution and then modifying copycat to obtain other answer distributions and then comparing distributions with a statistical significance test would actually be indicative of what effect each change had. This paper will include code changes and proposals that lead copycat (and FARG projects in general) to a more statistical and verifiable approach.
While there is a good argument about copycat representing an individual with biases and therefore being incomparable to a distributed group of individuals, I believe that additional effort should be made to test copycat against human subjects. I may include in this paper a concrete proposal on how such an experiment might be done.
Let's simply test the hypothesis: \[H_i\] Copycat will have an improved (significantly different with increased frequencies of more desirable answers and decreased frequencies of less desirable answers: desirability will be determined by some concrete metric, such as the number of relationships that are preserved or mirrored) answer distribution if temperature is turned to a set of distributed metrics. \[H_0\] Copycat's answer distribution will be unaffected by changing temperature to a set of distributed metrics.
\subsection{Random Notes}
This is all just free-flow unstructured notes. Don't take anything too seriously :).
Below are a list of relevant primary and secondary sources I am reviewing:
Biological/Psychological Plausibility:
\begin{verbatim}
http://www.cell.com/trends/cognitive-sciences/abstract/S1364-6613(16)30217-0
"There is no evidence for a single site of working memory storage."
https://ekmillerlab.mit.edu/2017/01/10/the-distributed-nature-of-working-memory/
Creativity as a distributed process (SECONDARY: Review primaries)
https://blogs.scientificamerican.com/beautiful-minds/the-real-neuroscience-of-creativity/
cognition results from the dynamic interactions of distributed brain areas operating in large-scale networks
http://scottbarrykaufman.com/wp-content/uploads/2013/08/Bressler_Large-Scale_Brain_10.pdf
MIT Encyclopedia of the Cognitive Sciences:
In reference to connectionist models:
"Advantages of distribution are generally held to include greater representational capacity, content addressability, automatic generalization, fault tolerance, and biological plausibility. Disadvantages include slow learning, catastrophic interference, and binding problems."
Cites:
French, R. (1992). Semi-distributed representation and catastrophic forgetting in connectionist networks.
Smolensky, P. (1991). Connectionism, constituency, and the language of thought.
[...]
\end{verbatim}
(Sure, we know that the brain is a distributed system, but citing some neuroscience makes me feel much safer.)
Goal related sources:
\begin{verbatim}
This will all most likely be FARG related stuff
Isolating and enumerating FARG's goals will help show me what direction to take
[..]
\end{verbatim}
Eliminating global variables might create a program that is more psychologically and biologically plausible, as according to the above. But our goals should be kept in mind. If we wanted full psychological and biological plausibility, we would just replicate a human mind atom for atom, particle for particle, or string for string.
Levels of abstraction in modeling the human brain and its processes:
\begin{enumerate}
\item Cloning a brain at the smallest scale possible (i.e. preserving quantum states of electrons or something)
\item Simulating true neurons, abstracting away quantum mechanical detail
\item Artificial neurons that abstract away electrochemical detail
\item Simulation of higher-level brain structures and behaviors that transcends individual neurons
...
\item Highest level of abstraction that still produces intelligent processes
\end{enumerate}
How far do we plan to go? What are we even abstracting? Which details matter and which don't?
One side: Abstraction from biological detail may eventually mean that global variables become plausible
Alt: Abstraction may remove some features and not others. Global variables may \emph{never} be plausible, even at the highest level of abstraction. (Of course, this extreme is probably not the case).
Lack of a centralized structure versus lack of a global phenomenon
Since temperature, for example, is really just a function of several local phenomena, how global is it? I mean: If a centralized decision maker queried local phenomena separately, and made decisions based on that, it would be the same. Maybe centralized decision makers don't exist. Decisions, while seemingly central, have to emerge from agent processes. But what level of abstraction are we working on?
Clearly, if we knew 100\% which details mattered, we would already have an effective architecture.
\section{A formalization of the model}
Let $\Omega = \{\omega_1, \omega_2, ..., \omega_n\}$ be a finite discrete space. In FARG models $\Omega$ represents the \emph{working short-term memory} of the system and the goal is to craft a context-sensitive representation (cite FRENCH here). Hence $\Omega$ holds \emph{all possible configurations} of objects that could possibly exist in one's working memory; a large space.
Let us define the neighborhood function $A:(\Omega,$C$) \to 2^\Omega$ as the set of \emph{perceived affordances} under \emph{context} $C$. The affordances $A$ define which state transitions $\omega_i \to \omega_j$ are possible at a particular context $C$. Another term that has been used in the complexity literature is \emph{the adjacent possible}.
A context is defined by the high-level ideas, the concepts that are active at a particular point in time.
The \emph{Cohesion} of the system is measured by the mutual information between the external memory, the short-term memory state $\omega_i$, and the context $C$.
\subsection{Copycat}
% LUCAS: this entire section is copied from my old "minds and machines" paper... so we should discuss at some point whether to re-write it or not.
\subsubsection{The letter-analogy domain}
Consider the following, seemingly trivial, analogy problem: $abc \to abd:ijk \to ?$, that is, if the letter string “abc” changes to the letter string “abd”, how would the letter string “ijk” change “in the same way”? This is the domain of the Copycat project, and before we attempt a full description of the system, let us discuss in more detail some of the underlying intricacies. Most people will in this case come up with a rule of transformation that looks like: “Replace the rightmost letter by its successor in the alphabet”, the application of which would lead to $ijl$. This is a simple and straightforward example. But other examples bring us the full subtlety of this domain. The reader unfamiliar with the Copycat project is invited to consider the following problems: $abc\to abd: ijjkkk\to ?$, $abc\to abd: xyz\to ?$, $abc\to abd: mrrkkk\to ?$, among others (Mitchell, 2003) to have a sense of the myriad of subtle intuitions involved in solving these problems.
To solve this type of problem, one could come up with a scheme where the computer must first find a representation that models the change and then apply that change to the new string. This natural sequence of operations is \emph{not possible}, however, because \emph{the transformation rule representing the change itself must bend to contextual cues and adapt to the particularities of the letter strings}. For example, in the problem $abc\to abd: xyz\to ?$, the system may at first find a rule like “change rightmost letter to its successor in the alphabet”. However, this explicit rule cannot be carried out in this case, simply because $z$ has no successor. This leads to an impasse, out of which the only alternative by the system is to use a flexible, context-sensitive, representation system.
The reader may have noticed that this cognitive processing bears some similarities to the process of chess perception. Perception obviously plays a significant role in letter string analogies, as it is necessary to connect a set of individual units--in this case, letter sequences--, into a meaningful interpretation which stresses the underlying pressures of the analogy. In chess it is also necessary to connect disparate pieces into a meaningful description stressing the positions pressures. But the most striking similarities with chess perception (in what concerns bounded rationality) seems to be the absolute lack of a single objectively correct answer, we have instead just an intuitive subjective feeling, given by the great number of simultaneous pressures arising in each problem.
In the previous section we have made reference to some studies considering multiple, incompatible chunks that emerge in chess positions. In letter strings this same problem appears. Consider for instance the following problem:
If $aabc\to aabd: ijkk\to ?$
\begin{itemize}
\item One may chunk the initial strings as $(a)(abc)$ and $(a)(abd)$ and find a `corresponding' chunk $(ijk)(k)$, which could lead to the following transformation rule: “change the last letter of the increasing sequence to its successor in the alphabet”. This interpretation would lead to the answer $ijlk$.
\item Or, alternatively, one may chunk the initial strings as $(aa)(b)(c)$ and $(aa)(b)(d)$ and find a counterpart string with the chunking $(i)(j)(kk)$, and, in this case, the mapping can be inverted: The first letter group $(aa)$ maps to the last letter group $(kk)$, and this will also invert the other mappings, leading to $(b)$ mapping to $(j)$ and $(c)$ mapping to $(i)$. Because this viewpoint substantially stresses the concept `opposite', Copycat is able to create the transformation rule “change the first letter to its predecessor in the alphabet”, leading to the solution $hjkk$, which preserves symmetry between group letter sizes and between successorship and predecessorship relations.
\item Other potential transformation rules could lead, in this problem, to $ijkl$ (change the last letter to its successor in the alphabet), $ijll$ (change the last group of letters to its successor in the alphabet), or $jjkk$ (change the first letter to its successor in the alphabet). This problem of many incompatible (and overlapping) chunkings is of importance. The specific chunking of a problem is directly linked to its solution, because chunks stress what is important on the underlying relations.
\end{itemize}
\subsubsection{The FARG architecture of Copycat}
How does the Copycat system work? Before reviewing its underlying parts, let us bear in mind one of its principal philosophical points. Copycat is not intended solely as a letter-string analogy program. The intention of the project is the test of a theory; a theory of `statistically emergent active symbols' (Hofstadter 1979; Hofstadter 1985) which is diametrically opposite to the “symbol system hypothesis” (Newell, 1980; Simon, 1980). The major idea of active symbols is that instead of being tokens passively manipulated by programs, active symbols emerge from high numbers of interdependent subcognitive processes, which swarm over the system and drive its processing by triggering a complex `chain reaction' of concepts. The system is termed `subsymbolic' because these processes are intended to correspond to subliminal human information processes of a few milliseconds, such as a subtle activation of a concept (i.e., priming), or an unconscious urge to look for a particular object. So the models are of collective (or emergent) computation, where a multitude of local processes gradually build a context-sensitive representation of the problem. These symbols are active because they drive processing, leading to a chain reaction of activation spreading, in which active concepts continuously trigger related concepts, and short-term memory structures are construed to represent the symbol (in this philosophical view a token does not have any associated meaning, while a meaningful representation, a symbol, emerges from an interlocked interpretation of many subcognitive pressing urges).
This cognitively plausible architecture has been applied to numerous domains (see for instance French 1992; Mitchell and Hofstadter 1990; Mitchell 1993; McGraw 1995; Marshall 1999; Rehling 2001 MANY ARE MISSING HERE!). It has five principal components:
\begin{enumerate}
\item A workspace that interacts with external memory--this is the working short-term memory of the model. The workspace is where the representations are construed, with innumerable pressing urges waiting for attention and their corresponding impulsive processes swarming over the representation, independently perceiving and creating many types of subpatterns. Common examples of such subpatterns are bonds between letters such as group bonds between $a*a$ or successor bonds between successive letters $a*b$ , or relations between objects, awareness of abstract roles played by objects, and so on.
\item Pressing urges and impulsive processes The computational processes constructing the representations on short-term memory are subcognitive impulsive processes named codelets. The system perceives a great number of subtle pressures that immediately invoke subcognitive urges to handle them. These urges will eventually become impulsive processes. Some of these impulsive processes may look for particular objects, some may look for particular relations between objects and create bonds between them, some may group objects into chunks, or associate descriptions to objects, etc. The collective computation of these impulsive processes, at any given time, stands for the working memory of the model. These processes can be described as impulsive for a number of reasons: first of all, they are involuntary, as there is no conscious decision required for their triggering. (As Daniel Dennett once put it, if I ask you “not to think of an elephant”, it is too late, you already have done so, in an involuntary way.) They are also automatic, as there is no need for conscious decisions to be taken in their internal processing; they simply know how to do their job without asking for help. They are fast, with only a few operations carried out. They accomplish direct connections between their micro-perceptions and their micro-actions. Processing is also granular and fragmented as opposed to a linearly structured sequence of operations that cannot be interrupted (Linhares 2003). Finally, they are functional, associated with a subpattern, and operate on a subsymbolic level (but not restricted to the manipulation of internal numerical parameters as opposed to most connectionist systems).
\item List of parallel priorities— Each impulsive process executes a local, incremental, change to the emerging representation, but the philosophy of the system is that all pressing urges are perceived simultaneously, in parallel. So there is at any point in time a list of subcognitive urges ready to execute, fighting for the attention of the system and waiting probabilistically to fire as an impulsive process. This list of parallel priorities is named in Copycat as the coderack.
\item A semantic associative network undergoing constant flux The system has very limited basic knowledge: it knows the 26 letters of the alphabet, and the immediate successorship relations entailed (it does not, for instance, know that the shapes of lowercase letters p, b, q bear some resemblance). The long-term memory of the system is embedded over a network of nodes representing concepts with links between nodes associating related concepts. This network is a crucial part for the formation of the chain reaction of conceptual activation: any specific concept, when activated, propagates activation to its related concepts, which will in turn launch top-down expectation-driven urges to look for those related concepts. This mode of computation not only enforces a context-sensitive search but also is the basis of the chain reaction of activation spreading hence the term active symbols. This network is named in Copycat as the slipnet. One of the most original features of the slipnet is the ability to “slip one concept into another”, in which analogies between concepts are made (for details see Hofstadter 1995, Mitchell 1993).
\item A temperature measure It should be obvious that the system does not zoom in immediately and directly into a faultless representation. The process of representation construction is gradual, tentative, and numerous impulsive processes are executed erroneously. At start, the system has no expectations of the content of letter strings, so it slowly wanders through many possibilities before converging on a specific interpretation, a process named the parallel terraced scan (Hofstadter 1995); and embedded within it is a control parameter of temperature that is similar in some aspects to that found in simulated annealing (Cagan and Kotovsky 1997; Hofstadter 1995). The temperature measures the global amount of disorder and misunderstanding contained in the situation. So at the beginning of the process, when no relevant information has been gathered, the temperature will be high, but it will gradually decrease as intricate relationships are perceived, first concepts are activated, the abstract roles played by letters and chunks are found; and meaning starts to emerge. Though other authors have proposed a relationship between temperature and understanding (Cagan and Kotovsky, 1997), there is still a crucial difference here (see Hofstadter 1985, 1995): unlike the simulated annealing process that has a forcedly monotonically decreasing temperature schedule, the construction of a representation for these letter strings does not necessarily get monotonically improved as time flows. As in the $abc\to abd : xyz\to ?$ problem, there are many instants when roadblocks are reached, when snags appear, and incompatible structures arise. At these moments, complexity (and entropy and confusion) grows, and so the temperature decrease is not monotonic.
Finally, temperature does not act as a control parameter dictated by the user, that is, \emph{forced} to go either down or up, but it also acts \emph{as a feedback mechanism} to the system, which may reorganize itself, accepting or rejecting changes as temperature allows. As pressing urges are perceived, their corresponding impulses eventually propose changes to working memory, to construct or to destruct structures. How do these proposed changes get accepted? Through the guidance of temperature. At start $T$ is high and the vast majority of proposed structures are built, but as it decreases it becomes increasingly more important for a proposed change to be compatible with the existing interpretation. And the system may thus focus on developing a particular viewpoint.
\end{enumerate}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig4-copycat.png}
\caption{\label{fig:run-1}Copycat after 110 codelets have executed. This implementation was carried out by Scott Bolland from the University of Queensland, Australia (2003, available online).}
\end{figure}
\subsubsection{An example run}
Let us consider an example run of the Copycat system, and look at some specific steps in its processing of the problem $abc\to abd : iijjkk \to ?$
Figure \ref{fig:run-1} presents the working memory (workspace) after 110 codelets. The system at this point has not perceived much structure. It has perceived each individual letter, it has mapped the letters $c$ and $d$ between the original and target strings, and it has perceived some initial bonds between neighboring letters. Some of these bonds are sameness bonds (such as $i*i$), some are successorship bonds (such as $i*j$), and some are predecessorship bonds (such as $b*c$). In fact, there is confusion between the competing views of successorship and predecessorship relations in the string $abc$. These incompatible interpretations will occasionally compete. The system is also mapping the leftmost letter $a$ to the leftmost letter $i$.
Notice that a first chunk has been created in the group `$jj$'. Now \emph{this chunk is an individual object on its own}, capable of bonding with (and relating to) other objects. Notice also that the system has not yet perceived---and built the corresponding bond between---the two $k$'s in succession. So perception in Copycat is granular, fragmented over large numbers of small `micro-events'.
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig5-copycat.png}
\caption{\label{fig:run-2}Copycat's working memory after the execution of 260 codelets.}
\end{figure}
After an additional 150 codelets have been executed (Figure \ref{fig:run-2}), more structure is built: we now have three group chunks perceived; and there is also less confusion in the $abc$, as a `staircase' relation is perceived: that is, the system now perceives $abc$ as a successorship group, another chunked object. Finally, an initial translation rule appears: replace letter category of rightmost letter by successor. If the system were to stop processing at this stage it would apply this rule rather crudely and obtain the answer $iijjkl$. Note that temperature is dropping as more structure is created.
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig6-copycat.png}
\caption{\label{fig:run-3}Copycat's working memory after the execution of 280 codelets. }
\end{figure}
Let us slow down our overview a little bit and return in Figure \ref{fig:run-3} after only 20 codelets have run, to illustrate an important phenomenon: though $c$ now will map to the group $kk$, which is an important discovery, the global temperature will still be higher than that of the previous point (Figure \ref{fig:run-2}). This occurs because there is some `confusion' arising from the predecessorship bond which was found between chunks `$ii$' and `$jj$', which does not seem to fit well with all those successorship relations already perceived and with the high activation of the successorship concept. So temperature does not always drop monotonically.
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig7-copycat.png}
\caption{\label{fig:frog}Copycat's working memory after the execution of 415 codelets.}
\end{figure}
On the next step we can perceive two important changes: first, the system perceives some successorship relations between the groups $ii$ and $jj$ and between the groups $jj$ and $kk$, but these relations are perceived in isolation from each other. Another important discovery is that $jj$ is interpreted as being in `the middle of' $iijjkk$, which will eventually lead to its mapping to the letter $b$ in the original string.
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig8-copycat.png}
\caption{\label{fig:f8}Copycat's working memory after the execution of 530 codelets.}
\end{figure}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{fig9-copycat.png}
\caption{\label{fig:f9}Final solution obtained after the execution of 695 codelets.}
\end{figure}
The system finally perceives that the successorship relations between the $ii$, $jj$, and $kk$ groups are not isolated and creates a single successorship group encompassing these three sameness groups. Thus two successor groups are perceived on the workspace, and a mapping between them is built. However, $a$ still maps to the letter $i$, instead of to the group $ii$, and $c$ still maps to the letter $k$, instead of to the group $kk$.
From this stage it still remains for the letter $a$ to map to the group $ii$ and for the letter $c$ to map to group $kk$, which will lead naturally to the translated rule ``replace letter category of rightmost group to successor'', illustrating the slipping of the concept letter to the concept group.
After 695 codelets, the system reaches the answer $iijjll$. The workspace may seem very clean and symmetric, but it has evolved from a great deal of disorder and from many microscopic `battles' between incompatible interpretations.
The most important concepts activated in this example were group and successor group. Once some sameness bonds were constructed, they rapidly activated the concept sameness group which reinforced the search to find sameness groups, such as $kk$. Once the initial successorship bonds were created, the activation of the corresponding concept rapidly enabled the system to find other instances of successorship relations (between, for instance, the sameness groups $jj$ and $kk$). Different problems would activate other sets of concepts. For example, `$abc\to abd: xyz\to ?$' would probably activate the concept \emph{opposite}. And `$abc\to abd: mrrjjj\to ?$' would probably activate the concept length (Mitchell 1993). This rapid activation of concepts (and their top-down pressing urges), with the associated propagation of activation to related concepts, creates a chain reaction of impulsive cognition, and is the key to active symbols theory. The reader is referred to Mitchell (1993) and to Marshall (1999) to have an idea of how the answers provided by Copycat resemble human intuition.
We may safely conclude at this point that there are many similarities between copycat and the chess perception process, including: (i) an iterative locking in process into a representation; (ii) smaller units bond and combine to form higher level, meaningfully coherent structures; (iii) the perception process is fragmented, granular, with great levels of confusion and entropy at start, but as time progresses it is able to gradually converge into a context-sensitive representation; (iv) there is a high interaction between an external memory, a limited size short term memory, and a long term memory; and (v) this interaction is done simultaneously by bottom-up and top-down processes.
\subsection{How to include Figures}
First you have to upload the image file from your computer using the upload link the project menu. Then use the includegraphics command to include it in your document. Use the figure environment and the caption command to add a number and a caption to your figure. See the code for Figure \ref{fig:frog} in this section for an example.
\subsection{How to add Comments}
Comments can be added to your project by clicking on the comment icon in the toolbar above. % * <john.hammersley@gmail.com> 2016-07-03T09:54:16.211Z:
%
% Here's an example comment!
%
To reply to a comment, simply click the reply button in the lower right corner of the comment, and you can close them when you're done.
Comments can also be added to the margins of the compiled PDF using the todo command\todo{Here's a comment in the margin!}, as shown in the example on the right. You can also add inline comments:
\todo[inline, color=green!40]{This is an inline comment.}
\subsection{How to add Tables}
Use the table and tabular commands for basic tables --- see Table~\ref{tab:widgets}, for example.
\begin{table}
\centering
\begin{tabular}{l|r}
Item & Quantity \\\hline
Widgets & 42 \\
Gadgets & 13
\end{tabular}
\caption{\label{tab:widgets}An example table.}
\end{table}
\subsection{How to write Mathematics}
\LaTeX{} is great at typesetting mathematics. Let $X_1, X_2, \ldots, X_n$ be a sequence of independent and identically distributed random variables with $\text{E}[X_i] = \mu$ and $\text{Var}[X_i] = \sigma^2 < \infty$, and let
\[S_n = \frac{X_1 + X_2 + \cdots + X_n}{n}
= \frac{1}{n}\sum_{i}^{n} X_i\]
denote their mean. Then as $n$ approaches infinity, the random variables $\sqrt{n}(S_n - \mu)$ converge in distribution to a normal $\mathcal{N}(0, \sigma^2)$.
\subsection{How to create Sections and Subsections}
Use section and subsections to organize your document. Simply use the section and subsection buttons in the toolbar to create them, and we'll handle all the formatting and numbering automatically.
\subsection{How to add Lists}
You can make lists with automatic numbering \dots
\begin{enumerate}
\item Like this,
\item and like this.
\end{enumerate}
\dots or bullet points \dots
\begin{itemize}
\item Like this,
\item and like this.
\end{itemize}
\subsection{How to add Citations and a References List}
You can upload a \verb|.bib| file containing your BibTeX entries, created with JabRef; or import your \href{https://www.overleaf.com/blog/184}{Mendeley}, CiteULike or Zotero library as a \verb|.bib| file. You can then cite entries from it, like this: \cite{greenwade93}. Just remember to specify a bibliography style, as well as the filename of the \verb|.bib|.
You can find a \href{https://www.overleaf.com/help/97-how-to-include-a-bibliography-using-bibtex}{video tutorial here} to learn more about BibTeX.
We hope you find Overleaf useful, and please let us know if you have any feedback using the help menu above --- or use the contact form at \url{https://www.overleaf.com/contact}!

339
papers/legacy/paper.tex Normal file
View File

@ -0,0 +1,339 @@
\documentclass[a4paper]{article}
%% Language and font encodings
\usepackage[english]{babel}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
%% Sets page size and margins
\usepackage[a4paper,top=3cm,bottom=2cm,left=3cm,right=3cm,marginparwidth=1.75cm]{geometry}
%% Useful packages
\usepackage{listings}
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage[colorlinks=true, allcolors=blue]{hyperref}
\definecolor{lightgrey}{rgb}{0.9, 0.9, 0.9}
\lstset{ %
backgroundcolor=\color{lightgrey}}
\title{The Distributed Nature of Copycat..? (WIP)}
\author{Lucas Saldyt, Alexandre Linhares}
\begin{document}
\maketitle
\begin{abstract}
We investigate the distributed nature of computation in a FARG architecture, Copycat.
One of the foundations of those models is the \emph{Parallel Terraced Scan}--a psychologically-plausible model that enables a system to fluidly move between different modes of processing.
Previous work has modeled decision-making under Parallel Terraced Scan by using a central variable of \emph{Temperature}.
However, it is unlikely that this design decision accurately replicates the processes in the human brain.
Additionally, Copycat and other FARG architectures have incredibly high rates of unscientific inquiry.
Specifically, Copycat uses many undocumented formulas and magic numbers, some of which have been parameterized to fix particular problems at the expense of performing worse on others.
This paper aims to add a framework for conducting so-called "Normal" science with Copycat, in the hopes of making our findings more concrete.
\end{abstract}
\section{Introduction}
This paper stems from Mitchell's (1993) and Hofstadter \& FARG (1995). The goals of this project are twofold:
Firstly, we focus on effectively simulating intelligent processes through increasingly distributed decision-making.
...
Written by Linhares:
The Parallel Terraced Scan is a major innovation of FARG architectures.
It corresponds to the psychologically-plausible behavior of briefly browsing, say, a book, and delving deeper whenever something sparks one's interest.
This type of behavior seems to very fluidly change the intensity of an activity based on local, contextual cues.
It is found in high-level decisions such as marriage and low-level decisions such as a foraging predator choosing whether to further explore a particular area.
Previous FARG models have used a central temperature T to implement this behavior.
We explore how to maintain the same behavior while distributing decision-making throughout the system.
...
Specifically, we begin by attempting different refactors of the copycat architecture.
First, we experiment with different treatments of temperature, adjusting the formulas that depend on it
Then, we experiment with two methods for replacing temperature with a distributed metric, instead.
First, we remove temperature destructively, essentially removing any lines of code that mention it, simply to see what effect it has.
Then, we move toward a surgical removal of temperature, leaving intact affected structures or replacing them with effective distributed mechanisms.
Secondly, we focus on the creation of a `normal science' framework in FARG architectures.
By `normal science' we use the term created by Thomas Kuhn--the collaborative enterprise of furthering understanding within a paradigm.
Today, "normal science" is simply not done on FARG architectures (and on most computational cognitive architectures too... see Addyman \& French 2012).
Unlike mathematical theories or experiments, which can be replicated by following the materials and methods, computational models generally have dozens of particularly tuned variables, undocumented procedures, multiple assumptions about the user's computational environment, etc.
It then becomes close to impossible to reproduce a result, or to test some new idea.
This paper focuses on the introduction of statistical techniques, reduction of "magic numbers", improvement and documentation of formulas, and proposals for effective human comparison.
We also discuss, in general, the nature of the brain as a distributed system.
While the removal of a single global variable may initially seem trivial, one must realize that copycat and other cognitive architectures have many central structures.
This paper explores the justification of these central structures in general.
Is it possible to model intelligence with them, or are they harmful?
...
\section{Body: Distributed Decision Making and Normal Science}
\subsection{Distributed Decision Making}
The distributed nature of decision making is essential to modeling intelligent processes [..]
\subsection{Normal Science}
An objective, scientifically oriented framework is essential to making progress in the domain of cognitive science.
[John Von Neumann: The Computer and the Brain?
He pointed out that there were good grounds merely in terms of electrical analysis to show that the mind, the brain itself, could not be working on a digital system. It did not have enough accuracy; or... it did not have enough memory. ...And he wrote some classical sentences saying there is a statistical language in the brain... different from any other statistical language that we use... this is what we have to discover. ...I think we shall make some progress along the lines of looking for what kind of statistical language would work.]
Notion that the brain obeys statistical, entropical mathematics
\subsection{Notes}
According to the differences we can enumerate between brains and computers, it is clear that, since computers are universal and have vastly improved in the past five decades, that computers are capable of simulating intelligent processes.
[Cite Von Neumann].
Primarily, the main obstacle now lies in our comprehension of intelligent processes.
Once we truly understand the brain, writing software that emulates intelligence will be a relatively simple software engineering task.
However, we must be careful to remain true to what we already know about intelligent processes so that we may come closer to learning more about them and eventually replicating them in full.
The largest difference between the computer and the brain is the distributed nature of computation.
Specifically, our computers as they exist today have central processing units, where literally all of computation happens.
On the other hand, our brains have no central location where all processing happens.
Luckily, the speed advantage and universality of computers makes it possible to simulate the distributed behavior of the brain.
However, this simulation is only possible if computers are programmed with concern for the distributed nature of the brain.
[Actually, I go back and forth on this: global variables might be plausible, but likely aren't]
Also, even though the brain is distributed, some clustered processes must take place.
In general, centralized structures should be removed from the copycat software, because they will likely improve the accuracy of simulating intelligent processes.
It isn't clear to what degree this refactor should take place.
The easiest target is the central variable, temperature, but other central structures exist.
This paper focuses primarily on temperature, and the unwanted global unification associated with it.
Even though copycat uses simulated parallel code, if copycat were actually parallelized, the global variable of temperature would actually prevent most copycat codelets from running at the same time.
If this global variable and other constricting centralized structures were removed, copycat's code would more closely replicate intelligent processes and would be able to be run much faster.
From a function-programming like perspective (i.e. LISP, the original language of copycat), the brain should simply be carrying out the same function in many locations (i.e. mapping neuron.process() across each of its neurons, if you will...)
However, in violating this model with the introduction of global variables......
Global variables seem like a construct that people use to model the real world.
...
It is entirely possible that at the level of abstraction that copycat uses, global variables are perfectly acceptable.
For example, a quick grep-search of copycat shows that the workspace singleton also exists as a global variable.
Making all of copycat distributed clearly would require a full rewrite of the software....
If copycat can be run such that codelets may actually execute at the same time (without pausing to access globals), then it will much better replicate the human brain.
However, I question the assumption that the human brain has absolutely no centralized processing.
For example, input and output channels (i.e. speech mechanisms) are not accessible from the entire brain.
Also, brain-region science leads me to believe that some (for example, research concerning wernicke's or broca's areas) brain regions truly are "specialized," and thus lend some support to the existence of centralized structures in a computer model of the brain.
However, these centralized structures may be emergent?
So, to re-iterate: Two possibilities exist (hypotheses)
A computer model of the brain can contain centralized structures and still be effective in its modeling.
A computer model cannot have any centralized structures if it is going to be effective in its modeling.
Another important problem is defining the word "effective".
I suppose that "effective" would mean capable of solving fluid analogy problems, producing similar answers to an identically biased human.
However, it isn't clear to me that removing temperature increases the ability to solve problems effectively.
Is this because models are allowed to have centralized structures, or because temperature isn't the only centralized structure?
Clearly, creating a model of copycat that doesn't have centralized structures will take an excessive amount of effort.
\break
.....
\break
The calculation for temperature in the first place is extremely convoluted (in the Python version of copycat).
It lacks any documentation, is full of magic numbers, and contains seemingly arbitrary conditionals.
(If I submitted this as a homework assignment, I would probably get a C. Lol)
Edit: Actually, the lisp version of copycat does a very good job of documenting magic numbers and procedures.
My main complaint is that this hasn't been translated into the Python version of copycat.
However, the Python version is translated from the Java version..
Lost in translation.
My goal isn't to roast copycat's code, however.
Instead, what I see is that all this convolution is \emph{unnecessary}.
Ideally, a future version of copycat, or an underlying FARG architecture, will remove this convolution, and make temperature calculation simpler, streamlined, documented, understandable.
How will this happen, though?
A global description of the system is, at times, potentially useful.
However, in summing together the values of each workspace object, information is lost regarding which workspace objects are offending.
In general, the changes that occur will eventually be object-specific.
So, it seems to me that going from object-specific descriptions to a global description back to an object-specific action is a waste of time.
I don't think that a global description should be \emph{obliterated} (removed 100\%).
I just think that a global description should be reserved for when global actions are taking place.
For example, when deciding that copycat has found a satisfactory answer, a global description should be used, because deciding to stop copycat is a global action.
However, when deciding to remove a particular structure, a global description should not be used, because removing a particular offending structure is NOT a global action.
Summary: it is silly to use global information to make local decisions that would be better made using local information (self-evident).
Benefits of using local information to make local decisions:
Code can be truly distributed, running in true parallel, CPU-bound.
This means that copycat would be faster and more like a human brain.
Specific structures would be removed based on their own offenses.
This means that relevant structures would remain untouched, which would be great!
Likely, this change to copycat would produce better answer distributions testable through the normal science framework.
On the other hand (I've never met a one-handed researcher), global description has some benefits.
For example, the global formula for temperature converts the raw importance value for each object into a relative importance value for each object.
If a distributed metric was used, this importance value would have to be left in its raw form.
\subsubsection{Functional Programming Languages and the Brain}
The original copycat was written in LISP, a mixed-paradigm language.
Because of LISP's preference for functional code, global variables must be explicitly marked with surrounding asterisks.
Temperature, the workspace, and final answers are all marked global variables as discussed in this paper.
These aspects of copycat are all - by definition - impure, and therefore imperative code that relies on central state changes.
It is clear that, since imperative, mutation-focused languages (like Python) are turing complete in the same way that functional, purity-focused languages (like Haskell) are turing complete, each method is clearly capable of modeling the human brain.
However, the algorithm run by the brain is more similar to distributed, parallel functional code than it is to centralized, serial imperative code.
While there is some centralization in the brain, and evidently some state changes, it is clear that 100\% centralized 100\% serial code is not a good model of the brain.
Also, temperature is, ultimately, just a function of objects in the global workspace.
The git branch soft-temp-removal hard-removes most usages of temperature, but continues to use a functional version of the temperature calculation for certain processes, like determining if the given answer is satisfactory or not.
So, all mentions of temperature could theoretically be removed and replaced with a dynamic calculation of temperature instead.
It is clear that in this case, this change is unnecessary.
With the goal of creating a distributed model in mind, what actually bothers me more is the global nature of the workspace, coderack, and other singleton copycat structures.
Really, when temperature is removed and replaced with some distributed metric, it is clear that the true "offending" global is the workspace/coderack.
Alternatively, codelets could be equated to ants in an anthill (see anthill analogy in GEB).
Instead of querying a global structure, codelets could query their neighbors, the same way that ants query their neighbors (rather than, say, relying on instructions from their queen).
\subsection{Initial Formula Adjustments}
This research began with adjustments to probability weighting formulas.
In copycat, temperature affects the simulation in multiple ways:
\begin{enumerate}
\item Certain codelets are probabilistically chosen to run
\item Certain structures are probabilistically chosen to be destroyed
\item ...
\end{enumerate}
In many cases, the formulas "get-adjusted-probability" and "get-adjusted-value" are used.
Each curves a probability as a function of temperature.
The desired behavior is as follows:
At high temperatures, the system should explore options that would otherwise be unlikely.
So, at temperatures above half of the maximum temperature, probabilities with a base value less than fifty percent will be curved higher, to some threshold.
At temperatures below half of the maximum temperature, probabilities with a base value above fifty percent will be curved lower, to some threshold.
The original formulas being used to do this were overly complicated.
In summary, many formulas were tested in a spreadsheet, and an optimal one was chosen that replicated the desired behavior.
The original formula for curving probabilities in copycat:
\lstinputlisting[language=Python]{formulas/original.py}
An alternative that seems to improve performance on the abd->abd xyz->? problem:
This formula produces probabilities that are not bounded between 0 and 1. These are generally truncated.
\lstinputlisting[language=Python]{formulas/entropy.py}
Ultimately, it wasn't clear to me that the so-called "xyz" problem should even be considered.
As discussed in [the literature], the "xyz" problem is a novel example of a cognitive obstacle.
Generally, the best techniques for solving the "xyz" problem are discussed in the publications around the "Metacat" project, which gives copycat a temporary memory and levels of reflection upon its actions.
However, it is possible that the formula changes that target improvement in other problems may produce better results for the "xyz" problem.
Focusing on the "xyz" problem, however, will likely be harmful to the improvement of performanace on other problems.
So, the original copycat formula is overly complicated, and doesn't perform optimally on several problems.
The entropy formula is an improvement, but other formulas are possible too.
Below are variations on a "weighted" formula.
The general structure is:
\[\emph{p'} = \frac{T}{100} * S + \frac{100-T}{100} * U\]
Where: $S$ is the convergence value for when $T = 0$ and
$U$ is the convergence value for when $T = 100$.
The below formulas simply experiment with different values for $S$ and $U$
The values of $\alpha$ and $\beta$ can be used to provide additional weighting for the formula, but are not used in this section.
\lstinputlisting[language=Python]{formulas/weighted.py}
[Discuss inverse formula and why $S$ was chosen to be constant]
After some experimentation and reading the original copycat documentation, it was clear that $S$ should be chosen to be $0.5$ and that $U$ should implement the probability curving desired at high temperatures.
The following formulas let $U = p^r$ if $p < 0.5$ and let $U = p^\frac{1}{r}$ if $p >= 0.5$.
This controls whether/when curving happens.
Now, the parameter $r$ simply controls the degree to which curving happens.
Different values of $r$ were experimented with (values between $10$ and $1$ were experimented with at increasingly smaller step sizes).
$2$ and $1.05$ are both good choices at opposite "extremes".
$2$ works because it is large enough to produce novel changes in behavior at extreme temperatures without totally disregarding the original probabilities.
Values above $2$ do not work because they make probabilities too uniform.
Values below $2$ (and above $1.05$) are feasible, but produce less curving and therefore less unique behavior.
$1.05$ works because it very closely replicates the original copycat formulas, providing a very smooth curving.
Values beneath $1.05$ essentially leave probabilities unaffected, producing no significant unique behavior dependent on temperature.
\lstinputlisting[language=Python]{formulas/best.py}
Random thought:
It would be interesting to not hardcode the value of $r$, but to instead leave it as a variable between $0$ and $2$ that changes depending on frustration.
However, this would be much like temperature in the first place....?
$r$ could itself be a function of temperature. That would be.... meta.... lol.
\break
...
\break
And ten minutes later, it was done.
The "meta" formula performs as well as the "best" formula on the "ijjkkk" problem, which I consider the most novel.
Interestingly, I noticed that the parameterized formulas aren't as good on this problem. What did I parameterize them for? Was it well justified?
(Probably not)
At this point, I plan on using the git branch "feature-normal-science-framework" to implement a system that takes in a problem set and provides several answer distributions as output.
Then, I'll do a massive cross-formula answer distribution comparison with $\chi^2$ tests. This will give me an idea about which formula and which changes are best.
I'll also be able to compare all of these answer distributions to the frequencies obtained in temperature removal branches of the repository.
\subsection{Steps/plan}
Normal Science:
\begin{enumerate}
\item Introduce statistical techniques
\item Reduce magic number usage, document reasoning and math
\item Propose effective human subject comparison
\end{enumerate}
Temperature:
\begin{enumerate}
\item Propose formula improvements
\item Experiment with a destructive removal of temperature
\item Experiment with a "surgical" removal of temperature
\item Assess different copycat versions with/without temperature
\end{enumerate}
\subsection{Semi-structured Notes}
Biological or psychological plausibility only matters if it actually affects the presence of intelligent processes. For example, neurons don't exist in copycat because we feel that they aren't required to simulate the processes being studied. Instead, copycat uses higher-level structures to simulate the same emergent processes that neurons do. However, codelets and the control of them relies on a global function representing tolerance to irrelevant structures. Other higher level structures in copycat likely rely on globals as well. Another central variable in copycat is the "rule" structure, of which there is only one. While some global variables might be viable, others may actually obstruct the ability to model intelligent processes. For example, a distributed notion of temperature will not only increase biological and psychological plausibility, but increase copycat's effectiveness at producing acceptable answer distributions.
We must also realize that copycat is only a model, so even if we take goals (level of abstraction) and biological plausibility into account...
It is only worth changing temperature if it affects the model.
Arguably, it does affect the model. (Or, rather, we hypothesize that it does. There is only one way to find out for sure, and that's the point of this paper)
So, maybe this is a paper about goals, model accuracy, and an attempt to find which cognitive details matter and which don't. It also might provide some insight into making a "Normal Science" framework.
Copycat is full of random uncommented parameters and formulas. Personally, I would advocate for removing or at least documenting as many of these as possible. In an ideal model, all of the numbers present might be either from existing mathematical formulas, or present for a very good (emergent and explainable - so that no other number would make sense in the same place) reason. However, settling on so called "magic" numbers because the authors of the program believed that their parameterizations were correct is very dangerous. If we removed random magic numbers, we would gain confidence in our model, progress towards a normal science, and gain a better understanding of cognitive processes.
Similarly, a lot of the testing of copycat is based on human perception of answer distributions. However, I suggest that we move to a more statistical approach. For example, deciding on some arbitrary baseline answer distribution and then modifying copycat to obtain other answer distributions and then comparing distributions with a statistical significance test would actually be indicative of what effect each change had. This paper will include code changes and proposals that lead copycat (and FARG projects in general) to a more statistical and verifiable approach.
While there is a good argument about copycat representing an individual with biases and therefore being incomparable to a distributed group of individuals, I believe that additional effort should be made to test copycat against human subjects. I may include in this paper a concrete proposal on how such an experiment might be done.
Let's simply test the hypothesis: \[H_i\] Copycat will have an improved (significantly different with increased frequencies of more desirable answers and decreased frequencies of less desirable answers: desirability will be determined by some concrete metric, such as the number of relationships that are preserved or mirrored) answer distribution if temperature is turned to a set of distributed metrics. \[H_0\] Copycat's answer distribution will be unaffected by changing temperature to a set of distributed metrics.
\subsection{Random Notes}
This is all just free-flow unstructured notes. Don't take anything too seriously :).
Below are a list of relevant primary and secondary sources I am reviewing:
Biological/Psychological Plausibility:
\begin{verbatim}
http://www.cell.com/trends/cognitive-sciences/abstract/S1364-6613(16)30217-0
"There is no evidence for a single site of working memory storage."
https://ekmillerlab.mit.edu/2017/01/10/the-distributed-nature-of-working-memory/
Creativity as a distributed process (SECONDARY: Review primaries)
https://blogs.scientificamerican.com/beautiful-minds/the-real-neuroscience-of-creativity/
cognition results from the dynamic interactions of distributed brain areas operating in large-scale networks
http://scottbarrykaufman.com/wp-content/uploads/2013/08/Bressler_Large-Scale_Brain_10.pdf
\end{verbatim}
\bibliographystyle{alpha}
\bibliography{sample}
\end{document}

28
papers/resources/adj.l Normal file
View File

@ -0,0 +1,28 @@
(defun get-temperature-adjusted-probability (prob &aux low-prob-factor
result)
; This function is a filter: it inputs a value (from 0 to 100) and returns
; a probability (from 0 - 1) based on that value and the temperature. When
; the temperature is 0, the result is (/ value 100), but at higher
; temperatures, values below 50 get raised and values above 50 get lowered
; as a function of temperature.
; I think this whole formula could probably be simplified.
; NOTE(review): depends on the global *temperature* and on the helper
; fake-reciprocal, both defined elsewhere -- confirm their semantics.
(setq result
(cond ((= prob 0) 0)
((<= prob .5)
; low-prob-factor roughly counts the leading decimal zeros of prob
; (at least 1); it sets the target magnitude for the boost below.
(setq low-prob-factor (max 1 (truncate (abs (log prob 10)))))
(min (+ prob
(* (/ (- 10 (sqrt (fake-reciprocal *temperature*)))
100)
(- (expt 10 (- (1- low-prob-factor))) prob)))
.5))
((= prob .5) .5)
((> prob .5)
; Mirror of the low branch: move high probabilities toward .5,
; never dropping below it.
(max (- 1
(+ (- 1 prob)
(* (/ (- 10 (sqrt (fake-reciprocal *temperature*)))
100)
(- 1 (- 1 prob)))))
.5))))
result)

21
papers/resources/best.py Normal file
View File

@ -0,0 +1,21 @@
def _working_best(temp, prob):
    """Sharpen *prob* with a mild power law, then blend toward 0.5.

    NOTE(review): currently byte-identical to _soft_best -- confirm
    which of the two is the canonical variant.
    """
    convergence = .5  # value approached as temperature rises
    power = 1.05
    if prob < .5:
        sharpened = prob ** power
    else:
        sharpened = prob ** (1 / power)
    return _weighted(temp, prob, convergence, sharpened)
def _soft_best(temp, prob):
    """Power-law sharpening of *prob*, blended toward 0.5 by temperature.

    NOTE(review): currently byte-identical to _working_best -- confirm
    which of the two is the canonical variant.
    """
    convergence, power = .5, 1.05
    exponent = power if prob < .5 else 1 / power
    return _weighted(temp, prob, convergence, prob ** exponent)
def _parameterized_best(temp, prob):
    """Like _soft_best, but the convergence target mixes prob itself.

    The target is a weighted average of prob (weight alpha) and a flat
    0.5 (weight beta) rather than a fixed 0.5.
    """
    alpha, beta = 5, 1
    base = .5
    convergence = (alpha * prob + beta * base) / (alpha + beta)
    power = 1.05
    exponent = power if prob < .5 else 1 / power
    return _weighted(temp, prob, convergence, prob ** exponent)

View File

@ -0,0 +1,12 @@
import math
def _entropy(temp, prob):
if prob == 0 or prob == 0.5 or temp == 0:
return prob
if prob < 0.5:
return 1.0 - _original(temp, 1.0 - prob)
coldness = 100.0 - temp
a = math.sqrt(coldness)
c = (10 - a) / 100
f = (c + 1) * prob
return -f * math.log2(f)

BIN
papers/resources/final.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,12 @@
import math
def _original(temp, prob):
if prob == 0 or prob == 0.5 or temp == 0:
return prob
if prob < 0.5:
return 1.0 - _original(temp, 1.0 - prob)
coldness = 100.0 - temp
a = math.sqrt(coldness)
c = (10 - a) / 100
f = (c + 1) * prob
return max(f, 0.5)

View File

@ -0,0 +1,28 @@
def _weighted(temp, prob, s, u):
weighted = (temp / 100) * s + ((100 - temp) / 100) * u
return weighted
def _weighted_inverse(temp, prob):
    """Converge toward the complement (1 - prob) as temperature rises."""
    return _weighted(temp, prob, 1 - prob, prob)
# Uses .5 instead of 1-prob
def _fifty_converge(temp, prob):
    """Converge toward a flat 0.5 as temperature rises."""
    return _weighted(temp, prob, 0.5, prob)
# Curves to the average of the (1-p) and .5
def _soft_curve(temp, prob):
    """Converge toward the midpoint of (1 - prob) and 0.5, capped at 1."""
    midpoint = (1.5 - prob) / 2
    return min(1, _weighted(temp, prob, midpoint, prob))
# Curves to the weighted average of the (1-p) and .5
def _weighted_soft_curve(temp, prob):
weight = 100
gamma = .5 # convergance value
alpha = 1 # gamma weight
beta = 3 # iprob weight
curved = min(1,
(temp / weight) *
((alpha * gamma + beta * (1 - prob)) /
(alpha + beta)) +
((weight - temp) / weight) * prob)
return curved

55
papers/sources.bib Normal file
View File

@ -0,0 +1,55 @@
@article{linhares,
author = "Alexandre Linhares",
title = "The emergence of choice: Decision-making and strategic thinking through analogies",
journal = "Information Sciences",
volume = "259",
pages = "36-56",
year = "2014"
}
@article{compmodeling,
author = "Casper Addyman and Robert M. French",
title = "Computational modeling in cognitive science: a manifesto for change.",
journal = "Topics in Cognitive Science",
year="2012"
}
@book{analogyasperception,
title = {Analogy Making as Perception},
author = {Melanie Mitchell},
isbn = {0-262-13289-3},
year = {1993},
publisher = {Massachusetts Institute of Technology}
}
@book{fluidconcepts,
title={Fluid Concepts and Creative Analogies},
author={Douglas Hofstadter and FARG},
isbn={0-465-02475-0},
year={1995},
publisher={Basic Books}
}
@book{computerandthebrain,
title={The Computer \& The Brain},
author={John Von Neumann},
isbn={978-0-300-18111-1},
year={1958},
publisher={Yale University Press}
}
@book{geb,
title={Gödel, Escher, Bach: an Eternal Golden Braid},
author={Douglas Hofstadter},
isbn={0-456-02656-7},
year={1979},
publisher={Basic Books}
}
@online{knuthwebsite,
author = "Donald Knuth",
title = "Knuth: Computers and Typesetting",
url = "http://www-cs-faculty.stanford.edu/~uno/abcde.html",
keywords = "latex,knuth"
}

802
results.txt Normal file
View File

@ -0,0 +1,802 @@
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.adj-tests
Problem: abc:abd::efg:_
Comparing None with pmeta: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with soft: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with entropy: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with meta: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with fifty_converge: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with sbest: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with average_alt: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with weighted_soft: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Failed.
Comparing None with inverse: Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with alt_fifty: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with best: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with none: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with pbest: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of efd, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
Comparing None with original: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.adj-tests
Problem: abc:abd::ijk:_
Comparing None with pmeta: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with soft: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Failed.
Comparing None with entropy: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of jjk, but got 4
Failed.
Comparing None with meta: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijd, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with fifty_converge: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with sbest: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijd, but got 1
Warning! Expected 0 counts of ijj, but got 4
Failed.
Comparing None with average_alt: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with weighted_soft: Warning! Expected 0 counts of ijj, but got 4
Succeeded.
Comparing None with inverse: Warning! Expected 0 counts of ijd, but got 1
Failed.
Comparing None with alt_fifty: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with best: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijd, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with none: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Failed.
Comparing None with pbest: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
Comparing None with original: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.adj-tests
Problem: abc:abd::xyz:_
Comparing None with pmeta: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with soft: Warning! Expected 0 counts of xd, but got 1
Failed.
Comparing None with entropy: Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Failed.
Comparing None with meta: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with fifty_converge: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with sbest: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with average_alt: Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with weighted_soft: Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with inverse: Warning! Expected 0 counts of xd, but got 1
Failed.
Comparing None with alt_fifty: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Failed.
Comparing None with best: Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with none: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of yyz, but got 9
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with pbest: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of yyz, but got 9
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
Comparing None with original: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.adj-tests
Problem: abc:abd::ijkk:_
Comparing None with pmeta: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with soft: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with entropy: Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with meta: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with fifty_converge: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with sbest: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with average_alt: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with weighted_soft: Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with inverse: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with alt_fifty: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with best: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with none: Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with pbest: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
Comparing None with original: Warning! Expected 0 counts of ijkd, but got 1
Warning! Expected 0 counts of ijkk, but got 18
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.adj-tests
Problem: abc:abd::mrrjjj:_
Comparing None with pmeta: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with soft: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with entropy: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Succeeded.
Comparing None with meta: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with fifty_converge: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with sbest: Warning! Expected 0 counts of nrrjjj, but got 8
Failed.
Comparing None with average_alt: Warning! Expected 0 counts of nrrjjj, but got 8
Failed.
Comparing None with weighted_soft: Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with inverse: Failed.
Comparing None with alt_fifty: Warning! Expected 0 counts of nrrjjj, but got 8
Failed.
Comparing None with best: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with none: Warning! Expected 0 counts of nrrjjj, but got 8
Failed.
Comparing None with pbest: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing None with original: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.nuke-temp
Problem: abc:abd::efg:_
Comparing None with None: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.nuke-temp
Problem: abc:abd::ijk:_
Comparing None with None: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.nuke-temp
Problem: abc:abd::xyz:_
Comparing None with None: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of yyz, but got 9
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.nuke-temp
Problem: abc:abd::ijkk:_
Comparing None with None: Warning! Expected 0 counts of ijkk, but got 18
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.nuke-temp
Problem: abc:abd::mrrjjj:_
Comparing None with None: Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.soft-remove
Problem: abc:abd::efg:_
Comparing None with None: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of eff, but got 6
Warning! Expected 0 counts of ffg, but got 1
Warning! Expected 0 counts of dfg, but got 2
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.soft-remove
Problem: abc:abd::ijk:_
Comparing None with None: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 4
Warning! Expected 0 counts of jjk, but got 4
Succeeded.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.soft-remove
Problem: abc:abd::xyz:_
Comparing None with None: Warning! Expected 0 counts of xyy, but got 5
Warning! Expected 0 counts of wyz, but got 9
Warning! Expected 0 counts of xd, but got 1
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 5
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.soft-remove
Problem: abc:abd::ijkk:_
Comparing None with None: Warning! Expected 0 counts of ijkk, but got 18
Failed.
--------------------------------------------------------------------------------
distributions/.legacy x distributions/.soft-remove
Problem: abc:abd::mrrjjj:_
Comparing None with None: Warning! Expected 0 counts of nrrjjj, but got 8
Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.nuke-temp
Problem: abc:abd::efg:_
Comparing pmeta with None: Succeeded.
Comparing soft with None: Succeeded.
Comparing entropy with None: Succeeded.
Comparing meta with None: Succeeded.
Comparing fifty_converge with None: Succeeded.
Comparing sbest with None: Succeeded.
Comparing average_alt with None: Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of eff, but got 1
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of efg, but got 1
Succeeded.
Comparing alt_fifty with None: Succeeded.
Comparing best with None: Succeeded.
Comparing none with None: Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.nuke-temp
Problem: abc:abd::ijk:_
Comparing pmeta with None: Succeeded.
Comparing soft with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing entropy with None: Warning! Expected 0 counts of ijj, but got 1
Succeeded.
Comparing meta with None: Succeeded.
Comparing fifty_converge with None: Succeeded.
Comparing sbest with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing average_alt with None: Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of djk, but got 2
Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 1
Warning! Expected 0 counts of jjk, but got 2
Succeeded.
Comparing alt_fifty with None: Succeeded.
Comparing best with None: Succeeded.
Comparing none with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.nuke-temp
Problem: abc:abd::xyz:_
Comparing pmeta with None: Warning! Expected 0 counts of yyz, but got 1
Warning! Expected 0 counts of dyz, but got 1
Succeeded.
Comparing soft with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 3
Warning! Expected 0 counts of yyz, but got 2
Warning! Expected 0 counts of dyz, but got 2
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing entropy with None: Warning! Expected 0 counts of yyz, but got 2
Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing meta with None: Warning! Expected 0 counts of yyz, but got 2
Warning! Expected 0 counts of wyz, but got 1
Succeeded.
Comparing fifty_converge with None: Warning! Expected 0 counts of yyz, but got 1
Succeeded.
Comparing sbest with None: Warning! Expected 0 counts of yyz, but got 1
Succeeded.
Comparing average_alt with None: Warning! Expected 0 counts of yyz, but got 1
Warning! Expected 0 counts of xyy, but got 2
Warning! Expected 0 counts of wyz, but got 2
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of yyz, but got 4
Warning! Expected 0 counts of xyy, but got 3
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of wyz, but got 3
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 4
Warning! Expected 0 counts of yyz, but got 9
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing alt_fifty with None: Warning! Expected 0 counts of yyz, but got 2
Warning! Expected 0 counts of wyz, but got 2
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing best with None: Warning! Expected 0 counts of yyz, but got 1
Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 1
Succeeded.
Comparing none with None: Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Warning! Expected 0 counts of yyz, but got 1
Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.nuke-temp
Problem: abc:abd::ijkk:_
Comparing pmeta with None: Warning! Expected 0 counts of ijll, but got 13
Succeeded.
Comparing soft with None: Warning! Expected 0 counts of ijll, but got 17
Failed.
Comparing entropy with None: Succeeded.
Comparing meta with None: Warning! Expected 0 counts of ijkkk, but got 1
Warning! Expected 0 counts of ijll, but got 11
Succeeded.
Comparing fifty_converge with None: Warning! Expected 0 counts of ijll, but got 15
Failed.
Comparing sbest with None: Warning! Expected 0 counts of ijll, but got 19
Failed.
Comparing average_alt with None: Warning! Expected 0 counts of ijkkk, but got 1
Warning! Expected 0 counts of ijll, but got 11
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of djkk, but got 1
Warning! Expected 0 counts of ijll, but got 10
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of djkk, but got 1
Warning! Expected 0 counts of jjkk, but got 1
Warning! Expected 0 counts of ijll, but got 11
Succeeded.
Comparing alt_fifty with None: Warning! Expected 0 counts of ijkkk, but got 2
Warning! Expected 0 counts of ijll, but got 15
Failed.
Comparing best with None: Warning! Expected 0 counts of ijll, but got 18
Failed.
Comparing none with None: Warning! Expected 0 counts of ijll, but got 16
Failed.
Comparing pbest with None: Warning! Expected 0 counts of ijkkk, but got 2
Warning! Expected 0 counts of ijll, but got 17
Failed.
Comparing original with None: Warning! Expected 0 counts of ijll, but got 13
Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.nuke-temp
Problem: abc:abd::mrrjjj:_
Comparing pmeta with None: Warning! Expected 0 counts of mrrkkk, but got 24
Warning! Expected 0 counts of mrrjjjj, but got 4
Failed.
Comparing soft with None: Warning! Expected 0 counts of mrrkkk, but got 10
Warning! Expected 0 counts of mrrjjjj, but got 5
Succeeded.
Comparing entropy with None: Succeeded.
Comparing meta with None: Warning! Expected 0 counts of mrrjjjj, but got 7
Warning! Expected 0 counts of mrrd, but got 1
Warning! Expected 0 counts of mrrkkk, but got 12
Failed.
Comparing fifty_converge with None: Warning! Expected 0 counts of mrrkkk, but got 13
Warning! Expected 0 counts of mrrjjjj, but got 5
Failed.
Comparing sbest with None: Warning! Expected 0 counts of mrrkkk, but got 7
Warning! Expected 0 counts of mrrjjjj, but got 8
Succeeded.
Comparing average_alt with None: Warning! Expected 0 counts of mrrkkk, but got 11
Warning! Expected 0 counts of mrrjjjj, but got 2
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of mrrjjjj, but got 2
Warning! Expected 0 counts of mrrkkk, but got 14
Warning! Expected 0 counts of mrrjkk, but got 2
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of mrrjjjj, but got 1
Warning! Expected 0 counts of mrrkkk, but got 9
Warning! Expected 0 counts of mrrjkk, but got 1
Succeeded.
Comparing alt_fifty with None: Warning! Expected 0 counts of mrrkkk, but got 13
Warning! Expected 0 counts of mrrjjjj, but got 7
Failed.
Comparing best with None: Warning! Expected 0 counts of mrrkkk, but got 13
Warning! Expected 0 counts of mrrjjjj, but got 4
Succeeded.
Comparing none with None: Warning! Expected 0 counts of mrrkkk, but got 25
Failed.
Comparing pbest with None: Warning! Expected 0 counts of mrrd, but got 1
Warning! Expected 0 counts of mrrjjjj, but got 4
Warning! Expected 0 counts of mrrkkk, but got 18
Failed.
Comparing original with None: Warning! Expected 0 counts of mrrkkk, but got 25
Failed.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.soft-remove
Problem: abc:abd::efg:_
Comparing pmeta with None: Succeeded.
Comparing soft with None: Warning! Expected 0 counts of ffg, but got 1
Succeeded.
Comparing entropy with None: Succeeded.
Comparing meta with None: Warning! Expected 0 counts of ffg, but got 1
Succeeded.
Comparing fifty_converge with None: Warning! Expected 0 counts of ffg, but got 1
Succeeded.
Comparing sbest with None: Succeeded.
Comparing average_alt with None: Warning! Expected 0 counts of ffg, but got 1
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of eff, but got 1
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of efg, but got 1
Warning! Expected 0 counts of ffg, but got 2
Succeeded.
Comparing alt_fifty with None: Succeeded.
Comparing best with None: Succeeded.
Comparing none with None: Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.soft-remove
Problem: abc:abd::ijk:_
Comparing pmeta with None: Succeeded.
Comparing soft with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing entropy with None: Warning! Expected 0 counts of ijj, but got 1
Succeeded.
Comparing meta with None: Succeeded.
Comparing fifty_converge with None: Succeeded.
Comparing sbest with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing average_alt with None: Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of djk, but got 2
Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of hjk, but got 1
Warning! Expected 0 counts of ijj, but got 1
Warning! Expected 0 counts of jjk, but got 2
Succeeded.
Comparing alt_fifty with None: Succeeded.
Comparing best with None: Succeeded.
Comparing none with None: Warning! Expected 0 counts of jjk, but got 1
Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.soft-remove
Problem: abc:abd::xyz:_
Comparing pmeta with None: Warning! Expected 0 counts of dyz, but got 1
Succeeded.
Comparing soft with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 3
Warning! Expected 0 counts of dyz, but got 2
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing entropy with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing meta with None: Warning! Expected 0 counts of wyz, but got 1
Succeeded.
Comparing fifty_converge with None: Succeeded.
Comparing sbest with None: Succeeded.
Comparing average_alt with None: Warning! Expected 0 counts of xyy, but got 2
Warning! Expected 0 counts of wyz, but got 2
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of xyy, but got 3
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of wyz, but got 3
Failed.
Comparing inverse with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 4
Warning! Expected 0 counts of dyz, but got 1
Warning! Expected 0 counts of xyz, but got 1
Failed.
Comparing alt_fifty with None: Warning! Expected 0 counts of wyz, but got 2
Warning! Expected 0 counts of xyz, but got 1
Succeeded.
Comparing best with None: Warning! Expected 0 counts of xyy, but got 1
Warning! Expected 0 counts of wyz, but got 1
Succeeded.
Comparing none with None: Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.soft-remove
Problem: abc:abd::ijkk:_
Comparing pmeta with None: Succeeded.
Comparing soft with None: Failed.
Comparing entropy with None: Failed.
Comparing meta with None: Warning! Expected 0 counts of ijkkk, but got 1
Succeeded.
Comparing fifty_converge with None: Succeeded.
Comparing sbest with None: Failed.
Comparing average_alt with None: Warning! Expected 0 counts of ijkkk, but got 1
Succeeded.
Comparing weighted_soft with None: Warning! Expected 0 counts of djkk, but got 1
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of djkk, but got 1
Warning! Expected 0 counts of jjkk, but got 1
Succeeded.
Comparing alt_fifty with None: Warning! Expected 0 counts of ijkkk, but got 2
Succeeded.
Comparing best with None: Failed.
Comparing none with None: Succeeded.
Comparing pbest with None: Warning! Expected 0 counts of ijkkk, but got 2
Failed.
Comparing original with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.adj-tests x distributions/.soft-remove
Problem: abc:abd::mrrjjj:_
Comparing pmeta with None: Failed.
Comparing soft with None: Failed.
Comparing entropy with None: Failed.
Comparing meta with None: Failed.
Comparing fifty_converge with None: Failed.
Comparing sbest with None: Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing average_alt with None: Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing weighted_soft with None: Warning! Expected 0 counts of nrrjjj, but got 1
Warning! Expected 0 counts of mrrjkk, but got 2
Succeeded.
Comparing inverse with None: Warning! Expected 0 counts of nrrjjj, but got 1
Warning! Expected 0 counts of mrrjkk, but got 1
Warning! Expected 0 counts of mrrjjd, but got 3
Succeeded.
Comparing alt_fifty with None: Warning! Expected 0 counts of mrrjjd, but got 1
Failed.
Comparing best with None: Succeeded.
Comparing none with None: Warning! Expected 0 counts of mrrjjd, but got 1
Succeeded.
Comparing pbest with None: Succeeded.
Comparing original with None: Failed.
--------------------------------------------------------------------------------
distributions/.nuke-temp x distributions/.soft-remove
Problem: abc:abd::efg:_
Comparing None with None: Warning! Expected 0 counts of ffg, but got 1
Succeeded.
--------------------------------------------------------------------------------
distributions/.nuke-temp x distributions/.soft-remove
Problem: abc:abd::ijk:_
Comparing None with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.nuke-temp x distributions/.soft-remove
Problem: abc:abd::xyz:_
Comparing None with None: Succeeded.
--------------------------------------------------------------------------------
distributions/.nuke-temp x distributions/.soft-remove
Problem: abc:abd::ijkk:_
Comparing None with None: Failed.
--------------------------------------------------------------------------------
distributions/.nuke-temp x distributions/.soft-remove
Problem: abc:abd::mrrjjj:_
Comparing None with None: Warning! Expected 0 counts of nrrjjj, but got 1
Warning! Expected 0 counts of mrrjjd, but got 2
Failed.

175
tests.py
View File

@ -1,143 +1,62 @@
import unittest
from pprint import pprint
import os.path
import pickle
import argparse
import sys
from copycat import Copycat
from pprint import pprint
from copycat import Problem
from copycat.statistics import iso_chi_squared
# TODO: update test cases to use entropy
# CHI2 values for n degrees freedom
_chiSquared_table = {
1:3.841,
2:5.991,
3:7.815,
4:9.488,
5:11.071,
6:12.592,
7:14.067,
8:15.507,
9:16.919,
10:18.307
}
def generate(iterations=30, filename=None):
    """Build the reference Problem set and pickle it to disk.

    Args:
        iterations: number of Copycat runs recorded per problem
            (default 30, matching the original hard-coded value).
        filename: destination path for the pickle; defaults to
            ``TestCopycat.Filename`` (set from the command line).

    Returns:
        The list of freshly constructed Problem instances.
    """
    if filename is None:
        filename = TestCopycat.Filename
    print('Generating distributions for new file')
    # Each Problem encodes an analogy puzzle initial:modified::target:_
    problems = [
        Problem('abc', 'abd', 'efg', iterations),
        Problem('abc', 'abd', 'ijk', iterations),
        Problem('abc', 'abd', 'xyz', iterations),
        Problem('abc', 'abd', 'ijkk', iterations),
        Problem('abc', 'abd', 'mrrjjj', iterations)]
    # 'wb': pickle writes a binary stream.
    with open(filename, 'wb') as outfile:
        pickle.dump(problems, outfile)
    return problems
class TestCopycat(unittest.TestCase):
    """Statistical regression tests for Copycat.

    Answer distributions produced by fresh runs are compared against
    previously recorded expectations with a chi-squared goodness-of-fit
    test, so individual runs may vary without failing the suite.
    """

    # Path of the pickled reference distributions; assigned from the
    # command line in the __main__ block below.
    Filename = None

    def setUp(self):
        # Show both the standard and the custom message on failures.
        self.longMessage = True  # new in Python 2.7

    def assertProbabilitiesLookRoughlyLike(self, actual, expected, iterations):
        """Fail if the observed answer counts differ significantly
        (chi-squared, p = 0.05) from the expected counts.

        Answers with an expected count of zero are only warned about,
        since they would divide by zero in the chi-squared sum.

        NOTE(review): `iterations` is accepted but never used here —
        confirm whether it was meant to normalise the counts.
        """
        # Union of answer strings seen in either distribution.
        answerKeys = set(list(actual.keys()) + list(expected.keys()))
        degreesFreedom = len(answerKeys)
        chiSquared = 0
        # An answer absent from a distribution counts as zero occurrences.
        get_count = lambda k, d : d[k]['count'] if k in d else 0
        for k in answerKeys:
            E = get_count(k, expected)
            O = get_count(k, actual)
            if E == 0:
                # Cannot be folded into chi-squared (division by zero).
                print('Warning! Expected 0 counts of {}, but got {}'.format(k, O))
            else:
                chiSquared += (O - E) ** 2 / E
        # Compare against the p = 0.05 critical value for this many degrees
        # of freedom. NOTE(review): the table only covers 1..10 degrees of
        # freedom — more distinct answers than that raises KeyError.
        if chiSquared >= _chiSquared_table[degreesFreedom]:
            self.fail('Significant different between expected and actual answer distributions: \n' +
                'Chi2 value: {} with {} degrees of freedom'.format(chiSquared, degreesFreedom))

    def run_testcase(self, initial, modified, target, iterations, expected):
        """Run Copycat `iterations` times on one analogy problem and check
        the resulting answer distribution against `expected`."""
        print('expected:')
        pprint(expected)
        actual = Copycat().run(initial, modified, target, iterations)
        print('actual:')
        pprint(actual)
        # Every iteration must have produced exactly one answer.
        self.assertEqual(sum(a['count'] for a in list(actual.values())), iterations)
        self.assertProbabilitiesLookRoughlyLike(actual, expected, iterations)

    def test_simple_cases(self):
        """Baseline successor problems with recorded distributions."""
        self.run_testcase('abc', 'abd', 'efg', 30,
            {'dfg': {'avgtemp': 72.37092377767368, 'avgtime': 475.0, 'count': 1},
             'efd': {'avgtemp': 49.421147725239024, 'avgtime': 410.5, 'count': 2},
             'efh': {'avgtemp': 19.381658717913258,
                     'avgtime': 757.1851851851852,
                     'count': 27}})
        self.run_testcase('abc', 'abd', 'ijk', 30,
            {'ijd': {'avgtemp': 14.691978036611559, 'avgtime': 453.0, 'count': 1},
             'ijl': {'avgtemp': 22.344023091153964,
                     'avgtime': 742.1428571428571,
                     'count': 28},
             'jjk': {'avgtemp': 11.233344554288019, 'avgtime': 595.0, 'count': 1}})

    def test_abc_xyz(self):
        """The classic hard problem: abc:abd::xyz:_ ."""
        self.run_testcase('abc', 'abd', 'xyz', 100,
            {'dyz': {'avgtemp': 16.78130739435325, 'avgtime': 393.0, 'count': 1},
             'wyz': {'avgtemp': 26.100450643627426, 'avgtime': 4040.0, 'count': 2},
             'xyd': {'avgtemp': 21.310415433987586,
                     'avgtime': 5592.277777777777,
                     'count': 90},
             'xyz': {'avgtemp': 23.798124933747882, 'avgtime': 3992.0, 'count': 1},
             'yyz': {'avgtemp': 27.137975077133788, 'avgtime': 4018.5, 'count': 6}})

    def test_ambiguous_case(self):
        """ijkk admits several defensible answers; check their mix."""
        self.run_testcase('abc', 'abd', 'ijkk', 100,
            {'ijd': {'avgtemp': 55.6767488926397, 'avgtime': 948.0, 'count': 1},
             'ijkd': {'avgtemp': 78.09357723857647, 'avgtime': 424.5, 'count': 2},
             'ijkk': {'avgtemp': 68.54252699118226, 'avgtime': 905.5, 'count': 2},
             'ijkkk': {'avgtemp': 21.75444235750483,
                       'avgtime': 2250.3333333333335,
                       'count': 3},
             'ijkl': {'avgtemp': 38.079858245918466,
                      'avgtime': 1410.2391304347825,
                      'count': 46},
             'ijll': {'avgtemp': 27.53845719945872,
                      'avgtime': 1711.8863636363637,
                      'count': 44},
             'jjkk': {'avgtemp': 75.76606718990365, 'avgtime': 925.0, 'count': 2}})

    def test_mrrjjj(self):
        """Group-length problem: abc:abd::mrrjjj:_ ."""
        self.run_testcase('abc', 'abd', 'mrrjjj', 30,
            {'mrrjjd': {'avgtemp': 44.46354725386579, 'avgtime': 1262.0, 'count': 1},
             'mrrjjjj': {'avgtemp': 17.50702440140412, 'avgtime': 1038.375, 'count': 8},
             'mrrjjk': {'avgtemp': 55.189156978290264,
                        'avgtime': 1170.6363636363637,
                        'count': 11},
             'mrrkkk': {'avgtemp': 43.709349775080746, 'avgtime': 1376.2, 'count': 10}})

    # The bare string below is intentionally disabled test code kept for
    # reference; it is a no-op expression statement at class scope.
    '''
    Below are examples of improvements that could be made to copycat.
    def test_elongation(self):
        # This isn't remotely what a human would say.
        self.run_testcase('abc', 'aabbcc', 'milk', 30,
            {'lilk': {'avgtemp': 68.18128407669258,
                      'avgtime': 1200.6666666666667,
                      'count': 3},
             'mikj': {'avgtemp': 57.96973195905564,
                      'avgtime': 1236.888888888889,
                      'count': 9},
             'milb': {'avgtemp': 79.98413990245763, 'avgtime': 255.0, 'count': 1},
             'milj': {'avgtemp': 64.95289549955349, 'avgtime': 1192.4, 'count': 15},
             'milk': {'avgtemp': 66.11387816293755, 'avgtime': 1891.5, 'count': 2}})
    def test_repairing_successor_sequence(self):
        # This isn't remotely what a human would say.
        self.run_testcase('aba', 'abc', 'xyx', 30,
            {'cyx': {'avgtemp': 82.10555880340601, 'avgtime': 2637.0, 'count': 2},
             'xc': {'avgtemp': 73.98845045179358, 'avgtime': 5459.5, 'count': 2},
             'xyc': {'avgtemp': 77.1384941639991,
                     'avgtime': 4617.434782608696,
                     'count': 23},
             'xyx': {'avgtemp': 74.39287653046891, 'avgtime': 3420.0, 'count': 3}})
    def test_nonsense(self):
        self.run_testcase('cat', 'dog', 'cake', 10, {
            'cakg': {'count': 99, 'avgtemp': 70},
            'gake': {'count': 1, 'avgtemp': 59},
        })
        self.run_testcase('cat', 'dog', 'kitten', 10, {
            'kitteg': {'count': 96, 'avgtemp': 66},
            'kitten': {'count': 4, 'avgtemp': 68},
        })
    '''

    def test(self):
        """Load the pickled reference Problems — regenerating them on any
        load failure — and run each problem's own chi-squared check."""
        print('Testing copycat with input file: {}'.format(TestCopycat.Filename))
        try:
            with open(TestCopycat.Filename, 'rb') as infile:
                problems = pickle.load(infile)
        except Exception as e:
            # Any failure (missing or corrupt pickle) falls back to
            # regenerating the reference distributions from scratch.
            print('Generating due to error:')
            print(e)
            problems = generate()
        for problem in problems:
            problem.test(iso_chi_squared)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--generate', action='store_true',
                        help='regenerate the pickled reference distributions')
    parser.add_argument('filename', default='.distributions', nargs='?',
                        help='path of the pickled distributions file')
    # nargs='*' (not '?') so leftover arguments form a list: with '?' a
    # single value would be a bare string, and the sys.argv slice
    # assignment below would splat it into individual characters.
    parser.add_argument('unittest_args', default=[], nargs='*',
                        help='arguments forwarded to unittest.main()')
    args = parser.parse_args()
    TestCopycat.Filename = args.filename
    if args.generate:
        generate()
    # Hand any remaining arguments to unittest (leaving sys.argv[0] alone).
    sys.argv[1:] = args.unittest_args
    unittest.main()