% copycat/slipnet_analysis/slipnet_depth_analysis.tex

\documentclass[11pt,twocolumn]{article}

\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{amsmath,amssymb}
\usepackage{graphicx}
\usepackage{booktabs}
\usepackage{hyperref}
\usepackage[margin=0.9in]{geometry}
\usepackage{natbib}
\usepackage{float}
\usepackage{caption}
\usepackage{subcaption}

\title{Conceptual Depth and Graph Topology in the Copycat Slipnet: A Correlation Analysis}

\author{
Slipnet Analysis Project\\
\texttt{slipnet\_analysis/}
}

\date{\today}

\begin{document}

\maketitle

\begin{abstract}
The Copycat system employs a semantic network (slipnet) where each node has a ``conceptual depth'' parameter representing abstraction level. We investigate whether conceptual depth correlates with various graph-theoretic metrics including hop distance to letter nodes, centrality measures, and eccentricity. Analyzing 33 non-letter nodes, we find that \textbf{eccentricity is the only metric significantly correlated with conceptual depth} (Pearson $r = -0.380$, $p = 0.029$), explaining 14.4\% of variance. Hop distance to letters shows no significant correlation ($r = 0.281$, $p = 0.113$), nor do standard centrality measures (degree, betweenness, closeness, eigenvector, PageRank). The negative eccentricity correlation indicates that deeper concepts tend to be more globally central---closer to all other nodes in the network. These findings suggest that while conceptual depth is largely independent of local connectivity patterns, it partially reflects global network position.
\end{abstract}

\section{Introduction}

The Copycat project, developed by Douglas Hofstadter and Melanie Mitchell \citep{mitchell1993,hofstadter1995}, models analogical reasoning using a semantic network called the \emph{slipnet}. Each node has a \emph{conceptual depth} parameter (10--90) intended to capture abstraction level. We systematically test whether any graph-theoretic metric correlates with this hand-assigned depth value.

\subsection{The Slipnet}

The slipnet contains 59 nodes: 26 letters (a--z), 5 numbers (1--5), and 28 concept nodes (categories, positions, relations). These are connected by 202 directed links (104 undirected edges). Five nodes form a disconnected cluster (\texttt{identity}, \texttt{opposite}, \texttt{letter}, \texttt{group}, \texttt{objectCategory}).

\subsection{Research Questions}

We ask: Does conceptual depth correlate with...
\begin{enumerate}
    \item Hop distance to concrete letter nodes?
    \item Local centrality (degree, clustering)?
    \item Global centrality (betweenness, closeness, eigenvector)?
    \item Network position (eccentricity)?
\end{enumerate}

\section{Methods}

\subsection{Graph Construction}

We constructed an undirected graph $G = (V, E)$ from the slipnet using NetworkX, with $|V| = 59$ vertices and $|E| = 104$ edges.

\subsection{Metrics Computed}

For each non-letter node, we computed:

\begin{itemize}
    \item \textbf{Hop distance}: Minimum edges to any letter (a--z). Unreachable nodes assigned $2 \times \max(\text{hops}) = 8$.
    \item \textbf{Degree centrality}: Fraction of nodes connected to.
    \item \textbf{Betweenness centrality}: Fraction of shortest paths passing through node.
    \item \textbf{Closeness centrality}: Reciprocal of average distance to all nodes.
    \item \textbf{Eigenvector centrality}: Importance based on connections to important nodes.
    \item \textbf{PageRank}: Random walk stationary distribution.
    \item \textbf{Clustering coefficient}: Fraction of neighbor pairs that are connected.
    \item \textbf{Average neighbor degree}: Mean degree of a node's neighbors.
    \item \textbf{Eccentricity}: Maximum distance to any other node.
\end{itemize}

\subsection{Statistical Analysis}

For each metric, we computed Pearson's $r$, Spearman's $\rho$, and $R^2$ against conceptual depth. Significance was assessed at $\alpha = 0.05$, without correction for multiple comparisons.

\section{Results}

\subsection{Correlation Summary}

Table~\ref{tab:correlations} presents all correlations, ranked by $|r|$.

\begin{table}[H]
\centering
\caption{Correlations with conceptual depth ($n = 33$)}
\label{tab:correlations}
\small
\begin{tabular}{lccc}
\toprule
Metric & Pearson $r$ & $p$-value & $R^2$ \\
\midrule
Eccentricity & $-0.380$* & 0.029 & 0.144 \\
Hop distance & $+0.281$ & 0.113 & 0.079 \\
Closeness & $-0.270$ & 0.129 & 0.073 \\
Degree & $-0.264$ & 0.137 & 0.070 \\
PageRank & $-0.257$ & 0.149 & 0.066 \\
Clustering & $-0.219$ & 0.221 & 0.048 \\
Betweenness & $-0.172$ & 0.340 & 0.029 \\
Eigenvector & $-0.148$ & 0.410 & 0.022 \\
Avg neighbor deg & $+0.052$ & 0.775 & 0.003 \\
\bottomrule
\end{tabular}

\vspace{0.5em}
{\footnotesize * = significant at $p < 0.05$}
\end{table}

\textbf{Key finding}: Only eccentricity achieves statistical significance. The negative correlation ($r = -0.380$) indicates that higher-depth concepts have \emph{lower} eccentricity---they are more globally central, with shorter maximum distances to other nodes.

\subsection{Visualization}

Figure~\ref{fig:comparison} shows scatter plots for all metrics. The eccentricity plot shows the clearest negative trend.

\begin{figure}[H]
\centering
\includegraphics[width=\columnwidth]{centrality_comparison.png}
\caption{Conceptual depth vs eight graph metrics. Only eccentricity (*) shows significant correlation.}
\label{fig:comparison}
\end{figure}

\subsection{Hop Distance Analysis}

The hop distance analysis ($r = 0.281$, $p = 0.113$) found no significant relationship between conceptual depth and distance to letter nodes. This weak positive trend fails significance, with $R^2 = 0.079$ explaining less than 8\% of variance.

Counterexamples abound: \texttt{bondFacet} (depth=90) is only 2 hops from letters, while \texttt{middle} (depth=40) requires 4 hops.

\subsection{Eccentricity: The Significant Finding}

Eccentricity measures the maximum distance from a node to any other node. The significant negative correlation ($r = -0.380$, $p = 0.029$) suggests:

\begin{quote}
\emph{Deeper concepts tend to be positioned more centrally in terms of worst-case distance to any node.}
\end{quote}

Table~\ref{tab:eccentricity} shows examples:

\begin{table}[H]
\centering
\caption{Eccentricity examples}
\label{tab:eccentricity}
\small
\begin{tabular}{lcc}
\toprule
Node & Depth & Eccentricity \\
\midrule
letterCategory & 30 & 4 \\
length & 60 & 5 \\
bondFacet & 90 & 5 \\
\midrule
middle & 40 & 7 \\
identity & 90 & 3 (disconnected) \\
\bottomrule
\end{tabular}
\end{table}

The hub node \texttt{letterCategory} (connected to all 26 letters) has low eccentricity (4), enabling short paths to the entire network.

\subsection{Non-Significant Centralities}

Standard centrality measures show weak negative correlations but none reach significance:

\begin{itemize}
    \item \textbf{Degree} ($r = -0.264$): Deeper nodes don't have more connections.
    \item \textbf{Betweenness} ($r = -0.172$): Deeper nodes aren't more often on shortest paths.
    \item \textbf{Closeness} ($r = -0.270$): Weak trend toward central positioning.
    \item \textbf{PageRank} ($r = -0.257$): Random walk importance unrelated to depth.
\end{itemize}

\section{Discussion}

\subsection{Eccentricity as Global Position}

The eccentricity finding reveals that conceptual depth partially reflects \emph{global} network position. Nodes with high depth tend to have lower eccentricity, meaning they are never ``too far'' from any other node. This differs from local centrality (degree, clustering), which shows no relationship.

Intuitively, abstract concepts like \texttt{bondFacet} or \texttt{samenessGroup} may have been positioned to be accessible from many parts of the conceptual space, even if they don't have many direct connections.

\subsection{Local vs Global Structure}

The contrast between local and global metrics is striking:

\begin{itemize}
    \item \textbf{Local metrics} (degree, clustering): No significant correlation
    \item \textbf{Global metrics}: Eccentricity is significantly correlated; the other global measures (betweenness, closeness, eigenvector, PageRank) are not
\end{itemize}

This suggests depth was assigned based on semantic considerations (abstraction level) that happen to align with global positioning but not with local connectivity patterns.

\subsection{Design Implications}

The modest correlation with eccentricity ($R^2 = 0.144$) means:
\begin{itemize}
    \item 14.4\% of depth variance is explained by global position
    \item 85.6\% reflects other factors (semantic intuition, domain knowledge)
\end{itemize}

For extending the slipnet, this suggests that new abstract concepts should be positioned with moderate connectivity to multiple network regions, not necessarily with high local degree.

\subsection{Limitations}

\begin{enumerate}
    \item \textbf{Sample size}: 33 nodes limits power; the eccentricity finding should be interpreted cautiously.
    \item \textbf{Multiple comparisons}: Testing 9 metrics inflates Type I error. A Bonferroni-corrected threshold of $p < 0.0056$ would render eccentricity non-significant.
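    \item \textbf{Disconnected nodes}: Five nodes form a cluster that is unreachable from the rest of the network, which affects both the eccentricity values and the hop distances (the latter are imputed as 8 for unreachable nodes).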
\end{enumerate}

\section{Conclusion}

Among nine graph metrics tested, only \textbf{eccentricity} correlates significantly with conceptual depth at the uncorrected $\alpha = 0.05$ level ($r = -0.380$, $p = 0.029$). Deeper concepts tend to occupy more globally central positions. However, this explains only 14.4\% of variance, suggesting that conceptual depth primarily reflects semantic judgments rather than topological properties.

Notably, hop distance to letter nodes shows no significant correlation ($r = 0.281$, $p = 0.113$), offering no support for the intuition that abstract concepts should be topologically distant from concrete letters. The slipnet's design keeps depth and local connectivity largely orthogonal while partially aligning depth with global network position.

\section*{Data Availability}

Scripts and data: \texttt{slipnet\_analysis/}
\begin{itemize}
    \item \texttt{compute\_centrality.py}: Full analysis
    \item \texttt{centrality\_results.json}: Numerical results
    \item \texttt{centrality\_comparison.png}: Comparison plot
\end{itemize}

\appendix

\section{Complete Correlation Data}

\begin{table}[H]
\centering
\caption{Full correlation statistics}
\label{tab:full}
\small
\begin{tabular}{lcccc}
\toprule
Metric & $r$ & $p$ & $\rho$ & $\rho$-$p$ \\
\midrule
Eccentricity & $-0.380$ & 0.029 & $-0.299$ & 0.091 \\
Hop distance & $+0.281$ & 0.113 & $+0.141$ & 0.433 \\
Closeness & $-0.270$ & 0.129 & $-0.180$ & 0.315 \\
Degree & $-0.264$ & 0.137 & $-0.236$ & 0.186 \\
PageRank & $-0.257$ & 0.149 & $-0.191$ & 0.288 \\
Clustering & $-0.219$ & 0.221 & $-0.276$ & 0.120 \\
Betweenness & $-0.172$ & 0.340 & $-0.080$ & 0.658 \\
Eigenvector & $-0.148$ & 0.410 & $-0.237$ & 0.185 \\
Avg neighbor & $+0.052$ & 0.775 & $-0.301$ & 0.089 \\
\bottomrule
\end{tabular}
\end{table}

\section{Node Data Sample}

\begin{table}[H]
\centering
\caption{Selected nodes with metrics}
\label{tab:nodes}
\small
\begin{tabular}{lcccc}
\toprule
Node & Depth & Deg & Btw & Ecc \\
\midrule
letterCategory & 30 & 0.50 & 0.68 & 4 \\
length & 60 & 0.17 & 0.25 & 5 \\
bondFacet & 90 & 0.03 & 0.00 & 5 \\
middle & 40 & 0.02 & 0.00 & 7 \\
identity & 90 & 0.00 & 0.00 & 3 \\
opposite & 90 & 0.00 & 0.00 & 3 \\
\bottomrule
\end{tabular}
\end{table}

\begin{thebibliography}{9}

\bibitem[Mitchell(1993)]{mitchell1993}
Mitchell, M. (1993). \textit{Analogy-Making as Perception}. MIT Press.

\bibitem[Hofstadter and FARG(1995)]{hofstadter1995}
Hofstadter, D. R., \& FARG. (1995). \textit{Fluid Concepts and Creative Analogies}. Basic Books.

\end{thebibliography}

\end{document}