copycat/slipnet_analysis/compute_stats.py
Commit 50b6fbdc27 by Alex Linhares: Add slipnet analysis: depth vs topology correlation study
Analysis shows no significant correlation between conceptual depth
and hop distance to letter nodes (r=0.281, p=0.113). Includes
Python scripts, visualizations, and LaTeX paper.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-01 20:58:15 +00:00


"""Compute correlation statistics for the paper (hop-based)."""
import json
import numpy as np
from scipy import stats


def main():
    with open(r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json', 'r') as f:
        data = json.load(f)

    # Extract data points (excluding letter nodes themselves)
    names = []
    depths = []
    hops = []
    is_unreachable = []
    for node in data['nodes']:
        name = node['name']
        depth = node['conceptualDepth']
        path_info = node.get('minPathToLetter', {})
        hop_count = path_info.get('hops')
        nearest = path_info.get('nearestLetter')
        # Skip letter nodes (hops == 0)
        if hop_count is not None and hop_count > 0:
            names.append(name)
            depths.append(depth)
            hops.append(hop_count)
            is_unreachable.append(nearest is None)
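    # Note (added): nodes with no path to any letter appear to be exported with
    # a sentinel hop count (2 * max observed hops, per the summary printed
    # below) and a null nearestLetter, which is why reachability is tracked
    # separately from the hop value itself.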

    # Convert to numpy arrays
    depths = np.array(depths)
    hops = np.array(hops)

    # Compute correlation
    correlation, p_value = stats.pearsonr(depths, hops)
    spearman_corr, spearman_p = stats.spearmanr(depths, hops)

    # Linear regression
    z = np.polyfit(depths, hops, 1)

    # R-squared
    y_pred = np.polyval(z, depths)
    ss_res = np.sum((hops - y_pred) ** 2)
    ss_tot = np.sum((hops - np.mean(hops)) ** 2)
    r_squared = 1 - (ss_res / ss_tot)
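    # Note (added): with a single predictor, the least-squares R^2 equals the
    # square of the Pearson r computed above, so r_squared should agree with
    # correlation ** 2 up to floating-point error; a mismatch would signal a
    # data-handling bug.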

    num_unreachable = sum(is_unreachable)
    print(f"Number of nodes analyzed: {len(names)}")
    print(f"Total nodes: {data['nodeCount']}")
    print("Letter nodes (excluded): 26")
    print(f"Unreachable nodes (hops = 2*max): {num_unreachable}")
    print()
    print(f"Pearson correlation: r = {correlation:.4f}")
    print(f"Pearson p-value: p = {p_value:.6f}")
    print(f"Spearman correlation: rho = {spearman_corr:.4f}")
    print(f"Spearman p-value: p = {spearman_p:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Linear regression: hops = {z[0]:.4f} * depth + {z[1]:.4f}")
    print()
    print(f"Depth range: {min(depths):.1f} - {max(depths):.1f}")
    print(f"Hops range: {min(hops)} - {max(hops)}")
    print(f"Mean depth: {np.mean(depths):.2f}")
    print(f"Mean hops: {np.mean(hops):.2f}")
    print(f"Std depth: {np.std(depths):.2f}")
    print(f"Std hops: {np.std(hops):.2f}")
    print()
    # Distribution of hops (node names listed so each level can be inspected)
    print("Distribution of hops:")
    for h in sorted(set(hops)):
        count = sum(1 for x in hops if x == h)
        nodes_at_h = [n for n, hp in zip(names, hops) if hp == h]
        print(f"  {h} hops: {count} nodes ({', '.join(nodes_at_h)})")
    print()

    print("Data points (sorted by hops, then depth):")
    print(f"{'Node':<30} {'Depth':<10} {'Hops':<10} {'Reachable':<10}")
    print("-" * 60)
    for name, depth, hop, unreachable in sorted(
            zip(names, depths, hops, is_unreachable), key=lambda x: (x[2], x[1])):
        status = "No" if unreachable else "Yes"
        print(f"{name:<30} {depth:<10.1f} {hop:<10} {status:<10}")


if __name__ == '__main__':
    main()
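
The script assumes a slipnet.json export with, at minimum, a top-level nodeCount and a nodes list whose entries carry name, conceptualDepth, and a minPathToLetter object with hops and nearestLetter. The sketch below is only an illustration of that input shape under those assumptions: the file name example_slipnet.json, the node names, and all numbers are made up, not the actual Copycat data.

# Minimal sketch of the input format compute_stats.py appears to expect.
# Node names and values below are illustrative, not real slipnet data.
import json

import numpy as np
from scipy import stats

example = {
    "nodeCount": 4,
    "nodes": [
        # A letter node: 0 hops to itself, so compute_stats.py skips it.
        {"name": "a", "conceptualDepth": 10.0,
         "minPathToLetter": {"hops": 0, "nearestLetter": "a"}},
        {"name": "letterCategory", "conceptualDepth": 30.0,
         "minPathToLetter": {"hops": 1, "nearestLetter": "a"}},
        {"name": "sameness", "conceptualDepth": 80.0,
         "minPathToLetter": {"hops": 2, "nearestLetter": "a"}},
        # An unreachable node: no nearest letter, sentinel hop count.
        {"name": "isolated", "conceptualDepth": 90.0,
         "minPathToLetter": {"hops": 4, "nearestLetter": None}},
    ],
}

with open("example_slipnet.json", "w") as f:
    json.dump(example, f, indent=2)

# The same extraction and Pearson test the script applies to the real export.
rows = [(n["conceptualDepth"], n["minPathToLetter"]["hops"])
        for n in example["nodes"] if n["minPathToLetter"]["hops"] > 0]
depths, hops = map(np.array, zip(*rows))
r, p = stats.pearsonr(depths, hops)
print(f"toy example: r = {r:.3f}, p = {p:.3f}")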