Add slipnet analysis: depth vs topology correlation study
Analysis shows no significant correlation between conceptual depth and hop distance to letter nodes (r=0.281, p=0.113). Includes Python scripts, visualizations, and LaTeX paper. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
85
slipnet_analysis/compute_stats.py
Normal file
85
slipnet_analysis/compute_stats.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Compute correlation statistics for the paper (hop-based)."""
|
||||
|
||||
import json
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
def _extract_points(nodes):
    """Split the slipnet node records into parallel analysis columns.

    Each record supplies ``name``, ``conceptualDepth`` and an optional
    ``minPathToLetter`` mapping with ``hops`` and ``nearestLetter``.

    Returns:
        A tuple ``(names, depths, hops, is_unreachable, letter_count)``.
        The first four are equally long lists covering only nodes with a
        positive hop count; ``is_unreachable[i]`` is True when that node
        has no ``nearestLetter``.  ``letter_count`` is the number of nodes
        whose hop count is exactly 0 (the letter nodes themselves).
    """
    names, depths, hops, is_unreachable = [], [], [], []
    letter_count = 0

    for node in nodes:
        path_info = node.get('minPathToLetter', {})
        hop_count = path_info.get('hops')

        # Nodes without a usable hop record cannot be correlated at all.
        if hop_count is None or hop_count < 0:
            continue
        # Zero hops marks a letter node; counted, but kept out of the study.
        if hop_count == 0:
            letter_count += 1
            continue

        names.append(node['name'])
        depths.append(node['conceptualDepth'])
        hops.append(hop_count)
        is_unreachable.append(path_info.get('nearestLetter') is None)

    return names, depths, hops, is_unreachable, letter_count


def main(json_path=None):
    """Print depth-vs-hops correlation statistics for the slipnet.

    Args:
        json_path: Path of the slipnet JSON export.  When omitted, the file
            ``slipnet.json`` located next to this script is used.

    Raises:
        ValueError: If fewer than two non-letter nodes carry a hop count,
            since no correlation can be computed from them.
    """
    if json_path is None:
        # Resolve relative to the script so the analysis runs from any
        # checkout location and any working directory.
        from pathlib import Path
        json_path = Path(__file__).resolve().parent / 'slipnet.json'

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    names, depth_values, hop_values, is_unreachable, letter_count = (
        _extract_points(data['nodes'])
    )
    if len(names) < 2:
        raise ValueError(
            f"need at least two non-letter nodes with a hop count, "
            f"found {len(names)}"
        )

    depths = np.array(depth_values)
    hops = np.array(hop_values)

    # Correlation measures
    correlation, p_value = stats.pearsonr(depths, hops)
    spearman_corr, spearman_p = stats.spearmanr(depths, hops)

    # Least-squares line: hops = z[0] * depth + z[1]
    z = np.polyfit(depths, hops, 1)

    # R-squared of that line; undefined when every hop count is identical.
    y_pred = np.polyval(z, depths)
    ss_res = np.sum((hops - y_pred) ** 2)
    ss_tot = np.sum((hops - np.mean(hops)) ** 2)
    r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else float('nan')

    num_unreachable = sum(is_unreachable)
    print(f"Number of nodes analyzed: {len(names)}")
    print(f"Total nodes: {data['nodeCount']}")
    print(f"Letter nodes (excluded): {letter_count}")
    print(f"Unreachable nodes (hops = 2*max): {num_unreachable}")
    print()
    print(f"Pearson correlation: r = {correlation:.4f}")
    print(f"Pearson p-value: p = {p_value:.6f}")
    print(f"Spearman correlation: rho = {spearman_corr:.4f}")
    print(f"Spearman p-value: p = {spearman_p:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Linear regression: hops = {z[0]:.4f} * depth + {z[1]:.4f}")
    print()
    print(f"Depth range: {depths.min():.1f} - {depths.max():.1f}")
    print(f"Hops range: {hops.min()} - {hops.max()}")
    print(f"Mean depth: {np.mean(depths):.2f}")
    print(f"Mean hops: {np.mean(hops):.2f}")
    print(f"Std depth: {np.std(depths):.2f}")
    print(f"Std hops: {np.std(hops):.2f}")
    print()

    # Distribution of hops (np.unique yields the values in ascending order)
    print("Distribution of hops:")
    for h, count in zip(*np.unique(hops, return_counts=True)):
        print(f" {h} hops: {count} nodes")

    print()
    print("Data points (sorted by hops, then depth):")
    print(f"{'Node':<30} {'Depth':<10} {'Hops':<10} {'Reachable':<10}")
    print("-" * 60)
    rows = sorted(
        zip(names, depth_values, hop_values, is_unreachable),
        key=lambda row: (row[2], row[1]),
    )
    for name, depth, hop, unreachable in rows:
        status = "No" if unreachable else "Yes"
        print(f"{name:<30} {depth:<10.1f} {hop:<10} {status:<10}")
|
||||
|
||||
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user