Analysis shows no significant correlation between conceptual depth and hop distance to letter nodes (r=0.281, p=0.113). Includes Python scripts, visualizations, and LaTeX paper. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
86 lines
2.9 KiB
Python
86 lines
2.9 KiB
Python
"""Compute correlation statistics for the paper (hop-based)."""
|
|
|
|
import json
|
|
import numpy as np
|
|
from scipy import stats
|
|
|
|
# Default input location used when main() is called without arguments.
DEFAULT_SLIPNET_PATH = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'


def _collect_points(nodes):
    """Extract the analysed data points from the slipnet node list.

    A node becomes a data point when its ``minPathToLetter.hops`` value is
    present and greater than zero.  Nodes with ``hops == 0`` are counted as
    letter nodes and excluded; nodes without hop information are skipped.

    Returns:
        Tuple ``(names, depths, hops, is_unreachable, letter_count)`` where
        the first four are parallel lists and ``letter_count`` is the number
        of nodes whose hop count is exactly zero.
    """
    names, depths, hops, is_unreachable = [], [], [], []
    letter_count = 0

    for node in nodes:
        # ``or {}`` also covers an explicit JSON null for the path record.
        path_info = node.get('minPathToLetter') or {}
        hop_count = path_info.get('hops')

        if hop_count == 0:
            letter_count += 1
        if hop_count is None or hop_count <= 0:
            continue

        names.append(node['name'])
        depths.append(node['conceptualDepth'])
        hops.append(hop_count)
        # A node with no nearest letter recorded is flagged as unreachable.
        is_unreachable.append(path_info.get('nearestLetter') is None)

    return names, depths, hops, is_unreachable, letter_count


def _linear_fit(depths, hops):
    """Fit ``hops = a * depth + b`` by least squares.

    Returns:
        Tuple ``(coefficients, r_squared)``; ``coefficients`` is the
        ``np.polyfit`` result (slope first) and ``r_squared`` is NaN when the
        hop counts have zero variance, where the ratio is undefined.
    """
    coefficients = np.polyfit(depths, hops, 1)
    predicted = np.polyval(coefficients, depths)
    ss_res = np.sum((hops - predicted) ** 2)
    ss_tot = np.sum((hops - np.mean(hops)) ** 2)
    r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else float('nan')
    return coefficients, r_squared


def main(path=DEFAULT_SLIPNET_PATH):
    """Print depth-versus-hops correlation statistics for a slipnet JSON file.

    Args:
        path: Location of the slipnet JSON file.  The file must provide a
            ``nodes`` list and a ``nodeCount`` value.  Defaults to the
            original hard-coded location.

    Raises:
        ValueError: If fewer than two nodes have a positive hop count, since
            correlation and regression are undefined for such input.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    names, depths, hops, is_unreachable, letter_count = _collect_points(
        data['nodes']
    )
    if len(names) < 2:
        raise ValueError(
            "Need at least two nodes with a positive hop count, "
            f"found {len(names)}"
        )

    # Convert to numpy arrays
    depths = np.array(depths)
    hops = np.array(hops)

    # Compute correlation
    correlation, p_value = stats.pearsonr(depths, hops)
    spearman_corr, spearman_p = stats.spearmanr(depths, hops)

    # Linear regression and R-squared
    z, r_squared = _linear_fit(depths, hops)

    num_unreachable = sum(is_unreachable)
    print(f"Number of nodes analyzed: {len(names)}")
    print(f"Total nodes: {data['nodeCount']}")
    print(f"Letter nodes (excluded): {letter_count}")
    # NOTE(review): the "hops = 2*max" wording implies the input file assigns
    # unreachable nodes a placeholder hop count; that is not verifiable here.
    print(f"Unreachable nodes (hops = 2*max): {num_unreachable}")
    print()
    print(f"Pearson correlation: r = {correlation:.4f}")
    print(f"Pearson p-value: p = {p_value:.6f}")
    print(f"Spearman correlation: rho = {spearman_corr:.4f}")
    print(f"Spearman p-value: p = {spearman_p:.6f}")
    print(f"R-squared: {r_squared:.4f}")
    print(f"Linear regression: hops = {z[0]:.4f} * depth + {z[1]:.4f}")
    print()
    print(f"Depth range: {min(depths):.1f} - {max(depths):.1f}")
    print(f"Hops range: {min(hops)} - {max(hops)}")
    print(f"Mean depth: {np.mean(depths):.2f}")
    print(f"Mean hops: {np.mean(hops):.2f}")
    print(f"Std depth: {np.std(depths):.2f}")
    print(f"Std hops: {np.std(hops):.2f}")
    print()

    # Distribution of hops: np.unique yields the sorted distinct values and
    # their frequencies in a single pass.
    print("Distribution of hops:")
    for h, count in zip(*np.unique(hops, return_counts=True)):
        print(f" {h} hops: {count} nodes")

    print()
    print("Data points (sorted by hops, then depth):")
    print(f"{'Node':<30} {'Depth':<10} {'Hops':<10} {'Reachable':<10}")
    print("-" * 60)
    rows = sorted(
        zip(names, depths, hops, is_unreachable),
        key=lambda x: (x[2], x[1]),
    )
    for name, depth, hop, unreachable in rows:
        status = "No" if unreachable else "Yes"
        print(f"{name:<30} {depth:<10.1f} {hop:<10} {status:<10}")
|
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|