Add comprehensive centrality analysis to slipnet study

Key finding: eccentricity is the only metric significantly correlated
with conceptual depth (r = -0.380, p = 0.029). Local centrality measures
(degree, betweenness, closeness) show no significant correlation.

New files:
- compute_centrality.py: computes 8 graph metrics
- centrality_comparison.png: visual comparison of all metrics

Also updated the paper with the full analysis.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
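
The headline eccentricity result can be re-checked against the JSON that compute_centrality.py writes out. A minimal sketch, assuming centrality_results.json is available in the working directory (the script itself writes it under slipnet_analysis/):

    import json

    with open('centrality_results.json') as f:
        results = json.load(f)

    # Expected per the commit message: pearson_r ~ -0.380, pearson_p ~ 0.029
    ecc = next(m for m in results['metrics'] if m['key'] == 'eccentricity')
    print(ecc['pearson_r'], ecc['pearson_p'], ecc['significant'])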
slipnet_analysis/compute_centrality.py (new file, 283 lines)

@@ -0,0 +1,283 @@
"""
Compute various centrality and graph metrics for slipnet nodes.
Compare correlations with conceptual depth.
"""

import json
import numpy as np
import networkx as nx
from scipy import stats
import matplotlib
matplotlib.use('Agg')  # headless backend so figures can be saved without a display
import matplotlib.pyplot as plt


def load_slipnet(filepath):
    """Load the exported slipnet JSON."""
    with open(filepath, 'r') as f:
        return json.load(f)


def build_graph(data):
    """Build an undirected graph from slipnet JSON."""
    G = nx.Graph()
    for node in data['nodes']:
        G.add_node(node['name'], depth=node['conceptualDepth'])
    for link in data['links']:
        G.add_edge(link['source'], link['destination'])
    return G


def get_letter_nodes():
    """Return the 26 single-letter node names, which are excluded from the depth analysis."""
    return set(chr(i) for i in range(ord('a'), ord('z') + 1))


def compute_all_metrics(G):
    """Compute all centrality and graph metrics."""
    metrics = {}

    # Degree centrality
    metrics['degree'] = nx.degree_centrality(G)

    # Betweenness centrality
    metrics['betweenness'] = nx.betweenness_centrality(G)

    # Closeness centrality
    metrics['closeness'] = nx.closeness_centrality(G)

    # Eigenvector centrality (may fail on disconnected graphs)
    try:
        metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # For disconnected graphs, compute on largest component
        largest_cc = max(nx.connected_components(G), key=len)
        subG = G.subgraph(largest_cc)
        eig = nx.eigenvector_centrality(subG, max_iter=1000)
        # Assign 0 to disconnected nodes
        metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()}
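        # Note: nodes outside the largest component fall back to 0.0 here, so they
        # stay in the analysis but rank lowest on this metric rather than being dropped.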

    # PageRank
    metrics['pagerank'] = nx.pagerank(G)

    # Clustering coefficient
    metrics['clustering'] = nx.clustering(G)

    # Average neighbor degree
    metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G)

    # Eccentricity (only for connected components)
    metrics['eccentricity'] = {}
    for component in nx.connected_components(G):
        subG = G.subgraph(component)
        ecc = nx.eccentricity(subG)
        metrics['eccentricity'].update(ecc)
    # Disconnected nodes get max eccentricity + 1
    max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0
    for n in G.nodes():
        if n not in metrics['eccentricity']:
            metrics['eccentricity'][n] = max_ecc + 1

    return metrics
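
# A hypothetical toy example (not part of the slipnet data) of the structure
# compute_all_metrics returns -- one dict per metric, keyed by node name:
#
#     m = compute_all_metrics(nx.path_graph(["a", "b", "c"]))
#     m['degree']['b']        -> 1.0  (the center of the 3-node path)
#     m['eccentricity']['b']  -> 1
#     m['eccentricity']['a']  -> 2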


def main():
    """Run the full correlation analysis and write the plots, JSON results, and data table."""
    filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'
    data = load_slipnet(filepath)

    print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links")

    # Build graph
    G = build_graph(data)
    print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Get letter nodes
    letter_nodes = get_letter_nodes()

    # Compute all metrics
    print("\nComputing centrality metrics...")
    metrics = compute_all_metrics(G)

    # Extract non-letter nodes with their depths
    names = []
    depths = []
    for node in data['nodes']:
        if node['name'] not in letter_nodes:
            names.append(node['name'])
            depths.append(node['conceptualDepth'])

    depths = np.array(depths)

    # Compute correlations for each metric
    print("\n" + "=" * 80)
    print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics")
    print("=" * 80)

    results = []

    metric_names = {
        'degree': 'Degree Centrality',
        'betweenness': 'Betweenness Centrality',
        'closeness': 'Closeness Centrality',
        'eigenvector': 'Eigenvector Centrality',
        'pagerank': 'PageRank',
        'clustering': 'Clustering Coefficient',
        'avg_neighbor_degree': 'Avg Neighbor Degree',
        'eccentricity': 'Eccentricity'
    }

    for metric_key, metric_label in metric_names.items():
        metric_values = np.array([metrics[metric_key][n] for n in names])

        # Skip if all values are the same (no variance)
        if np.std(metric_values) == 0:
            print(f"\n{metric_label}: No variance, skipping")
            continue

        # Compute correlations
        pearson_r, pearson_p = stats.pearsonr(depths, metric_values)
        spearman_r, spearman_p = stats.spearmanr(depths, metric_values)

        # R-squared of a simple linear fit (metric ~ depth)
        z = np.polyfit(depths, metric_values, 1)
        y_pred = np.polyval(z, depths)
        ss_res = np.sum((metric_values - y_pred) ** 2)
        ss_tot = np.sum((metric_values - np.mean(metric_values)) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
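        # With a single predictor this R-squared equals the square of the Pearson r;
        # it is reported alongside r mainly for readability.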

        results.append({
            'metric': metric_label,
            'key': metric_key,
            'pearson_r': pearson_r,
            'pearson_p': pearson_p,
            'spearman_r': spearman_r,
            'spearman_p': spearman_p,
            'r_squared': r_squared,
            'slope': z[0],
            'intercept': z[1],
            'values': metric_values
        })

        print(f"\n{metric_label}:")
        print(f"  Pearson r = {pearson_r:.4f} (p = {pearson_p:.6f})")
        print(f"  Spearman rho = {spearman_r:.4f} (p = {spearman_p:.6f})")
        print(f"  R-squared = {r_squared:.4f}")

    # Sort by absolute Pearson correlation
    results.sort(key=lambda x: abs(x['pearson_r']), reverse=True)

    print("\n" + "=" * 80)
    print("SUMMARY: Metrics ranked by |Pearson r|")
    print("=" * 80)
    print(f"{'Metric':<25} {'Pearson r':>12} {'p-value':>12} {'Spearman':>12} {'R-squared':>12}")
    print("-" * 75)
    for r in results:
        sig = "*" if r['pearson_p'] < 0.05 else " "
        print(f"{r['metric']:<25} {r['pearson_r']:>11.4f}{sig} {r['pearson_p']:>12.6f} {r['spearman_r']:>12.4f} {r['r_squared']:>12.4f}")

    print("\n* = statistically significant at p < 0.05")

    # Create comparison plot (2x4 grid)
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for idx, r in enumerate(results):
        if idx >= 8:
            break
        ax = axes[idx]

        # Add jitter for visibility
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=60, c='steelblue', edgecolors='navy')
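        # Jitter is applied to the plotted points only; the trend line below uses the
        # un-jittered least-squares fit computed in the correlation step.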

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8)

        ax.set_xlabel('Conceptual Depth', fontsize=10)
        ax.set_ylabel(r['metric'], fontsize=10)

        sig_marker = "*" if r['pearson_p'] < 0.05 else ""
        ax.set_title(f"r = {r['pearson_r']:.3f}{sig_marker}, R² = {r['r_squared']:.3f}", fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for idx in range(len(results), 8):
        axes[idx].set_visible(False)

    plt.suptitle('Conceptual Depth vs Graph Metrics (n=33 non-letter nodes)', fontsize=12, y=1.02)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_comparison.png', dpi=150, bbox_inches='tight')
    print("\nComparison plot saved to: centrality_comparison.png")

    # Create individual detailed plots for top 4 metrics
    fig2, axes2 = plt.subplots(2, 2, figsize=(12, 10))
    axes2 = axes2.flatten()

    for idx, r in enumerate(results[:4]):
        ax = axes2[idx]

        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=80, c='steelblue', edgecolors='navy')

        # Add labels
        for i, name in enumerate(names):
            ax.annotate(name, (depths[i], r['values'][i] + jitter[i]),
                        fontsize=7, alpha=0.7, xytext=(3, 3), textcoords='offset points')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8,
                label=f'y = {r["slope"]:.4f}x + {r["intercept"]:.4f}')

        ax.set_xlabel('Conceptual Depth', fontsize=11)
        ax.set_ylabel(r['metric'], fontsize=11)

        sig_text = " (significant)" if r['pearson_p'] < 0.05 else " (not significant)"
        ax.set_title(f"{r['metric']}\nPearson r = {r['pearson_r']:.3f} (p = {r['pearson_p']:.4f}){sig_text}",
                     fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Top 4 Metrics: Conceptual Depth Correlations', fontsize=13)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\top_metrics_detailed.png', dpi=150, bbox_inches='tight')
    print("Detailed plot saved to: top_metrics_detailed.png")

    # Save results to JSON for paper
    output_data = {
        'analysis_type': 'centrality_correlation',
        'n_nodes': len(names),
        'metrics': []
    }
    for r in results:
        output_data['metrics'].append({
            'name': r['metric'],
            'key': r['key'],
            'pearson_r': round(r['pearson_r'], 4),
            'pearson_p': round(r['pearson_p'], 6),
            'spearman_r': round(r['spearman_r'], 4),
            'spearman_p': round(r['spearman_p'], 6),
            'r_squared': round(r['r_squared'], 4),
            'significant': bool(r['pearson_p'] < 0.05)
        })

    with open(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print("Results saved to: centrality_results.json")

    # Print data table for paper
    print("\n" + "=" * 80)
    print("DATA TABLE FOR PAPER")
    print("=" * 80)
    print(f"{'Node':<25} {'Depth':>6} {'Degree':>8} {'Between':>8} {'Close':>8} {'Eigen':>8} {'PageRank':>8}")
    print("-" * 80)

    sorted_nodes = sorted(zip(names, depths), key=lambda x: x[1])
    for name, depth in sorted_nodes:
        deg = metrics['degree'][name]
        bet = metrics['betweenness'][name]
        clo = metrics['closeness'][name]
        eig = metrics['eigenvector'][name]
        pr = metrics['pagerank'][name]
        print(f"{name:<25} {depth:>6.0f} {deg:>8.4f} {bet:>8.4f} {clo:>8.4f} {eig:>8.4f} {pr:>8.4f}")


if __name__ == '__main__':
    main()