""" Compute various centrality and graph metrics for slipnet nodes. Compare correlations with conceptual depth. """ import json import numpy as np import networkx as nx from scipy import stats import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt def load_slipnet(filepath): with open(filepath, 'r') as f: return json.load(f) def build_graph(data): """Build an undirected graph from slipnet JSON.""" G = nx.Graph() for node in data['nodes']: G.add_node(node['name'], depth=node['conceptualDepth']) for link in data['links']: G.add_edge(link['source'], link['destination']) return G def get_letter_nodes(): return set(chr(i) for i in range(ord('a'), ord('z') + 1)) def compute_all_metrics(G): """Compute all centrality and graph metrics.""" metrics = {} # Degree centrality metrics['degree'] = nx.degree_centrality(G) # Betweenness centrality metrics['betweenness'] = nx.betweenness_centrality(G) # Closeness centrality metrics['closeness'] = nx.closeness_centrality(G) # Eigenvector centrality (may fail on disconnected graphs) try: metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000) except nx.PowerIterationFailedConvergence: # For disconnected graphs, compute on largest component largest_cc = max(nx.connected_components(G), key=len) subG = G.subgraph(largest_cc) eig = nx.eigenvector_centrality(subG, max_iter=1000) # Assign 0 to disconnected nodes metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()} # PageRank metrics['pagerank'] = nx.pagerank(G) # Clustering coefficient metrics['clustering'] = nx.clustering(G) # Average neighbor degree metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G) # Eccentricity (only for connected components) metrics['eccentricity'] = {} for component in nx.connected_components(G): subG = G.subgraph(component) ecc = nx.eccentricity(subG) metrics['eccentricity'].update(ecc) # Disconnected nodes get max eccentricity + 1 max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0 for n in G.nodes(): if n not in metrics['eccentricity']: metrics['eccentricity'][n] = max_ecc + 1 return metrics def main(): filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json' data = load_slipnet(filepath) print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links") # Build graph G = build_graph(data) print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges") # Get letter nodes letter_nodes = get_letter_nodes() # Compute all metrics print("\nComputing centrality metrics...") metrics = compute_all_metrics(G) # Extract non-letter nodes with their depths names = [] depths = [] for node in data['nodes']: if node['name'] not in letter_nodes: names.append(node['name']) depths.append(node['conceptualDepth']) depths = np.array(depths) # Compute correlations for each metric print("\n" + "=" * 80) print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics") print("=" * 80) results = [] metric_names = { 'degree': 'Degree Centrality', 'betweenness': 'Betweenness Centrality', 'closeness': 'Closeness Centrality', 'eigenvector': 'Eigenvector Centrality', 'pagerank': 'PageRank', 'clustering': 'Clustering Coefficient', 'avg_neighbor_degree': 'Avg Neighbor Degree', 'eccentricity': 'Eccentricity' } for metric_key, metric_label in metric_names.items(): metric_values = np.array([metrics[metric_key][n] for n in names]) # Skip if all values are the same (no variance) if np.std(metric_values) == 0: print(f"\n{metric_label}: No variance, skipping") continue # Compute correlations 
def get_letter_nodes():
    return set(chr(i) for i in range(ord('a'), ord('z') + 1))


def compute_all_metrics(G):
    """Compute all centrality and graph metrics."""
    metrics = {}

    # Degree centrality
    metrics['degree'] = nx.degree_centrality(G)

    # Betweenness centrality
    metrics['betweenness'] = nx.betweenness_centrality(G)

    # Closeness centrality
    metrics['closeness'] = nx.closeness_centrality(G)

    # Eigenvector centrality (may fail to converge on disconnected graphs)
    try:
        metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # Fall back to the largest connected component
        largest_cc = max(nx.connected_components(G), key=len)
        subG = G.subgraph(largest_cc)
        eig = nx.eigenvector_centrality(subG, max_iter=1000)
        # Nodes outside that component get 0
        metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()}

    # PageRank
    metrics['pagerank'] = nx.pagerank(G)

    # Clustering coefficient
    metrics['clustering'] = nx.clustering(G)

    # Average neighbor degree
    metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G)

    # Eccentricity, computed per connected component (it is undefined
    # across disconnected parts of the graph)
    metrics['eccentricity'] = {}
    for component in nx.connected_components(G):
        subG = G.subgraph(component)
        ecc = nx.eccentricity(subG)
        metrics['eccentricity'].update(ecc)
    # Safety net (normally unreachable, since every node belongs to some
    # component): any node left uncovered gets max eccentricity + 1
    max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0
    for n in G.nodes():
        if n not in metrics['eccentricity']:
            metrics['eccentricity'][n] = max_ecc + 1

    return metrics
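
# Sanity check (illustrative only, not part of the analysis): on a 3-node
# path graph a-b-c, compute_all_metrics returns per-node dicts, e.g.
#
#   G = nx.path_graph(['a', 'b', 'c'])
#   m = compute_all_metrics(G)
#   m['degree']        -> {'a': 0.5, 'b': 1.0, 'c': 0.5}
#   m['eccentricity']  -> {'a': 2, 'b': 1, 'c': 2}
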
def main():
    filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'
    data = load_slipnet(filepath)
    print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links")

    # Build graph
    G = build_graph(data)
    print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Get letter nodes
    letter_nodes = get_letter_nodes()

    # Compute all metrics
    print("\nComputing centrality metrics...")
    metrics = compute_all_metrics(G)

    # Extract non-letter nodes with their depths
    names = []
    depths = []
    for node in data['nodes']:
        if node['name'] not in letter_nodes:
            names.append(node['name'])
            depths.append(node['conceptualDepth'])
    depths = np.array(depths)

    # Compute correlations for each metric
    print("\n" + "=" * 80)
    print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics")
    print("=" * 80)

    results = []
    metric_names = {
        'degree': 'Degree Centrality',
        'betweenness': 'Betweenness Centrality',
        'closeness': 'Closeness Centrality',
        'eigenvector': 'Eigenvector Centrality',
        'pagerank': 'PageRank',
        'clustering': 'Clustering Coefficient',
        'avg_neighbor_degree': 'Avg Neighbor Degree',
        'eccentricity': 'Eccentricity'
    }

    for metric_key, metric_label in metric_names.items():
        metric_values = np.array([metrics[metric_key][n] for n in names])

        # Skip if all values are the same (no variance)
        if np.std(metric_values) == 0:
            print(f"\n{metric_label}: No variance, skipping")
            continue

        # Compute correlations
        pearson_r, pearson_p = stats.pearsonr(depths, metric_values)
        spearman_r, spearman_p = stats.spearmanr(depths, metric_values)

        # R-squared from a simple linear fit
        z = np.polyfit(depths, metric_values, 1)
        y_pred = np.polyval(z, depths)
        ss_res = np.sum((metric_values - y_pred) ** 2)
        ss_tot = np.sum((metric_values - np.mean(metric_values)) ** 2)
        r_squared = (1 - ss_res / ss_tot) if ss_tot > 0 else 0

        results.append({
            'metric': metric_label,
            'key': metric_key,
            'pearson_r': pearson_r,
            'pearson_p': pearson_p,
            'spearman_r': spearman_r,
            'spearman_p': spearman_p,
            'r_squared': r_squared,
            'slope': z[0],
            'intercept': z[1],
            'values': metric_values
        })

        print(f"\n{metric_label}:")
        print(f"  Pearson r = {pearson_r:.4f} (p = {pearson_p:.6f})")
        print(f"  Spearman rho = {spearman_r:.4f} (p = {spearman_p:.6f})")
        print(f"  R-squared = {r_squared:.4f}")

    # Sort by absolute Pearson correlation
    results.sort(key=lambda x: abs(x['pearson_r']), reverse=True)

    print("\n" + "=" * 80)
    print("SUMMARY: Metrics ranked by |Pearson r|")
    print("=" * 80)
    print(f"{'Metric':<25} {'Pearson r':>12} {'p-value':>12} {'Spearman':>12} {'R-squared':>12}")
    print("-" * 75)
    for r in results:
        sig = "*" if r['pearson_p'] < 0.05 else " "
        print(f"{r['metric']:<25} {r['pearson_r']:>11.4f}{sig} {r['pearson_p']:>12.6f} {r['spearman_r']:>12.4f} {r['r_squared']:>12.4f}")
    print("\n* = statistically significant at p < 0.05")

    # Create comparison plot (2x4 grid)
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for idx, r in enumerate(results):
        if idx >= 8:
            break
        ax = axes[idx]

        # Add jitter for visibility
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))
        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=60,
                   c='steelblue', edgecolors='navy')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8)

        ax.set_xlabel('Conceptual Depth', fontsize=10)
        ax.set_ylabel(r['metric'], fontsize=10)
        sig_marker = "*" if r['pearson_p'] < 0.05 else ""
        ax.set_title(f"r = {r['pearson_r']:.3f}{sig_marker}, R² = {r['r_squared']:.3f}", fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for idx in range(len(results), 8):
        axes[idx].set_visible(False)

    plt.suptitle(f'Conceptual Depth vs Graph Metrics (n={len(names)} non-letter nodes)',
                 fontsize=12, y=1.02)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_comparison.png',
                dpi=150, bbox_inches='tight')
    print("\nComparison plot saved to: centrality_comparison.png")

    # Create individual detailed plots for top 4 metrics
    fig2, axes2 = plt.subplots(2, 2, figsize=(12, 10))
    axes2 = axes2.flatten()

    for idx, r in enumerate(results[:4]):
        ax = axes2[idx]

        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))
        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=80,
                   c='steelblue', edgecolors='navy')

        # Add node-name labels
        for i, name in enumerate(names):
            ax.annotate(name, (depths[i], r['values'][i] + jitter[i]),
                        fontsize=7, alpha=0.7, xytext=(3, 3), textcoords='offset points')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8,
                label=f'y = {r["slope"]:.4f}x + {r["intercept"]:.4f}')

        ax.set_xlabel('Conceptual Depth', fontsize=11)
        ax.set_ylabel(r['metric'], fontsize=11)
        sig_text = " (significant)" if r['pearson_p'] < 0.05 else " (not significant)"
        ax.set_title(f"{r['metric']}\nPearson r = {r['pearson_r']:.3f} (p = {r['pearson_p']:.4f}){sig_text}",
                     fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Top 4 Metrics: Conceptual Depth Correlations', fontsize=13)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\top_metrics_detailed.png',
                dpi=150, bbox_inches='tight')
    print("Detailed plot saved to: top_metrics_detailed.png")

    # Save results to JSON for paper
    output_data = {
        'analysis_type': 'centrality_correlation',
        'n_nodes': len(names),
        'metrics': []
    }
    for r in results:
        output_data['metrics'].append({
            'name': r['metric'],
            'key': r['key'],
            'pearson_r': round(r['pearson_r'], 4),
            'pearson_p': round(r['pearson_p'], 6),
            'spearman_r': round(r['spearman_r'], 4),
            'spearman_p': round(r['spearman_p'], 6),
            'r_squared': round(r['r_squared'], 4),
            'significant': bool(r['pearson_p'] < 0.05)
        })

    with open(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print("Results saved to: centrality_results.json")

    # Print data table for paper
    print("\n" + "=" * 80)
    print("DATA TABLE FOR PAPER")
    print("=" * 80)
    print(f"{'Node':<25} {'Depth':>6} {'Degree':>8} {'Between':>8} {'Close':>8} {'Eigen':>8} {'PageRank':>8}")
    print("-" * 80)
    sorted_nodes = sorted(zip(names, depths), key=lambda x: x[1])
    for name, depth in sorted_nodes:
        deg = metrics['degree'][name]
        bet = metrics['betweenness'][name]
        clo = metrics['closeness'][name]
        eig = metrics['eigenvector'][name]
        pr = metrics['pagerank'][name]
        print(f"{name:<25} {depth:>6.0f} {deg:>8.4f} {bet:>8.4f} {clo:>8.4f} {eig:>8.4f} {pr:>8.4f}")


if __name__ == '__main__':
    main()
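
# Running this script (paths are hardcoded above) should produce:
#   - console tables: correlation analysis, ranked summary, and the data table
#   - centrality_comparison.png  (2x4 grid, one panel per metric)
#   - top_metrics_detailed.png   (top 4 metrics, with node labels)
#   - centrality_results.json    (rounded correlation statistics)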