Add comprehensive centrality analysis to slipnet study

Key finding: Eccentricity is the only metric significantly correlated
with conceptual depth (Pearson r = -0.380, p = 0.029). The standard
centrality measures (degree, betweenness, closeness) show no
significant correlation.
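
For reference, a minimal sketch of the core check behind this finding
(the JSON field names follow compute_centrality.py below; the
'slipnet.json' path is illustrative):

    import json
    import networkx as nx
    from scipy import stats

    data = json.load(open('slipnet.json'))
    G = nx.Graph()
    for node in data['nodes']:
        G.add_node(node['name'], depth=node['conceptualDepth'])
    for link in data['links']:
        G.add_edge(link['source'], link['destination'])

    # Eccentricity is only defined within a connected component
    ecc = {}
    for comp in nx.connected_components(G):
        ecc.update(nx.eccentricity(G.subgraph(comp)))

    # Correlate depth with eccentricity over the non-letter nodes
    letters = {chr(c) for c in range(ord('a'), ord('z') + 1)}
    names = [n for n in G.nodes() if n not in letters]
    depths = [G.nodes[n]['depth'] for n in names]
    r, p = stats.pearsonr(depths, [ecc[n] for n in names])
    print(f"Pearson r = {r:.3f}, p = {p:.3f}")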

Changes:
- compute_centrality.py (new): computes 8 graph metrics
- centrality_comparison.png (new): visual comparison of all metrics
- paper updated with the full analysis

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Alex Linhares
Date:   2026-02-01 21:17:02 +00:00
Parent: 50b6fbdc27
Commit: 72d0bf3d3e
9 changed files with 621 additions and 531 deletions

compute_centrality.py

@@ -0,0 +1,283 @@
"""
Compute various centrality and graph metrics for slipnet nodes.
Compare correlations with conceptual depth.
"""
import json
import numpy as np
import networkx as nx
from scipy import stats
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def load_slipnet(filepath):
    with open(filepath, 'r') as f:
        return json.load(f)


def build_graph(data):
    """Build an undirected graph from slipnet JSON."""
    G = nx.Graph()
    for node in data['nodes']:
        G.add_node(node['name'], depth=node['conceptualDepth'])
    for link in data['links']:
        G.add_edge(link['source'], link['destination'])
    return G


def get_letter_nodes():
    """Names of the 26 single-letter slipnet nodes (excluded from the correlation analysis)."""
    return set(chr(i) for i in range(ord('a'), ord('z') + 1))


def compute_all_metrics(G):
    """Compute all centrality and graph metrics."""
    metrics = {}
    # Degree centrality
    metrics['degree'] = nx.degree_centrality(G)
    # Betweenness centrality
    metrics['betweenness'] = nx.betweenness_centrality(G)
    # Closeness centrality
    metrics['closeness'] = nx.closeness_centrality(G)
    # Eigenvector centrality (power iteration may fail to converge)
    try:
        metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # Fall back to the largest connected component
        largest_cc = max(nx.connected_components(G), key=len)
        subG = G.subgraph(largest_cc)
        eig = nx.eigenvector_centrality(subG, max_iter=1000)
        # Nodes outside the largest component get 0
        metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()}
    # PageRank
    metrics['pagerank'] = nx.pagerank(G)
    # Clustering coefficient
    metrics['clustering'] = nx.clustering(G)
    # Average neighbor degree
    metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G)
    # Eccentricity (computed per connected component; undefined across components)
    metrics['eccentricity'] = {}
    for component in nx.connected_components(G):
        subG = G.subgraph(component)
        ecc = nx.eccentricity(subG)
        metrics['eccentricity'].update(ecc)
    # Safety net: any node missed above gets max eccentricity + 1
    # (every node belongs to some component, so this normally adds nothing)
    max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0
    for n in G.nodes():
        if n not in metrics['eccentricity']:
            metrics['eccentricity'][n] = max_ecc + 1
    return metrics


def main():
    filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'
    data = load_slipnet(filepath)
    print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links")

    # Build graph
    G = build_graph(data)
    print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Get letter nodes
    letter_nodes = get_letter_nodes()

    # Compute all metrics
    print("\nComputing centrality metrics...")
    metrics = compute_all_metrics(G)

    # Extract non-letter nodes with their depths
    names = []
    depths = []
    for node in data['nodes']:
        if node['name'] not in letter_nodes:
            names.append(node['name'])
            depths.append(node['conceptualDepth'])
    depths = np.array(depths)

    # Compute correlations for each metric
    print("\n" + "=" * 80)
    print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics")
    print("=" * 80)

    results = []
    metric_names = {
        'degree': 'Degree Centrality',
        'betweenness': 'Betweenness Centrality',
        'closeness': 'Closeness Centrality',
        'eigenvector': 'Eigenvector Centrality',
        'pagerank': 'PageRank',
        'clustering': 'Clustering Coefficient',
        'avg_neighbor_degree': 'Avg Neighbor Degree',
        'eccentricity': 'Eccentricity'
    }

    for metric_key, metric_label in metric_names.items():
        metric_values = np.array([metrics[metric_key][n] for n in names])

        # Skip if all values are the same (no variance)
        if np.std(metric_values) == 0:
            print(f"\n{metric_label}: No variance, skipping")
            continue

        # Compute correlations
        pearson_r, pearson_p = stats.pearsonr(depths, metric_values)
        spearman_r, spearman_p = stats.spearmanr(depths, metric_values)

        # R-squared of the linear fit (equals pearson_r**2 for a simple linear regression)
        z = np.polyfit(depths, metric_values, 1)
        y_pred = np.polyval(z, depths)
        ss_res = np.sum((metric_values - y_pred) ** 2)
        ss_tot = np.sum((metric_values - np.mean(metric_values)) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0

        results.append({
            'metric': metric_label,
            'key': metric_key,
            'pearson_r': pearson_r,
            'pearson_p': pearson_p,
            'spearman_r': spearman_r,
            'spearman_p': spearman_p,
            'r_squared': r_squared,
            'slope': z[0],
            'intercept': z[1],
            'values': metric_values
        })

        print(f"\n{metric_label}:")
        print(f" Pearson r = {pearson_r:.4f} (p = {pearson_p:.6f})")
        print(f" Spearman rho = {spearman_r:.4f} (p = {spearman_p:.6f})")
        print(f" R-squared = {r_squared:.4f}")

    # Sort by absolute Pearson correlation
    results.sort(key=lambda x: abs(x['pearson_r']), reverse=True)
print("\n" + "=" * 80)
print("SUMMARY: Metrics ranked by |Pearson r|")
print("=" * 80)
print(f"{'Metric':<25} {'Pearson r':>12} {'p-value':>12} {'Spearman':>12} {'R-squared':>12}")
print("-" * 75)
for r in results:
sig = "*" if r['pearson_p'] < 0.05 else " "
print(f"{r['metric']:<25} {r['pearson_r']:>11.4f}{sig} {r['pearson_p']:>12.6f} {r['spearman_r']:>12.4f} {r['r_squared']:>12.4f}")
print("\n* = statistically significant at p < 0.05")

    # Create comparison plot (2x4 grid)
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()
    for idx, r in enumerate(results):
        if idx >= 8:
            break
        ax = axes[idx]
        # Add jitter for visibility
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))
        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=60, c='steelblue', edgecolors='navy')
        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8)
        ax.set_xlabel('Conceptual Depth', fontsize=10)
        ax.set_ylabel(r['metric'], fontsize=10)
        sig_marker = "*" if r['pearson_p'] < 0.05 else ""
        ax.set_title(f"r = {r['pearson_r']:.3f}{sig_marker}, R² = {r['r_squared']:.3f}", fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for idx in range(len(results), 8):
        axes[idx].set_visible(False)

    plt.suptitle('Conceptual Depth vs Graph Metrics (n=33 non-letter nodes)', fontsize=12, y=1.02)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_comparison.png', dpi=150, bbox_inches='tight')
    print("\nComparison plot saved to: centrality_comparison.png")

    # Create individual detailed plots for top 4 metrics
    fig2, axes2 = plt.subplots(2, 2, figsize=(12, 10))
    axes2 = axes2.flatten()
    for idx, r in enumerate(results[:4]):
        ax = axes2[idx]
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))
        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=80, c='steelblue', edgecolors='navy')
        # Add labels
        for i, name in enumerate(names):
            ax.annotate(name, (depths[i], r['values'][i] + jitter[i]),
                        fontsize=7, alpha=0.7, xytext=(3, 3), textcoords='offset points')
        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8,
                label=f'y = {r["slope"]:.4f}x + {r["intercept"]:.4f}')
        ax.set_xlabel('Conceptual Depth', fontsize=11)
        ax.set_ylabel(r['metric'], fontsize=11)
        sig_text = " (significant)" if r['pearson_p'] < 0.05 else " (not significant)"
        ax.set_title(f"{r['metric']}\nPearson r = {r['pearson_r']:.3f} (p = {r['pearson_p']:.4f}){sig_text}",
                     fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Top 4 Metrics: Conceptual Depth Correlations', fontsize=13)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\top_metrics_detailed.png', dpi=150, bbox_inches='tight')
    print("Detailed plot saved to: top_metrics_detailed.png")

    # Save results to JSON for paper
    output_data = {
        'analysis_type': 'centrality_correlation',
        'n_nodes': len(names),
        'metrics': []
    }
    for r in results:
        output_data['metrics'].append({
            'name': r['metric'],
            'key': r['key'],
            'pearson_r': round(r['pearson_r'], 4),
            'pearson_p': round(r['pearson_p'], 6),
            'spearman_r': round(r['spearman_r'], 4),
            'spearman_p': round(r['spearman_p'], 6),
            'r_squared': round(r['r_squared'], 4),
            'significant': bool(r['pearson_p'] < 0.05)
        })
    with open(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print("Results saved to: centrality_results.json")

    # Print data table for paper
    print("\n" + "=" * 80)
    print("DATA TABLE FOR PAPER")
    print("=" * 80)
    print(f"{'Node':<25} {'Depth':>6} {'Degree':>8} {'Between':>8} {'Close':>8} {'Eigen':>8} {'PageRank':>8}")
    print("-" * 80)
    sorted_nodes = sorted(zip(names, depths), key=lambda x: x[1])
    for name, depth in sorted_nodes:
        deg = metrics['degree'][name]
        bet = metrics['betweenness'][name]
        clo = metrics['closeness'][name]
        eig = metrics['eigenvector'][name]
        pr = metrics['pagerank'][name]
        print(f"{name:<25} {depth:>6.0f} {deg:>8.4f} {bet:>8.4f} {clo:>8.4f} {eig:>8.4f} {pr:>8.4f}")


if __name__ == '__main__':
    main()