Add comprehensive centrality analysis to slipnet study

Key finding: eccentricity is the only metric significantly correlated
with conceptual depth (r = -0.380, p = 0.029). Local centrality measures
(degree, betweenness, closeness) show no significant correlation.

New files:
- compute_centrality.py: computes 8 graph metrics
- centrality_comparison.png: visual comparison of all metrics

Also updated the paper with the full analysis.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
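
The headline eccentricity result can be re-checked against the JSON that compute_centrality.py writes out. A minimal sketch, assuming centrality_results.json is available in the working directory (the script itself writes it under slipnet_analysis/):

    import json

    with open('centrality_results.json') as f:
        results = json.load(f)

    # Expected per the commit message: pearson_r ~ -0.380, pearson_p ~ 0.029
    ecc = next(m for m in results['metrics'] if m['key'] == 'eccentricity')
    print(ecc['pearson_r'], ecc['pearson_p'], ecc['significant'])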
slipnet_analysis/compute_centrality.py (new file, 283 lines)

@@ -0,0 +1,283 @@
"""
Compute various centrality and graph metrics for slipnet nodes.
Compare correlations with conceptual depth.
"""

import json
import numpy as np
import networkx as nx
from scipy import stats
import matplotlib
matplotlib.use('Agg')  # headless backend so figures can be saved without a display
import matplotlib.pyplot as plt


def load_slipnet(filepath):
    """Load the exported slipnet JSON."""
    with open(filepath, 'r') as f:
        return json.load(f)


def build_graph(data):
    """Build an undirected graph from slipnet JSON."""
    G = nx.Graph()
    for node in data['nodes']:
        G.add_node(node['name'], depth=node['conceptualDepth'])
    for link in data['links']:
        G.add_edge(link['source'], link['destination'])
    return G


def get_letter_nodes():
    """Return the 26 single-letter node names, which are excluded from the depth analysis."""
    return set(chr(i) for i in range(ord('a'), ord('z') + 1))


def compute_all_metrics(G):
    """Compute all centrality and graph metrics."""
    metrics = {}

    # Degree centrality
    metrics['degree'] = nx.degree_centrality(G)

    # Betweenness centrality
    metrics['betweenness'] = nx.betweenness_centrality(G)

    # Closeness centrality
    metrics['closeness'] = nx.closeness_centrality(G)

    # Eigenvector centrality (may fail on disconnected graphs)
    try:
        metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # For disconnected graphs, compute on largest component
        largest_cc = max(nx.connected_components(G), key=len)
        subG = G.subgraph(largest_cc)
        eig = nx.eigenvector_centrality(subG, max_iter=1000)
        # Assign 0 to disconnected nodes
        metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()}
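        # Note: nodes outside the largest component fall back to 0.0 here, so they
        # stay in the analysis but rank lowest on this metric rather than being dropped.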

    # PageRank
    metrics['pagerank'] = nx.pagerank(G)

    # Clustering coefficient
    metrics['clustering'] = nx.clustering(G)

    # Average neighbor degree
    metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G)

    # Eccentricity (only for connected components)
    metrics['eccentricity'] = {}
    for component in nx.connected_components(G):
        subG = G.subgraph(component)
        ecc = nx.eccentricity(subG)
        metrics['eccentricity'].update(ecc)
    # Disconnected nodes get max eccentricity + 1
    max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0
    for n in G.nodes():
        if n not in metrics['eccentricity']:
            metrics['eccentricity'][n] = max_ecc + 1

    return metrics
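
# A hypothetical toy example (not part of the slipnet data) of the structure
# compute_all_metrics returns -- one dict per metric, keyed by node name:
#
#     m = compute_all_metrics(nx.path_graph(["a", "b", "c"]))
#     m['degree']['b']        -> 1.0  (the center of the 3-node path)
#     m['eccentricity']['b']  -> 1
#     m['eccentricity']['a']  -> 2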


def main():
    """Run the full correlation analysis and write the plots, JSON results, and data table."""
    filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'
    data = load_slipnet(filepath)

    print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links")

    # Build graph
    G = build_graph(data)
    print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Get letter nodes
    letter_nodes = get_letter_nodes()

    # Compute all metrics
    print("\nComputing centrality metrics...")
    metrics = compute_all_metrics(G)

    # Extract non-letter nodes with their depths
    names = []
    depths = []
    for node in data['nodes']:
        if node['name'] not in letter_nodes:
            names.append(node['name'])
            depths.append(node['conceptualDepth'])

    depths = np.array(depths)

    # Compute correlations for each metric
    print("\n" + "=" * 80)
    print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics")
    print("=" * 80)

    results = []

    metric_names = {
        'degree': 'Degree Centrality',
        'betweenness': 'Betweenness Centrality',
        'closeness': 'Closeness Centrality',
        'eigenvector': 'Eigenvector Centrality',
        'pagerank': 'PageRank',
        'clustering': 'Clustering Coefficient',
        'avg_neighbor_degree': 'Avg Neighbor Degree',
        'eccentricity': 'Eccentricity'
    }

    for metric_key, metric_label in metric_names.items():
        metric_values = np.array([metrics[metric_key][n] for n in names])

        # Skip if all values are the same (no variance)
        if np.std(metric_values) == 0:
            print(f"\n{metric_label}: No variance, skipping")
            continue

        # Compute correlations
        pearson_r, pearson_p = stats.pearsonr(depths, metric_values)
        spearman_r, spearman_p = stats.spearmanr(depths, metric_values)

        # R-squared of a simple linear fit (metric ~ depth)
        z = np.polyfit(depths, metric_values, 1)
        y_pred = np.polyval(z, depths)
        ss_res = np.sum((metric_values - y_pred) ** 2)
        ss_tot = np.sum((metric_values - np.mean(metric_values)) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0
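        # With a single predictor this R-squared equals the square of the Pearson r;
        # it is reported alongside r mainly for readability.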

        results.append({
            'metric': metric_label,
            'key': metric_key,
            'pearson_r': pearson_r,
            'pearson_p': pearson_p,
            'spearman_r': spearman_r,
            'spearman_p': spearman_p,
            'r_squared': r_squared,
            'slope': z[0],
            'intercept': z[1],
            'values': metric_values
        })

        print(f"\n{metric_label}:")
        print(f"  Pearson r = {pearson_r:.4f} (p = {pearson_p:.6f})")
        print(f"  Spearman rho = {spearman_r:.4f} (p = {spearman_p:.6f})")
        print(f"  R-squared = {r_squared:.4f}")

    # Sort by absolute Pearson correlation
    results.sort(key=lambda x: abs(x['pearson_r']), reverse=True)

    print("\n" + "=" * 80)
    print("SUMMARY: Metrics ranked by |Pearson r|")
    print("=" * 80)
    print(f"{'Metric':<25} {'Pearson r':>12} {'p-value':>12} {'Spearman':>12} {'R-squared':>12}")
    print("-" * 75)
    for r in results:
        sig = "*" if r['pearson_p'] < 0.05 else " "
        print(f"{r['metric']:<25} {r['pearson_r']:>11.4f}{sig} {r['pearson_p']:>12.6f} {r['spearman_r']:>12.4f} {r['r_squared']:>12.4f}")

    print("\n* = statistically significant at p < 0.05")

    # Create comparison plot (2x4 grid)
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for idx, r in enumerate(results):
        if idx >= 8:
            break
        ax = axes[idx]

        # Add jitter for visibility
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=60, c='steelblue', edgecolors='navy')
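        # Jitter is applied to the plotted points only; the trend line below uses the
        # un-jittered least-squares fit computed in the correlation step.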

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8)

        ax.set_xlabel('Conceptual Depth', fontsize=10)
        ax.set_ylabel(r['metric'], fontsize=10)

        sig_marker = "*" if r['pearson_p'] < 0.05 else ""
        ax.set_title(f"r = {r['pearson_r']:.3f}{sig_marker}, R² = {r['r_squared']:.3f}", fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for idx in range(len(results), 8):
        axes[idx].set_visible(False)

    plt.suptitle('Conceptual Depth vs Graph Metrics (n=33 non-letter nodes)', fontsize=12, y=1.02)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_comparison.png', dpi=150, bbox_inches='tight')
    print("\nComparison plot saved to: centrality_comparison.png")

    # Create individual detailed plots for top 4 metrics
    fig2, axes2 = plt.subplots(2, 2, figsize=(12, 10))
    axes2 = axes2.flatten()

    for idx, r in enumerate(results[:4]):
        ax = axes2[idx]

        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=80, c='steelblue', edgecolors='navy')

        # Add labels
        for i, name in enumerate(names):
            ax.annotate(name, (depths[i], r['values'][i] + jitter[i]),
                        fontsize=7, alpha=0.7, xytext=(3, 3), textcoords='offset points')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8,
                label=f'y = {r["slope"]:.4f}x + {r["intercept"]:.4f}')

        ax.set_xlabel('Conceptual Depth', fontsize=11)
        ax.set_ylabel(r['metric'], fontsize=11)

        sig_text = " (significant)" if r['pearson_p'] < 0.05 else " (not significant)"
        ax.set_title(f"{r['metric']}\nPearson r = {r['pearson_r']:.3f} (p = {r['pearson_p']:.4f}){sig_text}",
                     fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Top 4 Metrics: Conceptual Depth Correlations', fontsize=13)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\top_metrics_detailed.png', dpi=150, bbox_inches='tight')
    print("Detailed plot saved to: top_metrics_detailed.png")

    # Save results to JSON for paper
    output_data = {
        'analysis_type': 'centrality_correlation',
        'n_nodes': len(names),
        'metrics': []
    }
    for r in results:
        output_data['metrics'].append({
            'name': r['metric'],
            'key': r['key'],
            'pearson_r': round(r['pearson_r'], 4),
            'pearson_p': round(r['pearson_p'], 6),
            'spearman_r': round(r['spearman_r'], 4),
            'spearman_p': round(r['spearman_p'], 6),
            'r_squared': round(r['r_squared'], 4),
            'significant': bool(r['pearson_p'] < 0.05)
        })

    with open(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print("Results saved to: centrality_results.json")

    # Print data table for paper
    print("\n" + "=" * 80)
    print("DATA TABLE FOR PAPER")
    print("=" * 80)
    print(f"{'Node':<25} {'Depth':>6} {'Degree':>8} {'Between':>8} {'Close':>8} {'Eigen':>8} {'PageRank':>8}")
    print("-" * 80)

    sorted_nodes = sorted(zip(names, depths), key=lambda x: x[1])
    for name, depth in sorted_nodes:
        deg = metrics['degree'][name]
        bet = metrics['betweenness'][name]
        clo = metrics['closeness'][name]
        eig = metrics['eigenvector'][name]
        pr = metrics['pagerank'][name]
        print(f"{name:<25} {depth:>6.0f} {deg:>8.4f} {bet:>8.4f} {clo:>8.4f} {eig:>8.4f} {pr:>8.4f}")


if __name__ == '__main__':
    main()