"""
Compute various centrality and graph metrics for slipnet nodes.
Compare correlations with conceptual depth.
"""

import json
import numpy as np
import networkx as nx
from scipy import stats
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def load_slipnet(filepath):
    with open(filepath, 'r') as f:
        return json.load(f)

def build_graph(data):
    """Build an undirected graph from slipnet JSON."""
    G = nx.Graph()
    for node in data['nodes']:
        G.add_node(node['name'], depth=node['conceptualDepth'])
    for link in data['links']:
        G.add_edge(link['source'], link['destination'])
    return G

def get_letter_nodes():
    return set(chr(i) for i in range(ord('a'), ord('z') + 1))

def compute_all_metrics(G):
    """Compute all centrality and graph metrics."""
    metrics = {}

    # Degree centrality
    metrics['degree'] = nx.degree_centrality(G)

    # Betweenness centrality
    metrics['betweenness'] = nx.betweenness_centrality(G)

    # Closeness centrality
    metrics['closeness'] = nx.closeness_centrality(G)

    # Eigenvector centrality (may fail on disconnected graphs)
    try:
        metrics['eigenvector'] = nx.eigenvector_centrality(G, max_iter=1000)
    except nx.PowerIterationFailedConvergence:
        # For disconnected graphs, compute on largest component
        largest_cc = max(nx.connected_components(G), key=len)
        subG = G.subgraph(largest_cc)
        eig = nx.eigenvector_centrality(subG, max_iter=1000)
        # Assign 0 to disconnected nodes
        metrics['eigenvector'] = {n: eig.get(n, 0.0) for n in G.nodes()}

    # PageRank
    metrics['pagerank'] = nx.pagerank(G)

    # Clustering coefficient
    metrics['clustering'] = nx.clustering(G)

    # Average neighbor degree
    metrics['avg_neighbor_degree'] = nx.average_neighbor_degree(G)

    # Eccentricity (only for connected components)
    metrics['eccentricity'] = {}
    for component in nx.connected_components(G):
        subG = G.subgraph(component)
        ecc = nx.eccentricity(subG)
        metrics['eccentricity'].update(ecc)
    # Disconnected nodes get max eccentricity + 1
    max_ecc = max(metrics['eccentricity'].values()) if metrics['eccentricity'] else 0
    for n in G.nodes():
        if n not in metrics['eccentricity']:
            metrics['eccentricity'][n] = max_ecc + 1

    return metrics

def main():
    filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json'
    data = load_slipnet(filepath)

    print(f"Loaded slipnet with {data['nodeCount']} nodes and {data['linkCount']} links")

    # Build graph
    G = build_graph(data)
    print(f"Built graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

    # Get letter nodes
    letter_nodes = get_letter_nodes()

    # Compute all metrics
    print("\nComputing centrality metrics...")
    metrics = compute_all_metrics(G)

    # Extract non-letter nodes with their depths
    names = []
    depths = []
    for node in data['nodes']:
        if node['name'] not in letter_nodes:
            names.append(node['name'])
            depths.append(node['conceptualDepth'])

    depths = np.array(depths)

    # Compute correlations for each metric
    print("\n" + "=" * 80)
    print("CORRELATION ANALYSIS: Conceptual Depth vs Graph Metrics")
    print("=" * 80)

    results = []

    metric_names = {
        'degree': 'Degree Centrality',
        'betweenness': 'Betweenness Centrality',
        'closeness': 'Closeness Centrality',
        'eigenvector': 'Eigenvector Centrality',
        'pagerank': 'PageRank',
        'clustering': 'Clustering Coefficient',
        'avg_neighbor_degree': 'Avg Neighbor Degree',
        'eccentricity': 'Eccentricity'
    }

    for metric_key, metric_label in metric_names.items():
        metric_values = np.array([metrics[metric_key][n] for n in names])

        # Skip if all values are the same (no variance)
        if np.std(metric_values) == 0:
            print(f"\n{metric_label}: No variance, skipping")
            continue

        # Compute correlations
        pearson_r, pearson_p = stats.pearsonr(depths, metric_values)
        spearman_r, spearman_p = stats.spearmanr(depths, metric_values)

        # R-squared
        z = np.polyfit(depths, metric_values, 1)
        y_pred = np.polyval(z, depths)
        ss_res = np.sum((metric_values - y_pred) ** 2)
        ss_tot = np.sum((metric_values - np.mean(metric_values)) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0

        results.append({
            'metric': metric_label,
            'key': metric_key,
            'pearson_r': pearson_r,
            'pearson_p': pearson_p,
            'spearman_r': spearman_r,
            'spearman_p': spearman_p,
            'r_squared': r_squared,
            'slope': z[0],
            'intercept': z[1],
            'values': metric_values
        })

        print(f"\n{metric_label}:")
        print(f"  Pearson r = {pearson_r:.4f} (p = {pearson_p:.6f})")
        print(f"  Spearman rho = {spearman_r:.4f} (p = {spearman_p:.6f})")
        print(f"  R-squared = {r_squared:.4f}")

    # Sort by absolute Pearson correlation
    results.sort(key=lambda x: abs(x['pearson_r']), reverse=True)

    print("\n" + "=" * 80)
    print("SUMMARY: Metrics ranked by |Pearson r|")
    print("=" * 80)
    print(f"{'Metric':<25} {'Pearson r':>12} {'p-value':>12} {'Spearman':>12} {'R-squared':>12}")
    print("-" * 75)
    for r in results:
        sig = "*" if r['pearson_p'] < 0.05 else " "
        print(f"{r['metric']:<25} {r['pearson_r']:>11.4f}{sig} {r['pearson_p']:>12.6f} {r['spearman_r']:>12.4f} {r['r_squared']:>12.4f}")

    print("\n* = statistically significant at p < 0.05")

    # Create comparison plot (2x4 grid)
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.flatten()

    for idx, r in enumerate(results):
        if idx >= 8:
            break
        ax = axes[idx]

        # Add jitter for visibility
        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=60, c='steelblue', edgecolors='navy')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8)

        ax.set_xlabel('Conceptual Depth', fontsize=10)
        ax.set_ylabel(r['metric'], fontsize=10)

        sig_marker = "*" if r['pearson_p'] < 0.05 else ""
        ax.set_title(f"r = {r['pearson_r']:.3f}{sig_marker}, R² = {r['r_squared']:.3f}", fontsize=10)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for idx in range(len(results), 8):
        axes[idx].set_visible(False)

    plt.suptitle('Conceptual Depth vs Graph Metrics (n=33 non-letter nodes)', fontsize=12, y=1.02)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_comparison.png', dpi=150, bbox_inches='tight')
    print(f"\nComparison plot saved to: centrality_comparison.png")

    # Create individual detailed plots for top 4 metrics
    fig2, axes2 = plt.subplots(2, 2, figsize=(12, 10))
    axes2 = axes2.flatten()

    for idx, r in enumerate(results[:4]):
        ax = axes2[idx]

        jitter = np.random.normal(0, 0.02 * np.std(r['values']), len(r['values']))

        ax.scatter(depths, r['values'] + jitter, alpha=0.7, s=80, c='steelblue', edgecolors='navy')

        # Add labels
        for i, name in enumerate(names):
            ax.annotate(name, (depths[i], r['values'][i] + jitter[i]),
                       fontsize=7, alpha=0.7, xytext=(3, 3), textcoords='offset points')

        # Trend line
        x_line = np.linspace(min(depths), max(depths), 100)
        y_line = r['slope'] * x_line + r['intercept']
        ax.plot(x_line, y_line, 'r--', alpha=0.8,
               label=f'y = {r["slope"]:.4f}x + {r["intercept"]:.4f}')

        ax.set_xlabel('Conceptual Depth', fontsize=11)
        ax.set_ylabel(r['metric'], fontsize=11)

        sig_text = " (significant)" if r['pearson_p'] < 0.05 else " (not significant)"
        ax.set_title(f"{r['metric']}\nPearson r = {r['pearson_r']:.3f} (p = {r['pearson_p']:.4f}){sig_text}",
                    fontsize=11)
        ax.legend(loc='best', fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.suptitle('Top 4 Metrics: Conceptual Depth Correlations', fontsize=13)
    plt.tight_layout()
    plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\top_metrics_detailed.png', dpi=150, bbox_inches='tight')
    print(f"Detailed plot saved to: top_metrics_detailed.png")

    # Save results to JSON for paper
    output_data = {
        'analysis_type': 'centrality_correlation',
        'n_nodes': len(names),
        'metrics': []
    }
    for r in results:
        output_data['metrics'].append({
            'name': r['metric'],
            'key': r['key'],
            'pearson_r': round(r['pearson_r'], 4),
            'pearson_p': round(r['pearson_p'], 6),
            'spearman_r': round(r['spearman_r'], 4),
            'spearman_p': round(r['spearman_p'], 6),
            'r_squared': round(r['r_squared'], 4),
            'significant': bool(r['pearson_p'] < 0.05)
        })

    with open(r'C:\Users\alexa\copycat\slipnet_analysis\centrality_results.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print(f"Results saved to: centrality_results.json")

    # Print data table for paper
    print("\n" + "=" * 80)
    print("DATA TABLE FOR PAPER")
    print("=" * 80)
    print(f"{'Node':<25} {'Depth':>6} {'Degree':>8} {'Between':>8} {'Close':>8} {'Eigen':>8} {'PageRank':>8}")
    print("-" * 80)

    sorted_nodes = sorted(zip(names, depths), key=lambda x: x[1])
    for name, depth in sorted_nodes:
        deg = metrics['degree'][name]
        bet = metrics['betweenness'][name]
        clo = metrics['closeness'][name]
        eig = metrics['eigenvector'][name]
        pr = metrics['pagerank'][name]
        print(f"{name:<25} {depth:>6.0f} {deg:>8.4f} {bet:>8.4f} {clo:>8.4f} {eig:>8.4f} {pr:>8.4f}")

if __name__ == '__main__':
    main()