"""
Plot correlation between minimum hops to letter nodes and conceptual depth.
"""

import json
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

def load_slipnet(filepath):
    """Load a slipnet description from a JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON text is UTF-8 by specification. Passing the encoding explicitly
    # keeps the result independent of the platform's locale encoding
    # (for example cp1252 on Windows), which would otherwise garble or
    # reject non-ASCII characters.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

def _collect_points(data):
    """Gather the plottable (non-letter) nodes from loaded slipnet data.

    A node is kept when its ``minPathToLetter`` entry carries a ``hops``
    value greater than zero. Nodes with zero hops (the letter nodes
    themselves) or without a hop count are skipped.

    Args:
        data: Decoded slipnet document; must contain a ``'nodes'`` list of
            dicts with ``'name'`` and ``'conceptualDepth'`` keys.

    Returns:
        Tuple ``(names, depths, hops, is_unreachable)``. ``names`` is a list;
        the other three are NumPy arrays of the same length.
        ``is_unreachable[i]`` is True when node ``i`` has no
        ``nearestLetter`` recorded.
    """
    names = []
    depths = []
    hops = []
    is_unreachable = []

    for node in data['nodes']:
        # ``or {}`` covers an explicit JSON null as well as a missing key.
        path_info = node.get('minPathToLetter') or {}
        hop_count = path_info.get('hops')

        # Skip letter nodes (hops 0) and nodes with no hop count at all.
        if hop_count is None or hop_count <= 0:
            continue

        names.append(node['name'])
        depths.append(node['conceptualDepth'])
        hops.append(hop_count)
        # Unreachable nodes have no nearestLetter.
        is_unreachable.append(path_info.get('nearestLetter') is None)

    return names, np.array(depths), np.array(hops), np.array(is_unreachable, dtype=bool)


def _print_report(names, depths, hops, correlation, p_value,
                  spearman_corr, spearman_p, z):
    """Print the correlation statistics and a per-node table to stdout."""
    print(f"Number of nodes with paths (excluding letters): {len(names)}")
    print(f"\nPearson correlation: r = {correlation:.4f}, p-value = {p_value:.6f}")
    print(f"Spearman correlation: rho = {spearman_corr:.4f}, p-value = {spearman_p:.6f}")
    print(f"\nLinear regression: hops = {z[0]:.4f} * depth + {z[1]:.4f}")

    print("\nData points:")
    print(f"{'Node':<30} {'Depth':<10} {'Hops':<10}")
    print("-" * 50)
    # Order rows by hop count first, then by conceptual depth.
    for name, depth, hop in sorted(zip(names, depths, hops), key=lambda x: (x[2], x[1])):
        print(f"{name:<30} {depth:<10.1f} {hop:<10}")


def main(filepath=r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json',
         output_path=r'C:\Users\alexa\copycat\slipnet_analysis\depth_hops_correlation.png'):
    """Plot conceptual depth against minimum hops to a letter node.

    Loads the slipnet JSON, computes Pearson and Spearman correlations and a
    first-degree polynomial fit between conceptual depth and hop count,
    prints a report to stdout, and saves a labelled scatter plot as a PNG.

    Args:
        filepath: Slipnet JSON file to analyse.
        output_path: Where the PNG figure is written.

    Raises:
        ValueError: If fewer than two non-letter nodes with a hop count are
            present, since no correlation or fit can be computed.
    """
    data = load_slipnet(filepath)
    names, depths, hops, is_unreachable = _collect_points(data)

    # The correlation and the line fit both need at least two observations;
    # fail here with a clear message instead of deep inside scipy/numpy.
    if len(names) < 2:
        raise ValueError(
            f"Need at least 2 non-letter nodes with a hop count to compute a "
            f"correlation; found {len(names)} in {filepath}"
        )

    # Compute correlation
    correlation, p_value = stats.pearsonr(depths, hops)
    spearman_corr, spearman_p = stats.spearmanr(depths, hops)

    # Create the plot
    fig, ax = plt.subplots(figsize=(10, 8))

    # Vertical jitter separates points that share the same integer hop count.
    jitter = np.random.normal(0, 0.08, len(hops))
    jittered_hops = hops + jitter

    # Plot reachable nodes in blue
    reachable_mask = ~is_unreachable
    ax.scatter(depths[reachable_mask], jittered_hops[reachable_mask],
               alpha=0.7, s=100, c='steelblue', edgecolors='navy', label='Reachable')

    # Plot unreachable nodes in red
    if np.any(is_unreachable):
        ax.scatter(depths[is_unreachable], jittered_hops[is_unreachable],
                   alpha=0.7, s=100, c='crimson', edgecolors='darkred', label='Unreachable (2×max)')

    # Label each point at its jittered position so text follows the marker.
    for i, name in enumerate(names):
        ax.annotate(name, (depths[i], jittered_hops[i]), fontsize=8, alpha=0.8,
                    xytext=(5, 5), textcoords='offset points')

    # Add trend line (fit uses the un-jittered hop counts)
    z = np.polyfit(depths, hops, 1)
    p = np.poly1d(z)
    x_line = np.linspace(depths.min(), depths.max(), 100)
    ax.plot(x_line, p(x_line), "r--", alpha=0.8, label=f'Linear fit (y = {z[0]:.3f}x + {z[1]:.2f})')

    # Labels and title
    ax.set_xlabel('Conceptual Depth', fontsize=12)
    ax.set_ylabel('Minimum Hops to Letter Node (Erdos-style)', fontsize=12)
    ax.set_title('Correlation: Conceptual Depth vs Hops to Nearest Letter\n'
                 f'Pearson r = {correlation:.3f} (p = {p_value:.4f}), '
                 f'Spearman rho = {spearman_corr:.3f} (p = {spearman_p:.4f})',
                 fontsize=11)

    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
    max_hops = int(hops.max())
    ax.set_yticks(range(1, max_hops + 1))
    ax.set_ylim(0.5, max_hops + 0.5)

    # Print statistics
    _print_report(names, depths, hops, correlation, p_value,
                  spearman_corr, spearman_p, z)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150)
    # Release the figure's memory once it is on disk.
    plt.close(fig)
    print(f"\nPlot saved to: {output_path}")

# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if "__main__" == __name__:
    main()