""" Plot correlation between minimum hops to letter nodes and conceptual depth. """ import json import matplotlib matplotlib.use('Agg') # Non-interactive backend import matplotlib.pyplot as plt import numpy as np from scipy import stats def load_slipnet(filepath): with open(filepath, 'r') as f: return json.load(f) def main(): filepath = r'C:\Users\alexa\copycat\slipnet_analysis\slipnet.json' data = load_slipnet(filepath) # Extract data points (excluding letter nodes themselves) names = [] depths = [] hops = [] is_unreachable = [] # Track which nodes are unreachable for node in data['nodes']: name = node['name'] depth = node['conceptualDepth'] path_info = node.get('minPathToLetter', {}) hop_count = path_info.get('hops') nearest = path_info.get('nearestLetter') # Skip letter nodes (hops 0) if hop_count is not None and hop_count > 0: names.append(name) depths.append(depth) hops.append(hop_count) # Unreachable nodes have no nearestLetter is_unreachable.append(nearest is None) # Convert to numpy arrays depths = np.array(depths) hops = np.array(hops) is_unreachable = np.array(is_unreachable) # Compute correlation correlation, p_value = stats.pearsonr(depths, hops) spearman_corr, spearman_p = stats.spearmanr(depths, hops) # Create the plot fig, ax = plt.subplots(figsize=(10, 8)) # Scatter plot with jitter for overlapping points jitter = np.random.normal(0, 0.08, len(hops)) # Plot reachable nodes in blue reachable_mask = ~is_unreachable ax.scatter(depths[reachable_mask], hops[reachable_mask] + jitter[reachable_mask], alpha=0.7, s=100, c='steelblue', edgecolors='navy', label='Reachable') # Plot unreachable nodes in red if np.any(is_unreachable): ax.scatter(depths[is_unreachable], hops[is_unreachable] + jitter[is_unreachable], alpha=0.7, s=100, c='crimson', edgecolors='darkred', label='Unreachable (2×max)') # Add labels to each point for i, name in enumerate(names): ax.annotate(name, (depths[i], hops[i] + jitter[i]), fontsize=8, alpha=0.8, xytext=(5, 5), textcoords='offset points') # Add trend line z = np.polyfit(depths, hops, 1) p = np.poly1d(z) x_line = np.linspace(min(depths), max(depths), 100) ax.plot(x_line, p(x_line), "r--", alpha=0.8, label=f'Linear fit (y = {z[0]:.3f}x + {z[1]:.2f})') # Labels and title ax.set_xlabel('Conceptual Depth', fontsize=12) ax.set_ylabel('Minimum Hops to Letter Node (Erdos-style)', fontsize=12) ax.set_title('Correlation: Conceptual Depth vs Hops to Nearest Letter\n' f'Pearson r = {correlation:.3f} (p = {p_value:.4f}), ' f'Spearman rho = {spearman_corr:.3f} (p = {spearman_p:.4f})', fontsize=11) ax.legend(loc='upper left') ax.grid(True, alpha=0.3) max_hops = int(max(hops)) ax.set_yticks(range(1, max_hops + 1)) ax.set_ylim(0.5, max_hops + 0.5) # Print statistics print(f"Number of nodes with paths (excluding letters): {len(names)}") print(f"\nPearson correlation: r = {correlation:.4f}, p-value = {p_value:.6f}") print(f"Spearman correlation: rho = {spearman_corr:.4f}, p-value = {spearman_p:.6f}") print(f"\nLinear regression: hops = {z[0]:.4f} * depth + {z[1]:.4f}") print("\nData points:") print(f"{'Node':<30} {'Depth':<10} {'Hops':<10}") print("-" * 50) for name, depth, hop in sorted(zip(names, depths, hops), key=lambda x: (x[2], x[1])): print(f"{name:<30} {depth:<10.1f} {hop:<10}") plt.tight_layout() plt.savefig(r'C:\Users\alexa\copycat\slipnet_analysis\depth_hops_correlation.png', dpi=150) print(f"\nPlot saved to: C:\\Users\\alexa\\copycat\\slipnet_analysis\\depth_hops_correlation.png") if __name__ == '__main__': main()