#!/usr/bin/env python3
"""
Generate SQL INSERT statements for ARC puzzle data.

Reads all JSON files from arc_data/training and arc_data/evaluation
and creates INSERT statements for the arc_jsons table.

Usage:
    python3 generate_sql.py > insert_arc_data.sql   # all statements
    python3 generate_sql.py --preview               # first few only
"""

import json
import os
import sys
from pathlib import Path

# Number of INSERT statements printed when --preview is passed.
PREVIEW_LIMIT = 5


def escape_sql_string(s):
    """Escape a string so it can be embedded in a single-quoted SQL literal.

    Single quotes are doubled and backslashes are doubled.

    NOTE(review): doubling backslashes is only correct for databases that
    treat backslash as an escape character inside string literals; confirm
    this matches the target database.

    Args:
        s: The raw text to escape.

    Returns:
        The escaped text (without surrounding quotes).
    """
    return s.replace("'", "''").replace("\\", "\\\\")


def generate_insert_statement(file_path):
    """Build the INSERT statement for a single JSON file.

    The row id is the filename without its extension and the json column is
    the file's text content with surrounding whitespace stripped. The content
    is not parsed or validated as JSON.

    Args:
        file_path: Path (str or Path) of the JSON file to read.

    Returns:
        A single ``INSERT INTO arc_jsons ...;`` statement as a string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The id comes from the filename, so it must be escaped just like the
    # payload; otherwise a quote in a filename yields broken SQL.
    puzzle_id = escape_sql_string(Path(file_path).stem)

    # Read as UTF-8 explicitly rather than relying on the platform's
    # locale-dependent default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        json_content = f.read().strip()

    escaped_json = escape_sql_string(json_content)

    return f"INSERT INTO arc_jsons (id, json) VALUES ('{puzzle_id}', '{escaped_json}');"


def main():
    """Print INSERT statements for every ARC JSON file to stdout.

    Files are discovered under ``arc_data/training`` and
    ``arc_data/evaluation`` relative to the current working directory.
    Informational lines are emitted as SQL comments (``-- ...``) so the
    output can be redirected straight into a .sql file. Per-file errors are
    reported on stderr and do not stop processing.

    Passing ``--preview`` on the command line limits output to the first
    PREVIEW_LIMIT statements.
    """
    base_dir = Path('arc_data')

    # Sorted so the output is deterministic across runs and platforms.
    training_files = sorted(base_dir.glob('training/*.json'))
    evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
    all_files = training_files + evaluation_files

    print(f"-- Found {len(training_files)} training files")
    print(f"-- Found {len(evaluation_files)} evaluation files")
    print(f"-- Total: {len(all_files)} files")
    print()

    preview_mode = '--preview' in sys.argv

    if preview_mode:
        print(f"-- PREVIEW MODE: Showing first {PREVIEW_LIMIT} INSERT statements")
        print()
        files_to_show = all_files[:PREVIEW_LIMIT]
    else:
        print("-- Generating all INSERT statements...")
        print()
        files_to_show = all_files

    generated = 0
    for file_path in files_to_show:
        # Deliberately broad: this is the top-level, best-effort boundary.
        # One unreadable file is reported and skipped, not fatal.
        try:
            sql = generate_insert_statement(file_path)
            print(sql)
            generated += 1
        except Exception as e:
            print(f"-- ERROR processing {file_path}: {e}", file=sys.stderr)

    if preview_mode:
        # Count what was actually left out; subtracting a fixed 5 goes
        # negative when fewer than 5 files exist.
        remaining = len(all_files) - len(files_to_show)
        print()
        print(f"-- ... and {remaining} more")
        print()
        print("-- To generate all statements, run: python3 generate_sql.py > insert_arc_data.sql")
        print("-- To see more preview: python3 generate_sql.py --preview")
    else:
        # Report the number of statements really emitted, not the number of
        # files found, so failures are not counted as successes.
        print()
        print(f"-- Successfully generated {generated} INSERT statements")
        failed = len(files_to_show) - generated
        if failed:
            print(f"-- {failed} file(s) could not be processed", file=sys.stderr)


if __name__ == '__main__':
    main()