Files
arc-humans-interface-db/scripts/generate_sql.py
bmachado b8856c0660 initial
2025-11-05 00:24:05 +00:00

80 lines
2.3 KiB
Python

#!/usr/bin/env python3
"""
Generate SQL INSERT statements for ARC puzzle data
Reads all JSON files from arc_data/training and arc_data/evaluation
and creates INSERT statements for the arc_jsons table
"""
import json
import os
import sys
from pathlib import Path
def escape_sql_string(s):
    """Return *s* prepared for embedding inside a single-quoted SQL literal.

    Every single quote is doubled and every backslash is doubled; all other
    characters pass through unchanged.

    NOTE(review): doubling backslashes suits dialects that treat a backslash
    as an escape character inside string literals -- confirm that this is
    what the target database expects.
    """
    # A single translate pass handles both characters. Neither replacement
    # text contains the other character, so ordering is irrelevant.
    substitutions = {ord("'"): "''", ord("\\"): "\\\\"}
    return s.translate(substitutions)
def generate_insert_statement(file_path):
    """Build the ``INSERT`` statement for a single puzzle JSON file.

    The row id is the file name without its extension, and the ``json``
    column receives the file's text with surrounding whitespace stripped.
    The text is embedded as-is; it is not parsed or validated as JSON.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) of the file to read.

    Returns:
        One ``INSERT INTO arc_jsons (id, json) VALUES (...);`` statement.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    path = Path(file_path)

    # Decode explicitly as UTF-8 rather than with the platform's default
    # encoding, so the generated SQL is identical on every machine.
    with open(path, 'r', encoding='utf-8') as f:
        json_content = f.read().strip()

    # The id originates from a file name, so it gets the same escaping as the
    # payload; a quote in the name would otherwise end the literal early.
    puzzle_id = escape_sql_string(path.stem)
    escaped_json = escape_sql_string(json_content)

    return f"INSERT INTO arc_jsons (id, json) VALUES ('{puzzle_id}', '{escaped_json}');"
def main():
    """Print INSERT statements for all puzzle JSON files to stdout.

    Files are collected from ``arc_data/training`` and
    ``arc_data/evaluation`` relative to the current working directory, each
    group sorted by path. Informational lines are emitted as ``-- ...`` SQL
    comments. When ``--preview`` is present in ``sys.argv`` only the first
    five statements are produced.

    A file that cannot be read or decoded is reported on stderr and skipped;
    the remaining files are still processed.
    """
    preview_limit = 5
    base_dir = Path('arc_data')

    # Collect all JSON files
    training_files = sorted(base_dir.glob('training/*.json'))
    evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
    all_files = training_files + evaluation_files

    print(f"-- Found {len(training_files)} training files")
    print(f"-- Found {len(evaluation_files)} evaluation files")
    print(f"-- Total: {len(all_files)} files")
    print()

    # Check if user wants preview mode
    preview_mode = '--preview' in sys.argv
    if preview_mode:
        print(f"-- PREVIEW MODE: Showing first {preview_limit} INSERT statements")
        print()
        files_to_show = all_files[:preview_limit]
    else:
        print("-- Generating all INSERT statements...")
        print()
        files_to_show = all_files

    # Generate INSERT statements, counting outcomes so the summary is accurate.
    generated = 0
    failed = 0
    for file_path in files_to_show:
        try:
            sql = generate_insert_statement(file_path)
        except (OSError, ValueError) as e:
            # OSError covers unreadable files; ValueError covers decoding
            # problems (UnicodeDecodeError is a ValueError subclass).
            print(f"-- ERROR processing {file_path}: {e}", file=sys.stderr)
            failed += 1
        else:
            # Printing happens outside the try so that a stdout failure is
            # not misreported as a problem with this particular file.
            print(sql)
            generated += 1

    if preview_mode:
        # Fewer files than the preview limit means nothing is left over; never
        # report a negative remainder.
        remaining = len(all_files) - len(files_to_show)
        print()
        if remaining > 0:
            print(f"-- ... and {remaining} more")
            print()
        print("-- To generate all statements, run: python3 generate_sql.py > insert_arc_data.sql")
        print("-- To see more preview: python3 generate_sql.py --preview")
    else:
        print()
        print(f"-- Successfully generated {generated} INSERT statements")

    if failed:
        print(f"-- WARNING: {failed} file(s) could not be processed", file=sys.stderr)


if __name__ == '__main__':
    main()