#!/usr/bin/env python3
"""
Generate SQL INSERT statements for ARC puzzle data.

Reads all JSON files from arc_data/training and arc_data/evaluation
and creates INSERT statements for the arc_jsons table.
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
def escape_sql_string(s, *, escape_backslashes=True):
    """Escape text for embedding inside a single-quoted SQL string literal.

    Single quotes are always doubled.  Backslashes are doubled too by
    default, which is what a server needs when it treats the backslash as
    an escape character inside string literals.  A server that follows the
    SQL standard keeps backslashes literal, so doubling them there would
    alter the stored value; pass ``escape_backslashes=False`` for such
    targets.

    Args:
        s: The text to escape.
        escape_backslashes: When true (the default), every backslash is
            doubled in addition to the quote escaping.

    Returns:
        The escaped text, without surrounding quotes.
    """
    escaped = s.replace("'", "''")
    if escape_backslashes:
        # Quote doubling never introduces backslashes, so doing this second
        # cannot double-escape anything produced by the previous step.
        escaped = escaped.replace("\\", "\\\\")
    return escaped
|
|
|
|
def generate_insert_statement(file_path):
    """Generate INSERT statement for a single JSON file.

    The puzzle id is the file name without its extension.  The file's text,
    with leading and trailing whitespace stripped, is stored as the JSON
    value; it is not parsed or validated here.  Both values are passed
    through ``escape_sql_string`` before being placed in the statement.

    Args:
        file_path: Path (``str`` or ``Path``) of the JSON file to read.

    Returns:
        One ``INSERT INTO arc_jsons (id, json) VALUES (...);`` statement.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = Path(file_path)

    # The id comes from a file name, so it can contain quotes just like the
    # payload can; escape it instead of interpolating it raw.
    puzzle_id = escape_sql_string(path.stem)

    # Decode explicitly as UTF-8 rather than with the platform's locale
    # default, so the output does not depend on the machine it runs on.
    json_content = path.read_text(encoding="utf-8").strip()
    escaped_json = escape_sql_string(json_content)

    return f"INSERT INTO arc_jsons (id, json) VALUES ('{puzzle_id}', '{escaped_json}');"
|
|
|
|
def main():
    """Print INSERT statements for the ARC puzzle files to stdout.

    Collects ``arc_data/training/*.json`` and ``arc_data/evaluation/*.json``
    (relative to the current working directory), prints a short summary as
    SQL comments, then prints one INSERT statement per file.  When
    ``--preview`` is present in ``sys.argv`` only the first few files are
    processed.  A file that cannot be processed is reported on stderr and
    skipped; the remaining files are still processed.
    """
    preview_limit = 5
    base_dir = Path('arc_data')

    # Collect all JSON files (sorted so the output order is stable)
    training_files = sorted(base_dir.glob('training/*.json'))
    evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
    all_files = training_files + evaluation_files

    print(f"-- Found {len(training_files)} training files")
    print(f"-- Found {len(evaluation_files)} evaluation files")
    print(f"-- Total: {len(all_files)} files")
    print()

    # Check if user wants preview mode
    preview_mode = '--preview' in sys.argv

    if preview_mode:
        print(f"-- PREVIEW MODE: Showing first {preview_limit} INSERT statements")
        print()
        files_to_show = all_files[:preview_limit]
    else:
        print("-- Generating all INSERT statements...")
        print()
        files_to_show = all_files

    # Generate INSERT statements, counting outcomes so the summary below
    # reports what was actually emitted.
    generated = 0
    failed = 0
    for file_path in files_to_show:
        try:
            sql = generate_insert_statement(file_path)
        except Exception as e:
            # Best effort: report the bad file and carry on with the rest.
            failed += 1
            print(f"-- ERROR processing {file_path}: {e}", file=sys.stderr)
        else:
            print(sql)
            generated += 1

    if preview_mode:
        # Derive the remainder from what was shown; subtracting a fixed 5
        # goes negative when fewer than five files exist.
        remaining = len(all_files) - len(files_to_show)
        print()
        print(f"-- ... and {remaining} more")
        print()
        print("-- To generate all statements, run: python3 generate_sql.py > insert_arc_data.sql")
        print("-- To see more preview: python3 generate_sql.py --preview")
    else:
        print()
        print(f"-- Successfully generated {generated} INSERT statements")

    if failed:
        print(f"-- {failed} file(s) could not be processed", file=sys.stderr)
|
|
|
|
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|