initial
This commit is contained in:
79
scripts/generate_sql.py
Normal file
79
scripts/generate_sql.py
Normal file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate SQL INSERT statements for ARC puzzle data
|
||||
Reads all JSON files from arc_data/training and arc_data/evaluation
|
||||
and creates INSERT statements for the arc_jsons table
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
def escape_sql_string(s):
    """Escape a string for use inside a single-quoted SQL literal.

    Every single quote is doubled (' -> '') and every backslash is
    doubled (\\ -> \\\\). All other characters pass through unchanged.

    NOTE(review): doubling backslashes is only appropriate when the target
    database treats backslash as an escape character inside string
    literals -- confirm against the database that owns arc_jsons.
    """
    # One pass over the input; neither replacement produces a character
    # that the other one rewrites, so the order of the two is irrelevant.
    doubled = str.maketrans({"'": "''", "\\": "\\\\"})
    return s.translate(doubled)
|
||||
|
||||
def generate_insert_statement(file_path):
    """Build the INSERT statement for a single puzzle JSON file.

    The row id is the file name without its extension. The json column
    receives the file's text with leading/trailing whitespace stripped;
    the content is not parsed or validated as JSON.

    Args:
        file_path: Location of the JSON file (str or Path).

    Returns:
        One ``INSERT INTO arc_jsons (id, json) VALUES (...);`` statement.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    source = Path(file_path)

    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default encoding, which differs between systems.
    json_content = source.read_text(encoding="utf-8").strip()

    # Both values are spliced into single-quoted literals, so both go
    # through the same escaping (the id comes from a file name and is not
    # guaranteed to be quote-free).
    puzzle_id = escape_sql_string(source.stem)
    escaped_json = escape_sql_string(json_content)

    return f"INSERT INTO arc_jsons (id, json) VALUES ('{puzzle_id}', '{escaped_json}');"
|
||||
|
||||
def main():
    """Print INSERT statements for every puzzle file under ``arc_data``.

    Files matching ``arc_data/training/*.json`` and
    ``arc_data/evaluation/*.json`` are processed in sorted order, training
    first. Statements and ``--`` comment lines go to stdout; per-file
    errors go to stderr and do not stop the run.

    When ``--preview`` is present in ``sys.argv`` only the first five files
    are processed and usage hints are printed afterwards.
    """
    base_dir = Path('arc_data')

    # Collect all JSON files
    training_files = sorted(base_dir.glob('training/*.json'))
    evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
    all_files = training_files + evaluation_files

    print(f"-- Found {len(training_files)} training files")
    print(f"-- Found {len(evaluation_files)} evaluation files")
    print(f"-- Total: {len(all_files)} files")
    print()

    # Check if user wants preview mode
    preview_mode = '--preview' in sys.argv

    if preview_mode:
        print("-- PREVIEW MODE: Showing first 5 INSERT statements")
        print()
        files_to_show = all_files[:5]
    else:
        print("-- Generating all INSERT statements...")
        print()
        files_to_show = all_files

    # Generate INSERT statements, counting only the ones actually emitted
    # so the closing summary does not over-report when a file fails.
    generated = 0
    for file_path in files_to_show:
        try:
            sql = generate_insert_statement(file_path)
            print(sql)
            generated += 1
        except Exception as e:
            # Best effort: report the failure and continue with the rest.
            print(f"-- ERROR processing {file_path}: {e}", file=sys.stderr)

    if preview_mode:
        # Derive the remainder from what was shown; subtracting a fixed 5
        # goes negative when fewer than five files exist.
        remaining = len(all_files) - len(files_to_show)
        print()
        if remaining > 0:
            print(f"-- ... and {remaining} more")
            print()
        print("-- To generate all statements, run: python3 generate_sql.py > insert_arc_data.sql")
        print("-- To see more preview: python3 generate_sql.py --preview")
    else:
        print()
        print(f"-- Successfully generated {generated} INSERT statements")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user