This commit is contained in:
bmachado
2025-11-05 00:24:05 +00:00
commit b8856c0660
1157 changed files with 26817 additions and 0 deletions

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""One-off maintenance script: widen arc_jsons.json from TEXT to MEDIUMTEXT.

One puzzle file (4a21e3da.json, ~69KB) exceeded TEXT's 65,535-byte limit and
failed to insert during the bulk upload.  This script inspects the column
type, optionally alters it to MEDIUMTEXT (16MB limit), and re-inserts the
dropped record.
"""
import os

import pymysql
from dotenv import load_dotenv

load_dotenv()
config = {
    'host': os.getenv('DB_HOST'),
    'user': os.getenv('DB_USER'),
    'password': os.getenv('DB_PASSWORD'),
    'database': os.getenv('DB_NAME'),
    'port': int(os.getenv('DB_PORT', 3306)),
}
conn = pymysql.connect(**config)
try:
    cursor = conn.cursor()
    # Inspect the declared type of the `json` column.
    cursor.execute("SHOW COLUMNS FROM arc_jsons WHERE Field = 'json'")
    result = cursor.fetchone()
    if result is None:
        # Guard: without this, a missing column/table raised TypeError on
        # result[1] below.
        raise SystemExit("Column 'json' not found in table arc_jsons")
    column_type = result[1]
    print(f"Current column type: {column_type}")
    # Only plain TEXT needs widening; MEDIUMTEXT/LONGTEXT are already big enough.
    type_lower = column_type.lower()
    if 'text' in type_lower and 'medium' not in type_lower and 'long' not in type_lower:
        print("\n⚠ Column is TEXT (max 65,535 bytes)")
        print(" One file (4a21e3da.json) is ~69KB and failed to insert")
        print("\nRecommended fix: ALTER TABLE arc_jsons MODIFY json MEDIUMTEXT;")
        print(" MEDIUMTEXT supports up to 16MB")
        response = input("\nApply fix now? (yes/no): ").strip().lower()
        if response in ['yes', 'y']:
            print("\nAltering column to MEDIUMTEXT...")
            cursor.execute("ALTER TABLE arc_jsons MODIFY json MEDIUMTEXT")
            conn.commit()
            print("✓ Column altered successfully!")
            # Now insert the record that previously failed.
            print("\nRe-inserting failed record (4a21e3da)...")
            with open('arc_data/evaluation/4a21e3da.json', 'r') as f:
                json_content = f.read().strip()
            # Parameterized query: the JSON payload never touches the SQL text.
            cursor.execute(
                "INSERT INTO arc_jsons (id, json) VALUES (%s, %s)",
                ('4a21e3da', json_content),
            )
            conn.commit()
            print("✓ Record inserted successfully!")
            # Final count
            cursor.execute("SELECT COUNT(*) FROM arc_jsons")
            count = cursor.fetchone()[0]
            print(f"\n✓ Total records in database: {count}")
    else:
        print(f"✓ Column type is sufficient: {column_type}")
finally:
    # Close the connection even if a query or the input prompt fails.
    conn.close()

View File

@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""
Extract test outputs from ARC puzzle JSON and store in solution column
This script adds a 'solution' column and populates it with the test output grid
"""
import json
import os
import sys
from pathlib import Path
import pymysql
from dotenv import load_dotenv
# Force unbuffered output
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
def load_env_config():
    """Assemble the pymysql connection kwargs from .env / environment vars."""
    load_dotenv()
    env = os.getenv
    return {
        'host': env('DB_HOST'),
        'user': env('DB_USER'),
        'password': env('DB_PASSWORD'),
        'database': env('DB_NAME'),
        'port': int(env('DB_PORT', 3306)),
        'charset': 'utf8mb4',
    }
def check_and_add_solution_column(cursor):
    """Make sure arc_jsons has a `solution` column, creating it if absent.

    Returns True whether the column pre-existed or was just added.
    """
    cursor.execute("""
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
        AND TABLE_NAME = 'arc_jsons'
        AND COLUMN_NAME = 'solution'
    """)
    already_present = cursor.fetchone() is not None
    if already_present:
        print("✓ Column 'solution' already exists")
    else:
        print("Adding 'solution' column to arc_jsons table...")
        cursor.execute("ALTER TABLE arc_jsons ADD COLUMN solution JSON AFTER json")
        print("✓ Column 'solution' added successfully")
    return True
def extract_test_output(json_content):
    """Return the first test case's output grid from a puzzle's JSON.

    Accepts either the raw JSON string or an already-parsed object.  Returns
    None when the puzzle has no test cases, the first test case has no
    output, or the JSON cannot be parsed.
    """
    try:
        # Parse JSON (handle both string and object forms).
        if isinstance(json_content, str):
            puzzle_data = json.loads(json_content)
        else:
            puzzle_data = json_content
        test_cases = puzzle_data.get('test') or []
        if test_cases:
            # Explicit `is not None` (not truthiness) so an empty grid []
            # is still returned instead of being silently dropped.
            output = test_cases[0].get('output')
            if output is not None:
                return output
        return None
    except Exception as e:
        # Best-effort: malformed rows are reported by the caller as errors.
        print(f"Error parsing JSON: {e}")
        return None
def update_solution(cursor, puzzle_id, json_content):
    """Extract the test output for one puzzle and persist it.

    Returns (True, None) on success, otherwise (False, <error message>).
    """
    test_output = extract_test_output(json_content)
    if test_output is None:
        return False, "No test output found"
    try:
        serialized = json.dumps(test_output)
        # Parameterized UPDATE keeps the payload out of the SQL text.
        cursor.execute(
            "UPDATE arc_jsons SET solution = %s WHERE id = %s",
            (serialized, puzzle_id),
        )
    except Exception as e:
        return False, str(e)
    return True, None
def main():
    """Interactive driver: ensure the `solution` column exists, then populate
    it with each puzzle's first test-case output.

    Returns a process exit code: 0 on success or user cancel, 1 on failure.
    Pass --yes on the command line to skip the confirmation prompt.
    """
    print("ARC Solution Extraction Tool")
    print("=" * 60)
    print("This script will:")
    print(" 1. Add a 'solution' column to arc_jsons (if needed)")
    print(" 2. Extract test outputs from JSON data")
    print(" 3. Store solutions in the new column")
    print("=" * 60)
    # Load configuration
    try:
        config = load_env_config()
        print(f"\n✓ Loaded configuration from .env")
        print(f" Host: {config['host']}")
        print(f" Database: {config['database']}")
        print(f" User: {config['user']}")
    except Exception as e:
        print(f"✗ Error loading configuration: {e}")
        return 1
    # Connect to database
    try:
        print(f"\nConnecting to database...")
        connection = pymysql.connect(**config)
        print(f"✓ Connected successfully")
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return 1
    try:
        cursor = connection.cursor()
        # Check if table exists
        cursor.execute("SHOW TABLES LIKE 'arc_jsons'")
        if not cursor.fetchone():
            print(f"✗ Table 'arc_jsons' does not exist")
            return 1
        # Get current count
        cursor.execute("SELECT COUNT(*) FROM arc_jsons")
        total_count = cursor.fetchone()[0]
        print(f"✓ Table 'arc_jsons' found ({total_count} records)")
        # Check/add solution column
        print()
        check_and_add_solution_column(cursor)
        # Commit the DDL/probe before the long UPDATE pass begins.
        connection.commit()
        # Ask for confirmation (unless --yes flag is provided)
        if '--yes' not in sys.argv:
            print(f"\n⚠ About to process {total_count} records")
            response = input("Continue? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                print("Extraction cancelled")
                return 0
        else:
            print(f"\n⚠ About to process {total_count} records (auto-confirmed with --yes flag)")
        # Fetch all records
        print(f"\nFetching records...")
        cursor.execute("SELECT id, json FROM arc_jsons")
        records = cursor.fetchall()
        print(f"✓ Retrieved {len(records)} records")
        # Process each record; all UPDATEs share one transaction, committed
        # only after the loop finishes.
        print(f"\nProcessing records...")
        updated = 0
        errors = 0
        no_output = 0
        for i, (puzzle_id, json_content) in enumerate(records, 1):
            success, error = update_solution(cursor, puzzle_id, json_content)
            if success:
                updated += 1
            elif error == "No test output found":
                no_output += 1
                if no_output <= 5:  # Show first 5 cases
                    print(f" ⚠ No output: {puzzle_id}")
            else:
                errors += 1
                if errors <= 5:  # Show first 5 errors
                    print(f" ✗ Error {puzzle_id}: {error}")
            # Show progress every 100 records
            if i % 100 == 0 or i == len(records):
                print(f" Progress: {i}/{len(records)} ({updated} updated, {no_output} no output, {errors} errors)")
        # Commit the transaction
        connection.commit()
        print(f"\n{'=' * 60}")
        print(f"✓ Extraction complete!")
        print(f" Successfully updated: {updated}")
        print(f" No test output: {no_output}")
        print(f" Errors: {errors}")
        print(f" Total processed: {len(records)}")
        # Show sample
        if updated > 0:
            print(f"\nSample record (first with solution):")
            cursor.execute("SELECT id, solution FROM arc_jsons WHERE solution IS NOT NULL LIMIT 1")
            sample = cursor.fetchone()
            if sample:
                sample_id, sample_solution = sample
                # Stored solutions are JSON-encoded 2-D grids (list of rows).
                solution_data = json.loads(sample_solution)
                rows = len(solution_data)
                cols = len(solution_data[0]) if rows > 0 else 0
                print(f" ID: {sample_id}")
                print(f" Solution grid: {cols}×{rows}")
                # Truncate very wide rows in the preview.
                print(f" First row: {solution_data[0][:10]}..." if cols > 10 else f" First row: {solution_data[0]}")
    except Exception as e:
        # Any failure rolls back the whole UPDATE pass.
        connection.rollback()
        print(f"\n✗ Error during extraction: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        connection.close()
        print(f"\n✓ Database connection closed")
    return 0
if __name__ == '__main__':
sys.exit(main())

79
scripts/generate_sql.py Normal file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""
Generate SQL INSERT statements for ARC puzzle data
Reads all JSON files from arc_data/training and arc_data/evaluation
and creates INSERT statements for the arc_jsons table
"""
import json
import os
import sys
from pathlib import Path
def escape_sql_string(s):
    """Escape *s* for embedding in a single-quoted SQL string literal.

    Doubles single quotes, then doubles backslashes (MySQL-style escaping).
    """
    quoted = s.replace("'", "''")
    return quoted.replace("\\", "\\\\")
def generate_insert_statement(file_path):
    """Build a single-row INSERT statement for one puzzle JSON file.

    The puzzle ID is the filename without its .json extension; the file's
    (stripped) content is SQL-escaped and inlined as the `json` value.
    """
    puzzle_id = Path(file_path).stem
    with open(file_path, 'r') as f:
        payload = f.read().strip()
    escaped = escape_sql_string(payload)
    return f"INSERT INTO arc_jsons (id, json) VALUES ('{puzzle_id}', '{escaped}');"
def main():
    """Emit INSERT statements for every local puzzle JSON to stdout.

    With --preview, only the first 5 statements are printed.  Errors go to
    stderr so redirected stdout stays valid SQL.
    """
    base_dir = Path('arc_data')
    # Collect all JSON files (training first, then evaluation)
    training_files = sorted(base_dir.glob('training/*.json'))
    evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
    all_files = training_files + evaluation_files
    # Leading "--" keeps these status lines as SQL comments in the output.
    print(f"-- Found {len(training_files)} training files")
    print(f"-- Found {len(evaluation_files)} evaluation files")
    print(f"-- Total: {len(all_files)} files")
    print()
    # Check if user wants preview mode
    preview_mode = '--preview' in sys.argv
    if preview_mode:
        print("-- PREVIEW MODE: Showing first 5 INSERT statements")
        print()
        files_to_show = all_files[:5]
    else:
        print("-- Generating all INSERT statements...")
        print()
        files_to_show = all_files
    # Generate INSERT statements
    for i, file_path in enumerate(files_to_show, 1):
        try:
            sql = generate_insert_statement(file_path)
            print(sql)
        except Exception as e:
            # Errors go to stderr to keep stdout clean SQL.
            print(f"-- ERROR processing {file_path}: {e}", file=sys.stderr)
    if preview_mode:
        print()
        print(f"-- ... and {len(all_files) - 5} more")
        print()
        print("-- To generate all statements, run: python3 generate_sql.py > insert_arc_data.sql")
        print("-- To see more preview: python3 generate_sql.py --preview")
    else:
        print()
        print(f"-- Successfully generated {len(all_files)} INSERT statements")
if __name__ == '__main__':
main()

View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""
Generate a CSV file with ARC V1 task IDs and their set (training/evaluation)
"""
import csv
def main():
    """Export ARC V1 task IDs to arc_v1_task_ids.csv with their split label."""
    def read_ids(path):
        # One ID per line; blank lines are skipped.
        with open(path, 'r') as fh:
            return [ln.strip() for ln in fh if ln.strip()]

    training_ids = read_ids('arc_v1_training_ids.txt')
    evaluation_ids = read_ids('arc_v1_evaluation_ids.txt')

    # Write header + one row per task, tagged with its split.
    with open('arc_v1_task_ids.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['id', 'set'])
        writer.writerows([tid, 'training'] for tid in training_ids)
        writer.writerows([tid, 'evaluation'] for tid in evaluation_ids)

    print("=" * 80)
    print("ARC V1 Task IDs - CSV Export")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)}")
    print(f"Evaluation Tasks: {len(evaluation_ids)}")
    print(f"Total: {len(training_ids) + len(evaluation_ids)}")
    print("\nFirst 10 rows (preview):")
    print("-" * 40)
    print("id,set")
    for tid in training_ids[:5]:
        print(f"{tid},training")
    for tid in evaluation_ids[:5]:
        print(f"{tid},evaluation")
    print("\n✓ Saved to: arc_v1_task_ids.csv")
    print("=" * 80)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
Generate a comprehensive JSON file with official ARC V1 task IDs
"""
import json
def main():
    """Write the official ARC V1 task-ID manifest as JSON plus a text list."""
    def read_ids(path):
        # One ID per line; blank lines are skipped.
        with open(path, 'r') as fh:
            return [ln.strip() for ln in fh if ln.strip()]

    training_ids = read_ids('arc_v1_training_ids.txt')
    evaluation_ids = read_ids('arc_v1_evaluation_ids.txt')
    total = len(training_ids) + len(evaluation_ids)

    # Comprehensive manifest with per-split counts and a sorted union.
    data = {
        "version": "ARC-AGI-1 (Official)",
        "source": "https://github.com/fchollet/ARC-AGI v1.0.2",
        "description": "Official ARC-AGI Version 1 task IDs - 400 training + 400 evaluation tasks",
        "total_tasks": total,
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids
        },
        "all_task_ids": sorted(training_ids + evaluation_ids)
    }
    with open('arc_v1_official_task_ids.json', 'w') as f:
        json.dump(data, f, indent=2)

    print("=" * 80)
    print("ARC V1 Official Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)}")
    print(f"Evaluation Tasks: {len(evaluation_ids)}")
    print(f"Total: {total}")
    print("\nFirst 10 Training IDs:")
    for tid in training_ids[:10]:
        print(f" {tid}")
    print("\nFirst 10 Evaluation IDs:")
    for tid in evaluation_ids[:10]:
        print(f" {tid}")
    print("\n✓ Saved to: arc_v1_official_task_ids.json")
    print("=" * 80)

    # Plain-text companion list.
    with open('arc_v1_all_ids.txt', 'w') as f:
        f.write("# ARC-AGI Version 1 Official Task IDs\n")
        f.write("# Source: https://github.com/fchollet/ARC-AGI v1.0.2\n")
        f.write(f"# Total: {total} tasks\n\n")
        f.write("## Training Tasks (400)\n")
        for tid in training_ids:
            f.write(f"{tid}\n")
        f.write("\n## Evaluation Tasks (400)\n")
        for tid in evaluation_ids:
            f.write(f"{tid}\n")
    print("✓ Saved to: arc_v1_all_ids.txt")
if __name__ == '__main__':
main()

View File

@ -0,0 +1,76 @@
#!/usr/bin/env python3
"""
Generate a CSV file with ARC V2 task IDs, set, and difficulty
"""
import csv
import json
def main():
    """Export ARC-AGI-2 task IDs (with set and difficulty) as CSV and JSON."""
    def read_ids(path):
        # One ID per line; blank lines are skipped.
        with open(path, 'r') as fh:
            return [ln.strip() for ln in fh if ln.strip()]

    training_ids = read_ids('arc_v2_training_ids.txt')
    evaluation_ids = read_ids('arc_v2_evaluation_ids.txt')

    # Official labeling: training tasks are "easy", evaluation tasks "hard".
    with open('arc_v2_task_ids.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['id', 'set', 'difficulty'])
        writer.writerows([tid, 'training', 'easy'] for tid in training_ids)
        writer.writerows([tid, 'evaluation', 'hard'] for tid in evaluation_ids)

    print("=" * 80)
    print("ARC-AGI-2 Task IDs - CSV Export")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} (labeled: easy)")
    print(f"Evaluation Tasks: {len(evaluation_ids)} (labeled: hard)")
    print(f"Total: {len(training_ids) + len(evaluation_ids)}")
    print("\nFirst 10 rows (preview):")
    print("-" * 50)
    print("id,set,difficulty")
    for tid in training_ids[:5]:
        print(f"{tid},training,easy")
    for tid in evaluation_ids[:5]:
        print(f"{tid},evaluation,hard")
    print("\n✓ Saved to: arc_v2_task_ids.csv")

    # JSON companion manifest.
    data = {
        "version": "ARC-AGI-2",
        "source": "https://github.com/arcprize/ARC-AGI-2",
        "description": "ARC-AGI Version 2 task IDs - 1000 training + 120 evaluation tasks",
        "total_tasks": len(training_ids) + len(evaluation_ids),
        "training": {
            "count": len(training_ids),
            "difficulty": "easy",
            "task_ids": training_ids
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "difficulty": "hard",
            "task_ids": evaluation_ids
        },
        "all_task_ids": sorted(training_ids + evaluation_ids)
    }
    with open('arc_v2_official_task_ids.json', 'w') as f:
        json.dump(data, f, indent=2)
    print("✓ Saved to: arc_v2_official_task_ids.json")
    print("=" * 80)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
"""
Generate a list of all ARC V1 task IDs from the local dataset
"""
import os
import json
def get_task_ids_from_directory(directory):
    """Return sorted task IDs (JSON filenames minus extension) in *directory*.

    A missing directory yields an empty list rather than raising.
    """
    if not os.path.exists(directory):
        return []
    return [
        name[:-5]  # strip the ".json" suffix
        for name in sorted(os.listdir(directory))
        if name.endswith('.json')
    ]
def main():
    """Enumerate local ARC V1 task IDs and save JSON + text manifests."""
    # Get task IDs from training and evaluation directories
    training_dir = 'arc_data/training'
    evaluation_dir = 'arc_data/evaluation'
    training_ids = get_task_ids_from_directory(training_dir)
    evaluation_ids = get_task_ids_from_directory(evaluation_dir)
    print("ARC V1 Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} tasks")
    print(f"Evaluation Tasks: {len(evaluation_ids)} tasks")
    print(f"Total: {len(training_ids) + len(evaluation_ids)} tasks")
    # Save to JSON file (per-split lists plus a sorted union)
    output_data = {
        "version": "ARC-AGI-1",
        "total_tasks": len(training_ids) + len(evaluation_ids),
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids
        },
        "all_task_ids": sorted(training_ids + evaluation_ids)
    }
    with open('arc_v1_task_ids.json', 'w') as f:
        json.dump(output_data, f, indent=2)
    print(f"\n✓ Saved complete list to: arc_v1_task_ids.json")
    # Also save a simple text list
    with open('arc_v1_task_ids.txt', 'w') as f:
        f.write("# ARC V1 Training Task IDs\n")
        for task_id in training_ids:
            f.write(f"{task_id}\n")
        f.write("\n# ARC V1 Evaluation Task IDs\n")
        for task_id in evaluation_ids:
            f.write(f"{task_id}\n")
    print(f"✓ Saved text list to: arc_v1_task_ids.txt")
    # Display first 10 from each set as preview
    print("\n" + "-" * 80)
    print("Preview - First 10 Training Task IDs:")
    for task_id in training_ids[:10]:
        print(f" {task_id}")
    print("\nPreview - First 10 Evaluation Task IDs:")
    for task_id in evaluation_ids[:10]:
        print(f" {task_id}")
    print("\n" + "=" * 80)
if __name__ == '__main__':
main()

161
scripts/upload_to_db.py Executable file
View File

@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Upload ARC puzzle data to MariaDB database
Reads credentials from .env file and inserts all JSON files
"""
import json
import os
import sys
from pathlib import Path
import pymysql
from dotenv import load_dotenv
# Force unbuffered output
sys.stdout.reconfigure(line_buffering=True)
sys.stderr.reconfigure(line_buffering=True)
def load_env_config():
    """Build the database connection settings dict from the .env file."""
    load_dotenv()
    settings = {key: os.getenv(var) for key, var in (
        ('host', 'DB_HOST'),
        ('user', 'DB_USER'),
        ('password', 'DB_PASSWORD'),
        ('database', 'DB_NAME'),
    )}
    settings['port'] = int(os.getenv('DB_PORT', 3306))
    settings['charset'] = 'utf8mb4'
    return settings
def get_all_json_files():
    """Collect every puzzle JSON under arc_data/, training split first."""
    root = Path('arc_data')
    files = sorted(root.glob('training/*.json'))
    files += sorted(root.glob('evaluation/*.json'))
    return files
def insert_puzzle(cursor, file_path):
    """Insert one puzzle file into arc_jsons and return its puzzle ID.

    The ID is the filename without extension.  A parameterized query keeps
    the JSON payload out of the SQL text (no injection risk).
    """
    puzzle_id = file_path.stem
    with open(file_path, 'r') as f:
        payload = f.read().strip()
    cursor.execute(
        "INSERT INTO arc_jsons (id, json) VALUES (%s, %s)",
        (puzzle_id, payload),
    )
    return puzzle_id
def main():
    """Bulk-insert every local puzzle JSON into arc_jsons.

    Returns a process exit code: 0 on success or user cancel, 1 on failure.
    Pass --yes to skip the confirmation prompt.  Duplicate-key rows are
    counted and skipped so the script can be re-run safely.
    """
    print("ARC Data Upload to MariaDB")
    print("=" * 50)
    # Load configuration
    try:
        config = load_env_config()
        print(f"✓ Loaded configuration from .env")
        print(f" Host: {config['host']}")
        print(f" Database: {config['database']}")
        print(f" User: {config['user']}")
    except Exception as e:
        print(f"✗ Error loading configuration: {e}")
        return 1
    # Get all files
    all_files = get_all_json_files()
    print(f"✓ Found {len(all_files)} JSON files")
    # Connect to database
    try:
        print(f"\nConnecting to database...")
        connection = pymysql.connect(**config)
        print(f"✓ Connected successfully")
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return 1
    try:
        cursor = connection.cursor()
        # Check if table exists
        cursor.execute("SHOW TABLES LIKE 'arc_jsons'")
        if not cursor.fetchone():
            print(f"✗ Table 'arc_jsons' does not exist")
            return 1
        # Get current count
        cursor.execute("SELECT COUNT(*) FROM arc_jsons")
        initial_count = cursor.fetchone()[0]
        print(f"✓ Table 'arc_jsons' exists (current rows: {initial_count})")
        # Ask for confirmation (unless --yes flag is provided)
        if '--yes' not in sys.argv:
            print(f"\n⚠ About to insert {len(all_files)} records")
            response = input("Continue? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                print("Upload cancelled")
                return 0
        else:
            print(f"\n⚠ About to insert {len(all_files)} records (auto-confirmed with --yes flag)")
        print(f"\nInserting records...")
        inserted = 0
        errors = 0
        for i, file_path in enumerate(all_files, 1):
            try:
                puzzle_id = insert_puzzle(cursor, file_path)
                inserted += 1
                # Show progress every 100 records
                if i % 100 == 0 or i == len(all_files):
                    print(f" Progress: {i}/{len(all_files)} ({inserted} inserted, {errors} errors)")
            except pymysql.IntegrityError as e:
                # Likely duplicate key — count it and keep going so re-runs
                # are harmless; anything else is a real failure.
                if "Duplicate entry" in str(e):
                    errors += 1
                    if errors <= 5:  # Only show first 5 errors
                        print(f" ⚠ Duplicate: {file_path.stem}")
                else:
                    raise
            except Exception as e:
                errors += 1
                print(f" ✗ Error with {file_path.stem}: {e}")
                if errors > 10:
                    print(f" Too many errors, stopping...")
                    break
        # Commit the transaction (one commit for the whole batch)
        connection.commit()
        # Get final count
        cursor.execute("SELECT COUNT(*) FROM arc_jsons")
        final_count = cursor.fetchone()[0]
        print(f"\n{'=' * 50}")
        print(f"✓ Upload complete!")
        print(f" Successfully inserted: {inserted}")
        print(f" Errors/duplicates: {errors}")
        # Fix: the before/after counts previously printed with no separator
        # ("{initial_count}{final_count}" ran together, e.g. "08").
        print(f" Database rows: {initial_count} → {final_count} (+{final_count - initial_count})")
    except Exception as e:
        # Roll back the whole batch on any unexpected failure.
        connection.rollback()
        print(f"\n✗ Error during upload: {e}")
        return 1
    finally:
        connection.close()
        print(f"\n✓ Database connection closed")
    return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
"""
Verify that solutions were extracted successfully
Shows sample solutions from the database
"""
import json
import os
import sys
import pymysql
from dotenv import load_dotenv
def load_env_config():
    """Read MariaDB credentials from the environment (populated via .env)."""
    load_dotenv()
    return dict(
        host=os.getenv('DB_HOST'),
        user=os.getenv('DB_USER'),
        password=os.getenv('DB_PASSWORD'),
        database=os.getenv('DB_NAME'),
        port=int(os.getenv('DB_PORT', 3306)),
        charset='utf8mb4',
    )
def main():
    """Report how many rows have extracted solutions and preview a few grids."""
    print("Solution Verification Tool")
    print("=" * 60)
    # Load configuration and connect
    config = load_env_config()
    connection = pymysql.connect(**config)
    cursor = connection.cursor()
    # Get statistics
    cursor.execute("SELECT COUNT(*) FROM arc_jsons")
    total_records = cursor.fetchone()[0]
    cursor.execute("SELECT COUNT(*) FROM arc_jsons WHERE solution IS NOT NULL")
    with_solutions = cursor.fetchone()[0]
    cursor.execute("SELECT COUNT(*) FROM arc_jsons WHERE solution IS NULL")
    without_solutions = cursor.fetchone()[0]
    print(f"\nDatabase Statistics:")
    print(f" Total records: {total_records}")
    print(f" With solutions: {with_solutions}")
    print(f" Without solutions: {without_solutions}")
    # Show 5 sample solutions
    print(f"\nSample Solutions:")
    print("-" * 60)
    cursor.execute("SELECT id, solution FROM arc_jsons WHERE solution IS NOT NULL LIMIT 5")
    samples = cursor.fetchall()
    for i, (puzzle_id, solution_json) in enumerate(samples, 1):
        # Stored solutions are JSON-encoded 2-D grids (list of rows).
        solution = json.loads(solution_json)
        rows = len(solution)
        cols = len(solution[0]) if rows > 0 else 0
        print(f"\n{i}. Puzzle ID: {puzzle_id}")
        print(f" Grid size: {cols}×{rows}")
        print(f" Grid data:")
        # Show grid visually
        for row in solution[:5]:  # Show up to 5 rows
            print(f" {row}")
        if rows > 5:
            print(f" ... ({rows - 5} more rows)")
    connection.close()
    print(f"\n{'=' * 60}")
    print("Verification complete!")
if __name__ == '__main__':
main()