#!/usr/bin/env python3 """ Upload ARC puzzle data to MariaDB database Reads credentials from .env file and inserts all JSON files """ import json import os import sys from pathlib import Path import pymysql from dotenv import load_dotenv # Force unbuffered output sys.stdout.reconfigure(line_buffering=True) sys.stderr.reconfigure(line_buffering=True) def load_env_config(): """Load database configuration from .env file""" load_dotenv() config = { 'host': os.getenv('DB_HOST'), 'user': os.getenv('DB_USER'), 'password': os.getenv('DB_PASSWORD'), 'database': os.getenv('DB_NAME'), 'port': int(os.getenv('DB_PORT', 3306)), 'charset': 'utf8mb4' } return config def get_all_json_files(): """Get all JSON files from training and evaluation directories""" base_dir = Path('arc_data') training_files = sorted(base_dir.glob('training/*.json')) evaluation_files = sorted(base_dir.glob('evaluation/*.json')) return training_files + evaluation_files def insert_puzzle(cursor, file_path): """Insert a single puzzle into the database""" # Extract ID from filename puzzle_id = file_path.stem # Read JSON content with open(file_path, 'r') as f: json_content = f.read().strip() # Insert into database using parameterized query (prevents SQL injection) sql = "INSERT INTO arc_jsons (id, json) VALUES (%s, %s)" cursor.execute(sql, (puzzle_id, json_content)) return puzzle_id def main(): print("ARC Data Upload to MariaDB") print("=" * 50) # Load configuration try: config = load_env_config() print(f"✓ Loaded configuration from .env") print(f" Host: {config['host']}") print(f" Database: {config['database']}") print(f" User: {config['user']}") except Exception as e: print(f"✗ Error loading configuration: {e}") return 1 # Get all files all_files = get_all_json_files() print(f"✓ Found {len(all_files)} JSON files") # Connect to database try: print(f"\nConnecting to database...") connection = pymysql.connect(**config) print(f"✓ Connected successfully") except Exception as e: print(f"✗ Database connection failed: {e}") return 1 try: cursor = connection.cursor() # Check if table exists cursor.execute("SHOW TABLES LIKE 'arc_jsons'") if not cursor.fetchone(): print(f"✗ Table 'arc_jsons' does not exist") return 1 # Get current count cursor.execute("SELECT COUNT(*) FROM arc_jsons") initial_count = cursor.fetchone()[0] print(f"✓ Table 'arc_jsons' exists (current rows: {initial_count})") # Ask for confirmation (unless --yes flag is provided) if '--yes' not in sys.argv: print(f"\n⚠ About to insert {len(all_files)} records") response = input("Continue? (yes/no): ").strip().lower() if response not in ['yes', 'y']: print("Upload cancelled") return 0 else: print(f"\n⚠ About to insert {len(all_files)} records (auto-confirmed with --yes flag)") print(f"\nInserting records...") inserted = 0 errors = 0 for i, file_path in enumerate(all_files, 1): try: puzzle_id = insert_puzzle(cursor, file_path) inserted += 1 # Show progress every 100 records if i % 100 == 0 or i == len(all_files): print(f" Progress: {i}/{len(all_files)} ({inserted} inserted, {errors} errors)") except pymysql.IntegrityError as e: # Likely duplicate key if "Duplicate entry" in str(e): errors += 1 if errors <= 5: # Only show first 5 errors print(f" ⚠ Duplicate: {file_path.stem}") else: raise except Exception as e: errors += 1 print(f" ✗ Error with {file_path.stem}: {e}") if errors > 10: print(f" Too many errors, stopping...") break # Commit the transaction connection.commit() # Get final count cursor.execute("SELECT COUNT(*) FROM arc_jsons") final_count = cursor.fetchone()[0] print(f"\n{'=' * 50}") print(f"✓ Upload complete!") print(f" Successfully inserted: {inserted}") print(f" Errors/duplicates: {errors}") print(f" Database rows: {initial_count} → {final_count} (+{final_count - initial_count})") except Exception as e: connection.rollback() print(f"\n✗ Error during upload: {e}") return 1 finally: connection.close() print(f"\n✓ Database connection closed") return 0 if __name__ == '__main__': sys.exit(main())