162 lines
4.9 KiB
Python
Executable File
162 lines
4.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Upload ARC puzzle data to MariaDB database
|
|
Reads credentials from .env file and inserts all JSON files
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
import pymysql
|
|
from dotenv import load_dotenv
|
|
|
|
# Force unbuffered output
|
|
sys.stdout.reconfigure(line_buffering=True)
|
|
sys.stderr.reconfigure(line_buffering=True)
|
|
|
|
def load_env_config():
|
|
"""Load database configuration from .env file"""
|
|
load_dotenv()
|
|
|
|
config = {
|
|
'host': os.getenv('DB_HOST'),
|
|
'user': os.getenv('DB_USER'),
|
|
'password': os.getenv('DB_PASSWORD'),
|
|
'database': os.getenv('DB_NAME'),
|
|
'port': int(os.getenv('DB_PORT', 3306)),
|
|
'charset': 'utf8mb4'
|
|
}
|
|
|
|
return config
|
|
|
|
def get_all_json_files():
|
|
"""Get all JSON files from training and evaluation directories"""
|
|
base_dir = Path('arc_data')
|
|
|
|
training_files = sorted(base_dir.glob('training/*.json'))
|
|
evaluation_files = sorted(base_dir.glob('evaluation/*.json'))
|
|
|
|
return training_files + evaluation_files
|
|
|
|
def insert_puzzle(cursor, file_path):
|
|
"""Insert a single puzzle into the database"""
|
|
# Extract ID from filename
|
|
puzzle_id = file_path.stem
|
|
|
|
# Read JSON content
|
|
with open(file_path, 'r') as f:
|
|
json_content = f.read().strip()
|
|
|
|
# Insert into database using parameterized query (prevents SQL injection)
|
|
sql = "INSERT INTO arc_jsons (id, json) VALUES (%s, %s)"
|
|
cursor.execute(sql, (puzzle_id, json_content))
|
|
|
|
return puzzle_id
|
|
|
|
def main():
|
|
print("ARC Data Upload to MariaDB")
|
|
print("=" * 50)
|
|
|
|
# Load configuration
|
|
try:
|
|
config = load_env_config()
|
|
print(f"✓ Loaded configuration from .env")
|
|
print(f" Host: {config['host']}")
|
|
print(f" Database: {config['database']}")
|
|
print(f" User: {config['user']}")
|
|
except Exception as e:
|
|
print(f"✗ Error loading configuration: {e}")
|
|
return 1
|
|
|
|
# Get all files
|
|
all_files = get_all_json_files()
|
|
print(f"✓ Found {len(all_files)} JSON files")
|
|
|
|
# Connect to database
|
|
try:
|
|
print(f"\nConnecting to database...")
|
|
connection = pymysql.connect(**config)
|
|
print(f"✓ Connected successfully")
|
|
except Exception as e:
|
|
print(f"✗ Database connection failed: {e}")
|
|
return 1
|
|
|
|
try:
|
|
cursor = connection.cursor()
|
|
|
|
# Check if table exists
|
|
cursor.execute("SHOW TABLES LIKE 'arc_jsons'")
|
|
if not cursor.fetchone():
|
|
print(f"✗ Table 'arc_jsons' does not exist")
|
|
return 1
|
|
|
|
# Get current count
|
|
cursor.execute("SELECT COUNT(*) FROM arc_jsons")
|
|
initial_count = cursor.fetchone()[0]
|
|
print(f"✓ Table 'arc_jsons' exists (current rows: {initial_count})")
|
|
|
|
# Ask for confirmation (unless --yes flag is provided)
|
|
if '--yes' not in sys.argv:
|
|
print(f"\n⚠ About to insert {len(all_files)} records")
|
|
response = input("Continue? (yes/no): ").strip().lower()
|
|
if response not in ['yes', 'y']:
|
|
print("Upload cancelled")
|
|
return 0
|
|
else:
|
|
print(f"\n⚠ About to insert {len(all_files)} records (auto-confirmed with --yes flag)")
|
|
|
|
print(f"\nInserting records...")
|
|
inserted = 0
|
|
errors = 0
|
|
|
|
for i, file_path in enumerate(all_files, 1):
|
|
try:
|
|
puzzle_id = insert_puzzle(cursor, file_path)
|
|
inserted += 1
|
|
|
|
# Show progress every 100 records
|
|
if i % 100 == 0 or i == len(all_files):
|
|
print(f" Progress: {i}/{len(all_files)} ({inserted} inserted, {errors} errors)")
|
|
|
|
except pymysql.IntegrityError as e:
|
|
# Likely duplicate key
|
|
if "Duplicate entry" in str(e):
|
|
errors += 1
|
|
if errors <= 5: # Only show first 5 errors
|
|
print(f" ⚠ Duplicate: {file_path.stem}")
|
|
else:
|
|
raise
|
|
except Exception as e:
|
|
errors += 1
|
|
print(f" ✗ Error with {file_path.stem}: {e}")
|
|
if errors > 10:
|
|
print(f" Too many errors, stopping...")
|
|
break
|
|
|
|
# Commit the transaction
|
|
connection.commit()
|
|
|
|
# Get final count
|
|
cursor.execute("SELECT COUNT(*) FROM arc_jsons")
|
|
final_count = cursor.fetchone()[0]
|
|
|
|
print(f"\n{'=' * 50}")
|
|
print(f"✓ Upload complete!")
|
|
print(f" Successfully inserted: {inserted}")
|
|
print(f" Errors/duplicates: {errors}")
|
|
print(f" Database rows: {initial_count} → {final_count} (+{final_count - initial_count})")
|
|
|
|
except Exception as e:
|
|
connection.rollback()
|
|
print(f"\n✗ Error during upload: {e}")
|
|
return 1
|
|
finally:
|
|
connection.close()
|
|
print(f"\n✓ Database connection closed")
|
|
|
|
return 0
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|