ConceptARC db upload
This commit is contained in:
10
.env
Normal file
10
.env
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Copy this file to .env and fill in the values
# SECURITY: never commit real credentials to version control. The password
# that was previously stored in this file must be treated as compromised
# and rotated on the database server.

# Database configuration
DB_HOST=mariadb.vialink.com.br
DB_USER=bp_solver
DB_PASSWORD=change-me
DB_NAME=arc
DB_PORT=3306

# Add other secrets here as needed
# e.g., API keys: OPENAI_API_KEY=...
|
||||||
83
scripts/check_arc_puzzles.py
Normal file
83
scripts/check_arc_puzzles.py
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Check arc_puzzles table for ConceptArc entries
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()
    return {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def main():
    """Print summary statistics for the arc_puzzles table.

    Reports, in order: row counts per corpora, the first 20 ConceptArc rows,
    and ConceptArc row counts per ``set``.  Only SELECT statements are
    issued; all output goes to stdout.
    """
    print("Checking arc_puzzles table")
    print("=" * 50)

    config = load_env_config()
    connection = pymysql.connect(**config)
    # try/finally so the connection is released even when a query fails.
    try:
        cursor = connection.cursor()

        # Check total counts by corpora
        cursor.execute("""
            SELECT corpora, COUNT(*) as count
            FROM arc_puzzles
            GROUP BY corpora
            ORDER BY corpora
        """)

        print("\nTotal puzzles by corpora:")
        for corpora, count in cursor.fetchall():
            print(f" {corpora}: {count}")

        # Check ConceptArc entries in detail
        cursor.execute("""
            SELECT id, corpora, `set`, difficulty, version
            FROM arc_puzzles
            WHERE corpora = 'ConceptArc'
            ORDER BY `set`, id
            LIMIT 20
        """)

        print("\n" + "=" * 50)
        print("First 20 ConceptArc entries in arc_puzzles:")
        print("=" * 50)
        print(f"{'ID':<20} {'Corpora':<15} {'Set':<20} {'Difficulty':<12} {'Version':<10}")
        print("-" * 80)

        rows = cursor.fetchall()
        if rows:
            for puzzle_id, corpora, set_name, difficulty, version in rows:
                # str() every column: a NULL arrives as None, and None does
                # not accept an alignment format spec such as "<20".
                print(
                    f"{str(puzzle_id):<20} {str(corpora):<15} {str(set_name):<20} "
                    f"{str(difficulty):<12} {str(version):<10}"
                )
        else:
            print("No ConceptArc entries found!")

        # Count by set
        cursor.execute("""
            SELECT `set`, COUNT(*) as count
            FROM arc_puzzles
            WHERE corpora = 'ConceptArc'
            GROUP BY `set`
            ORDER BY `set`
        """)

        print("\n" + "=" * 50)
        print("ConceptArc puzzles by category:")
        print("=" * 50)
        for set_name, count in cursor.fetchall():
            print(f" {set_name}: {count}")
    finally:
        connection.close()


if __name__ == '__main__':
    main()
|
||||||
117
scripts/check_schema.py
Executable file
117
scripts/check_schema.py
Executable file
@ -0,0 +1,117 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Check the database schema for arc_puzzles table
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()

    config = {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }

    return config
|
||||||
|
|
||||||
|
def main():
    """Print the table list, the arc_puzzles/arc_jsons schemas and sample rows.

    Returns:
        int: 0 on success, 1 when configuration loading, connecting, or any
        query fails (the error is printed rather than raised).
    """
    print("Checking Database Schema")
    print("=" * 50)

    # Load configuration
    try:
        config = load_env_config()
        print("✓ Loaded configuration from .env")
        print(f" Host: {config['host']}")
        print(f" Database: {config['database']}")
        print(f" User: {config['user']}")
    except Exception as e:
        print(f"✗ Error loading configuration: {e}")
        return 1

    # Connect to database
    try:
        print("\nConnecting to database...")
        connection = pymysql.connect(**config)
        print("✓ Connected successfully")
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return 1

    try:
        cursor = connection.cursor()

        # Show all tables
        print("\n" + "=" * 50)
        print("TABLES IN DATABASE:")
        print("=" * 50)
        cursor.execute("SHOW TABLES")
        for table in cursor.fetchall():
            print(f" - {table[0]}")

        # Check for arc_puzzles table
        cursor.execute("SHOW TABLES LIKE 'arc_puzzles'")
        if cursor.fetchone():
            print("\n" + "=" * 50)
            print("SCHEMA FOR 'arc_puzzles' TABLE:")
            print("=" * 50)
            cursor.execute("DESCRIBE arc_puzzles")
            columns = cursor.fetchall()
            for col in columns:
                print(f" {col[0]:<20} {col[1]:<20} Null:{col[2]} Key:{col[3]} Default:{col[4]}")

            # Get sample data
            cursor.execute("SELECT * FROM arc_puzzles LIMIT 3")
            print("\n" + "=" * 50)
            print("SAMPLE DATA (first 3 rows):")
            print("=" * 50)
            rows = cursor.fetchall()
            if rows:
                # Reuse the DESCRIBE result fetched above for the column
                # names instead of issuing the same query a second time.
                column_names = [col[0] for col in columns]
                print(" Columns:", ", ".join(column_names))
                for i, row in enumerate(rows, 1):
                    print(f"\n Row {i}:")
                    for col_name, value in zip(column_names, row):
                        if col_name == 'json':
                            # Only report the size of the 'json' column
                            # rather than printing its full value.
                            print(f" {col_name}: [JSON data, length={len(str(value))}]")
                        else:
                            print(f" {col_name}: {value}")
            else:
                print(" (No data in table)")
        else:
            print("\n✗ Table 'arc_puzzles' does not exist")

        # Check for arc_jsons table
        cursor.execute("SHOW TABLES LIKE 'arc_jsons'")
        if cursor.fetchone():
            print("\n" + "=" * 50)
            print("SCHEMA FOR 'arc_jsons' TABLE:")
            print("=" * 50)
            cursor.execute("DESCRIBE arc_jsons")
            for col in cursor.fetchall():
                print(f" {col[0]:<20} {col[1]:<20} Null:{col[2]} Key:{col[3]} Default:{col[4]}")

    except Exception as e:
        print(f"\n✗ Error querying database: {e}")
        return 1
    finally:
        connection.close()
        print("\n✓ Database connection closed")

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||||
82
scripts/find_duplicates.py
Normal file
82
scripts/find_duplicates.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Find duplicate puzzle IDs in arc_puzzles table
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()
    return {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def main():
    """Report (id, corpora, set) combinations that occur more than once.

    Prints each duplicated combination with its row count, the number of
    surplus rows overall and for ConceptArc, then the total row count and
    the per-corpora breakdown.  Only SELECT statements are issued.
    """
    print("Finding Duplicate IDs in arc_puzzles")
    print("=" * 50)

    config = load_env_config()
    connection = pymysql.connect(**config)
    # try/finally so the connection is released even when a query fails.
    try:
        cursor = connection.cursor()

        # Find all duplicate IDs
        cursor.execute("""
            SELECT id, corpora, `set`, COUNT(*) as count
            FROM arc_puzzles
            GROUP BY id, corpora, `set`
            HAVING COUNT(*) > 1
            ORDER BY count DESC, id
        """)
        duplicates = cursor.fetchall()

        if duplicates:
            print(f"\n⚠ Found {len(duplicates)} duplicate entries:")
            print(f"{'ID':<20} {'Corpora':<15} {'Set':<20} {'Count':<10}")
            print("-" * 70)
            for puzzle_id, corpora, set_name, count in duplicates:
                # str() the grouped columns: a NULL arrives as None, which
                # rejects an alignment format spec such as "<20".
                print(f"{str(puzzle_id):<20} {str(corpora):<15} {str(set_name):<20} {count:<10}")

            # Calculate totals: each group keeps one row, the rest are surplus
            total_duplicates = sum(count - 1 for _, _, _, count in duplicates)
            print(f"\nTotal duplicate rows to remove: {total_duplicates}")

            # Check ConceptArc specifically
            conceptarc_dups = [d for d in duplicates if d[1] == 'ConceptArc']
            if conceptarc_dups:
                print(f"\nConceptArc duplicates: {len(conceptarc_dups)} unique IDs")
                conceptarc_dup_count = sum(count - 1 for _, _, _, count in conceptarc_dups)
                print(f"ConceptArc duplicate rows to remove: {conceptarc_dup_count}")
        else:
            print("\n✓ No duplicates found!")

        # Show total counts
        print("\n" + "=" * 50)
        print("Current table statistics:")
        print("=" * 50)
        cursor.execute("SELECT COUNT(*) FROM arc_puzzles")
        total = cursor.fetchone()[0]
        print(f"Total rows in arc_puzzles: {total}")

        cursor.execute("""
            SELECT corpora, COUNT(*) as count
            FROM arc_puzzles
            GROUP BY corpora
            ORDER BY corpora
        """)
        print("\nBy corpora:")
        for corpora, count in cursor.fetchall():
            print(f" {corpora}: {count}")
    finally:
        connection.close()


if __name__ == '__main__':
    main()
|
||||||
155
scripts/fix_conceptarc_solutions.py
Executable file
155
scripts/fix_conceptarc_solutions.py
Executable file
@ -0,0 +1,155 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Fix ConceptArc solutions in the database to include ALL test outputs, not just the last one
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()
    return {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def _solution_matches(stored, outputs):
    """Return True when the stored solution already encodes *outputs*."""
    if not stored:
        return False
    try:
        # Compare parsed values so whitespace differences or a bytes-typed
        # column value do not cause a needless rewrite on every run.
        return json.loads(stored) == outputs
    except (TypeError, ValueError):
        # Unparseable stored value: treat as different so it is rewritten.
        return False


def main():
    """Rewrite ConceptArc solutions so they hold every test output.

    For each arc_jsons row joined to a ConceptArc puzzle, the ``output`` of
    every entry under the puzzle JSON's ``test`` key is collected and stored
    as a JSON array in ``solution``.  Rows without outputs, or whose stored
    solution already matches, are skipped.  Prompts for confirmation unless
    ``--yes`` is present in ``sys.argv``.

    Returns:
        int: 0 on success or user cancellation, 1 if an unexpected error
        forced a rollback.
    """
    print("Fixing ConceptArc Solutions")
    print("=" * 50)

    config = load_env_config()
    connection = pymysql.connect(**config)
    cursor = connection.cursor()

    try:
        # Get all ConceptArc entries
        cursor.execute("""
            SELECT aj.id, aj.arc_puzzle_id, aj.json, aj.solution
            FROM arc_jsons aj
            JOIN arc_puzzles ap ON aj.arc_puzzle_id = ap.id
            WHERE ap.corpora = 'ConceptArc'
            ORDER BY aj.arc_puzzle_id
        """)

        entries = cursor.fetchall()
        print(f"Found {len(entries)} ConceptArc entries to check")

        if not entries:
            print("No ConceptArc entries found!")
            return 0

        # Ask for confirmation
        if '--yes' not in sys.argv:
            response = input(f"\nUpdate solutions for {len(entries)} entries? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                print("Operation cancelled")
                return 0
        else:
            print("Updating solutions (auto-confirmed with --yes flag)")

        updated = 0
        errors = 0
        skipped = 0

        print("\nProcessing entries...")

        for row_id, puzzle_id, json_str, current_solution in entries:
            try:
                # Parse the puzzle JSON
                puzzle_data = json.loads(json_str)

                # Extract all test outputs
                all_outputs = []
                if 'test' in puzzle_data:
                    all_outputs = [
                        test_case['output']
                        for test_case in puzzle_data['test']
                        if 'output' in test_case
                    ]

                if not all_outputs:
                    print(f" ⚠ {puzzle_id}: No test outputs found")
                    skipped += 1
                    continue

                # Skip rows whose stored solution is already equivalent
                if _solution_matches(current_solution, all_outputs):
                    skipped += 1
                    continue

                # Update the solution with the array of all outputs
                cursor.execute("""
                    UPDATE arc_jsons
                    SET solution = %s
                    WHERE id = %s
                """, (json.dumps(all_outputs), row_id))

                updated += 1

                if updated % 20 == 0:
                    print(f" Updated: {updated}/{len(entries)}")

            except Exception as e:
                # Per-row failures are counted and reported; the run only
                # stops once more than 10 have accumulated.
                errors += 1
                print(f" ✗ Error with {puzzle_id}: {e}")
                if errors > 10:
                    print("Too many errors, stopping...")
                    break

        # Commit changes
        connection.commit()

        print(f"\n{'=' * 50}")
        print("✓ Solution update complete!")
        print(f" Updated: {updated}")
        print(f" Skipped (unchanged): {skipped}")
        print(f" Errors: {errors}")

        # Show a sample of ConceptArc solutions after the update
        if updated > 0:
            cursor.execute("""
                SELECT aj.arc_puzzle_id, aj.solution
                FROM arc_jsons aj
                JOIN arc_puzzles ap ON aj.arc_puzzle_id = ap.id
                WHERE ap.corpora = 'ConceptArc'
                LIMIT 3
            """)

            print(f"\n{'=' * 50}")
            print("Sample updated entries:")
            print("=" * 50)

            for puzzle_id, solution in cursor.fetchall():
                if solution:
                    sol_data = json.loads(solution)
                    print(f"\n{puzzle_id}:")
                    print(f" Number of test outputs: {len(sol_data)}")
                    if isinstance(sol_data, list) and len(sol_data) > 0:
                        first_output = sol_data[0]
                        if isinstance(first_output, list):
                            print(f" First output dimensions: {len(first_output)}x{len(first_output[0]) if first_output else 0}")

    except Exception as e:
        connection.rollback()
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        connection.close()
        print("\n✓ Database connection closed")

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||||
146
scripts/remove_duplicates.py
Executable file
146
scripts/remove_duplicates.py
Executable file
@ -0,0 +1,146 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Remove duplicate entries from arc_puzzles table
|
||||||
|
Keeps one row for each unique (id, corpora, set) combination; which copy survives is unspecified
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()
    return {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def main():
    """Delete surplus rows for every duplicated (id, corpora, set) group.

    For each group with more than one row, ``count - 1`` rows are deleted
    with ``DELETE ... LIMIT``, leaving a single row.  Prompts for
    confirmation unless ``--yes`` is present in ``sys.argv``; afterwards the
    table is re-checked and summary counts are printed.

    Returns:
        int: 0 on success, when nothing needs removing, or on user
        cancellation; 1 if an error forced a rollback.
    """
    print("Removing Duplicates from arc_puzzles Table")
    print("=" * 50)

    config = load_env_config()
    connection = pymysql.connect(**config)
    cursor = connection.cursor()

    try:
        # Show the table structure (including key columns) for reference
        cursor.execute("DESCRIBE arc_puzzles")
        columns = cursor.fetchall()
        print("\nTable structure:")
        for col in columns:
            print(f" {col[0]:<20} {col[1]:<20} Key:{col[3]}")

        # Find duplicates
        cursor.execute("""
            SELECT id, corpora, `set`, COUNT(*) as count
            FROM arc_puzzles
            GROUP BY id, corpora, `set`
            HAVING COUNT(*) > 1
            ORDER BY id
        """)

        duplicates = cursor.fetchall()

        if not duplicates:
            print("\n✓ No duplicates found!")
            return 0

        print(f"\nFound {len(duplicates)} sets of duplicates")
        total_to_remove = sum(count - 1 for _, _, _, count in duplicates)
        print(f"Total rows to remove: {total_to_remove}")

        # Ask for confirmation
        if '--yes' not in sys.argv:
            print(f"\n⚠ This will delete {total_to_remove} duplicate rows")
            response = input("Continue? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                print("Operation cancelled")
                return 0
        else:
            print(f"\n⚠ Deleting {total_to_remove} duplicate rows (auto-confirmed with --yes flag)")

        # Rows within a group cannot be told apart by these three columns,
        # so delete (count - 1) of them with LIMIT and keep the remaining one.
        print("\nRemoving duplicates...")
        removed_count = 0

        for puzzle_id, corpora, set_name, count in duplicates:
            # HAVING COUNT(*) > 1 guarantees at least one surplus row.
            delete_count = count - 1

            # "<=>" is the NULL-safe equality operator: GROUP BY puts NULLs
            # in one group, but "col = NULL" never matches, so plain "="
            # would leave those duplicates in place.
            cursor.execute("""
                DELETE FROM arc_puzzles
                WHERE id <=> %s AND corpora <=> %s AND `set` <=> %s
                LIMIT %s
            """, (puzzle_id, corpora, set_name, delete_count))

            removed_count += cursor.rowcount

            if removed_count % 50 == 0:
                print(f" Removed {removed_count}/{total_to_remove} duplicates...")

        # Commit the changes
        connection.commit()

        print(f"\n{'=' * 50}")
        print("✓ Duplicate removal complete!")
        print(f" Total duplicates removed: {removed_count}")

        # Verify no duplicates remain
        cursor.execute("""
            SELECT COUNT(*)
            FROM (
                SELECT id, corpora, `set`, COUNT(*) as count
                FROM arc_puzzles
                GROUP BY id, corpora, `set`
                HAVING COUNT(*) > 1
            ) as dups
        """)

        remaining_dups = cursor.fetchone()[0]
        if remaining_dups > 0:
            print(f"\n⚠ Warning: {remaining_dups} duplicate sets still remain")
        else:
            print("\n✓ No duplicates remaining!")

        # Show final counts
        cursor.execute("SELECT COUNT(*) FROM arc_puzzles")
        final_count = cursor.fetchone()[0]
        print(f"\nFinal table size: {final_count} rows")

        cursor.execute("""
            SELECT corpora, COUNT(*) as count
            FROM arc_puzzles
            GROUP BY corpora
            ORDER BY corpora
        """)
        print("\nBy corpora:")
        for corpora, count in cursor.fetchall():
            print(f" {corpora}: {count}")

    except Exception as e:
        connection.rollback()
        print(f"\n✗ Error: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        connection.close()
        print("\n✓ Database connection closed")

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||||
240
scripts/upload_conceptarc_to_db.py
Executable file
240
scripts/upload_conceptarc_to_db.py
Executable file
@ -0,0 +1,240 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Upload ConceptArc puzzle data to MariaDB database
|
||||||
|
Reads credentials from .env file and inserts all ConceptArc JSON files
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Force line-buffered output so progress messages appear as soon as each line is printed
|
||||||
|
sys.stdout.reconfigure(line_buffering=True)
|
||||||
|
sys.stderr.reconfigure(line_buffering=True)
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()

    config = {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }

    return config
|
||||||
|
|
||||||
|
def get_conceptarc_files(base_dir='data/ConceptArc'):
    """Collect the ``*.json`` files found one level below *base_dir*.

    Args:
        base_dir: Directory whose immediate subdirectories are the concept
            categories.  Defaults to ``data/ConceptArc``, which is resolved
            against the current working directory.

    Returns:
        list[Path]: JSON files ordered by category directory, then by file
        name.  Empty (after printing a message) when *base_dir* is missing.
        JSON files placed directly in *base_dir* are not included.
    """
    base_dir = Path(base_dir)

    if not base_dir.exists():
        print(f"✗ ConceptArc directory not found: {base_dir}")
        return []

    # Each subdirectory is a concept category; sort for a stable order.
    categories = sorted(entry for entry in base_dir.iterdir() if entry.is_dir())

    all_files = []
    for category in categories:
        all_files.extend(sorted(category.glob('*.json')))

    return all_files
|
||||||
|
|
||||||
|
def insert_puzzle(cursor, file_path):
    """Insert one ConceptArc puzzle file into arc_puzzles and arc_jsons.

    Args:
        cursor: Object with an ``execute(sql, params)`` method; both INSERT
            statements are sent through it.  Committing is left to the
            caller.
        file_path: ``Path`` to the puzzle JSON.  The file stem is used as
            the puzzle id and the parent directory name as the category.

    Returns:
        tuple: ``(puzzle_id, category)``.

    Raises:
        ValueError: If the file does not contain valid JSON.
    """
    # e.g. ".../Count/Count1.json" -> id "Count1", category "Count"
    puzzle_id = file_path.stem
    category = file_path.parent.name

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        json_content = f.read().strip()

    # Verify JSON is valid before touching the database
    try:
        puzzle_data = json.loads(json_content)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {file_path}: {e}") from e

    # Collect the output of every test case that has one; the solution
    # stays None (SQL NULL) when there are no outputs.
    solution = None
    if 'test' in puzzle_data:
        all_outputs = [
            test_case['output']
            for test_case in puzzle_data['test']
            if 'output' in test_case
        ]
        if all_outputs:
            solution = json.dumps(all_outputs)

    # `set` is a reserved keyword, so it is escaped with backticks.
    # difficulty and version are inserted as NULL.
    sql_puzzles = """
        INSERT INTO arc_puzzles (id, corpora, `set`, difficulty, version)
        VALUES (%s, %s, %s, %s, %s)
    """
    cursor.execute(sql_puzzles, (puzzle_id, 'ConceptArc', category, None, None))

    sql_jsons = """
        INSERT INTO arc_jsons (arc_puzzle_id, json, solution)
        VALUES (%s, %s, %s)
    """
    cursor.execute(sql_jsons, (puzzle_id, json_content, solution))

    return puzzle_id, category
|
||||||
|
|
||||||
|
def main():
    """Upload every ConceptArc JSON file into arc_puzzles and arc_jsons.

    Loads the connection settings, lists the files, checks that both tables
    exist, asks for confirmation (skipped when ``--yes`` is present in
    ``sys.argv``), inserts each puzzle, commits, and prints a summary.

    Returns:
        int: 0 on success or user cancellation, 1 on a configuration,
        connection, missing-file/table, or upload error.
    """
    print("ConceptArc Data Upload to MariaDB")
    print("=" * 50)

    # Load configuration
    try:
        config = load_env_config()
        print("✓ Loaded configuration from .env")
        print(f" Host: {config['host']}")
        print(f" Database: {config['database']}")
        print(f" User: {config['user']}")
    except Exception as e:
        print(f"✗ Error loading configuration: {e}")
        return 1

    # Get all ConceptArc files
    all_files = get_conceptarc_files()
    if not all_files:
        print("✗ No ConceptArc JSON files found")
        return 1

    print(f"✓ Found {len(all_files)} ConceptArc JSON files")

    # Count files per category
    from collections import defaultdict
    category_counts = defaultdict(int)
    for file_path in all_files:
        category_counts[file_path.parent.name] += 1

    print("\nBreakdown by category:")
    for category, count in sorted(category_counts.items()):
        print(f" - {category}: {count} files")

    # Connect to database
    try:
        print("\nConnecting to database...")
        connection = pymysql.connect(**config)
        print("✓ Connected successfully")
    except Exception as e:
        print(f"✗ Database connection failed: {e}")
        return 1

    # Counts arc_jsons rows that belong to a ConceptArc puzzle.  A subquery
    # is used instead of a JOIN so duplicated arc_puzzles rows cannot
    # inflate the count.
    conceptarc_jsons_sql = """
        SELECT COUNT(*)
        FROM arc_jsons
        WHERE arc_puzzle_id IN (
            SELECT id FROM arc_puzzles WHERE corpora = 'ConceptArc'
        )
    """

    try:
        cursor = connection.cursor()

        # Check if tables exist
        cursor.execute("SHOW TABLES LIKE 'arc_puzzles'")
        if not cursor.fetchone():
            print("✗ Table 'arc_puzzles' does not exist")
            return 1

        cursor.execute("SHOW TABLES LIKE 'arc_jsons'")
        if not cursor.fetchone():
            print("✗ Table 'arc_jsons' does not exist")
            return 1

        # Get current counts
        cursor.execute("SELECT COUNT(*) FROM arc_puzzles WHERE corpora = 'ConceptArc'")
        initial_puzzles_count = cursor.fetchone()[0]
        cursor.execute(conceptarc_jsons_sql)
        initial_jsons_count = cursor.fetchone()[0]

        print("✓ Tables exist")
        print(f" Current ConceptArc puzzles in arc_puzzles: {initial_puzzles_count}")
        print(f" Current ConceptArc entries in arc_jsons: {initial_jsons_count}")

        # Ask for confirmation (unless --yes flag is provided)
        if '--yes' not in sys.argv:
            print(f"\n⚠ About to insert {len(all_files)} ConceptArc records")
            response = input("Continue? (yes/no): ").strip().lower()
            if response not in ['yes', 'y']:
                print("Upload cancelled")
                return 0
        else:
            print(f"\n⚠ About to insert {len(all_files)} records (auto-confirmed with --yes flag)")

        print("\nInserting records...")
        inserted = 0
        errors = 0
        category_inserted = defaultdict(int)

        for i, file_path in enumerate(all_files, 1):
            # insert_puzzle issues two INSERTs.  The savepoint lets a failure
            # of the second one undo the first, so the final commit never
            # persists an arc_puzzles row without its arc_jsons row.
            cursor.execute("SAVEPOINT conceptarc_puzzle")
            try:
                puzzle_id, category = insert_puzzle(cursor, file_path)
                inserted += 1
                category_inserted[category] += 1

                # Show progress every 20 records
                if i % 20 == 0 or i == len(all_files):
                    print(f" Progress: {i}/{len(all_files)} ({inserted} inserted, {errors} errors)")

            except pymysql.IntegrityError as e:
                cursor.execute("ROLLBACK TO SAVEPOINT conceptarc_puzzle")
                # Likely duplicate key
                if "Duplicate entry" in str(e):
                    errors += 1
                    if errors <= 5:  # Only show first 5 errors
                        print(f" ⚠ Duplicate: {file_path.stem} ({file_path.parent.name})")
                else:
                    # Any other integrity failure aborts the whole upload.
                    raise
            except Exception as e:
                cursor.execute("ROLLBACK TO SAVEPOINT conceptarc_puzzle")
                errors += 1
                print(f" ✗ Error with {file_path.stem}: {e}")
                if errors > 10:
                    print(" Too many errors, stopping...")
                    break

        # Commit the transaction
        connection.commit()

        # Get final counts
        cursor.execute("SELECT COUNT(*) FROM arc_puzzles WHERE corpora = 'ConceptArc'")
        final_puzzles_count = cursor.fetchone()[0]
        cursor.execute(conceptarc_jsons_sql)
        final_jsons_count = cursor.fetchone()[0]

        print(f"\n{'=' * 50}")
        print("✓ Upload complete!")
        print(f" Successfully inserted: {inserted}")
        print(f" Errors/duplicates: {errors}")
        print(f" ConceptArc puzzles: {initial_puzzles_count} → {final_puzzles_count} (+{final_puzzles_count - initial_puzzles_count})")
        print(f" ConceptArc jsons: {initial_jsons_count} → {final_jsons_count} (+{final_jsons_count - initial_jsons_count})")

        print("\nInserted by category:")
        for category, count in sorted(category_inserted.items()):
            print(f" - {category}: {count} puzzles")

    except Exception as e:
        connection.rollback()
        print(f"\n✗ Error during upload: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        connection.close()
        print("\n✓ Database connection closed")

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
||||||
86
scripts/verify_conceptarc_upload.py
Executable file
86
scripts/verify_conceptarc_upload.py
Executable file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Verify ConceptArc data was uploaded correctly to the database
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Build the database connection settings from environment variables.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        dict: ``host``, ``user``, ``password``, ``database``, ``port`` and
        ``charset`` entries.  Variables that are not set come back as
        ``None``; ``port`` defaults to 3306.

    Raises:
        ValueError: If ``DB_PORT`` is set to a non-numeric value.
    """
    load_dotenv()
    return {
        'host': os.getenv('DB_HOST'),
        'user': os.getenv('DB_USER'),
        'password': os.getenv('DB_PASSWORD'),
        'database': os.getenv('DB_NAME'),
        # A blank "DB_PORT=" line yields '' rather than None, so fall back
        # to the default explicitly instead of letting int('') raise.
        'port': int(os.getenv('DB_PORT') or 3306),
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def main():
    """Print a report on the ConceptArc rows stored in the database.

    The report has three parts, printed in this order:

    1. the number of ConceptArc puzzles per ``set`` in ``arc_puzzles``,
       followed by their total;
    2. the number of ``arc_jsons`` rows joined to a ConceptArc puzzle;
    3. up to three sample rows, with JSON length and solution shape.

    The database connection is closed even when a query or a JSON decode
    raises; the exception then propagates to the caller.
    """
    print("Verifying ConceptArc Upload")
    print("=" * 50)

    config = load_env_config()
    connection = pymysql.connect(**config)

    try:
        cursor = connection.cursor()

        # Check arc_puzzles table
        cursor.execute("""
            SELECT `set`, COUNT(*)
            FROM arc_puzzles
            WHERE corpora = 'ConceptArc'
            GROUP BY `set`
            ORDER BY `set`
        """)

        print("\nConceptArc puzzles by category (from arc_puzzles):")
        total = 0
        for category, count in cursor.fetchall():
            print(f" {category}: {count} puzzles")
            total += count
        print(f" TOTAL: {total} puzzles")

        # Check arc_jsons table
        cursor.execute("""
            SELECT COUNT(*)
            FROM arc_jsons aj
            JOIN arc_puzzles ap ON aj.arc_puzzle_id = ap.id
            WHERE ap.corpora = 'ConceptArc'
        """)
        json_count = cursor.fetchone()[0]
        print(f"\nConceptArc entries in arc_jsons: {json_count}")

        # Sample some puzzles
        cursor.execute("""
            SELECT ap.id, ap.corpora, ap.`set`, aj.json, aj.solution
            FROM arc_puzzles ap
            JOIN arc_jsons aj ON ap.id = aj.arc_puzzle_id
            WHERE ap.corpora = 'ConceptArc'
            LIMIT 3
        """)

        print("\n" + "=" * 50)
        print("Sample ConceptArc entries:")
        print("=" * 50)

        for puzzle_id, corpora, category, json_data, solution in cursor.fetchall():
            print(f"\nPuzzle ID: {puzzle_id}")
            print(f" Corpora: {corpora}")
            print(f" Category: {category}")
            # A NULL json column would make len() raise; report it as 0 chars.
            json_length = len(json_data) if json_data is not None else 0
            print(f" JSON length: {json_length} chars")
            print(f" Has solution: {'Yes' if solution else 'No'}")

            if not solution:
                continue

            sol = json.loads(solution)
            first = sol[0] if isinstance(sol, list) and sol else None
            if isinstance(first, list) and first and isinstance(first[0], list):
                # Three levels of nesting: a list of grids (one per test
                # case). Report the grid count and the first grid's shape
                # rather than mislabelling "grids x rows" as dimensions.
                print(
                    f" Solution grids: {len(sol)} "
                    f"(first grid {len(first)}x{len(first[0])})"
                )
            else:
                # Single grid (or an unexpected value): rows x columns, with
                # 0 wherever a length is not available.
                rows = len(sol) if isinstance(sol, list) else 0
                cols = len(first) if isinstance(first, list) else 0
                print(f" Solution dimensions: {rows}x{cols}")
    finally:
        # Always release the connection, including on query/JSON errors.
        connection.close()

    print("\n✓ Verification complete")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run the verification report when executed directly.
    main()
|
||||||
131
scripts/verify_solution_format.py
Normal file
131
scripts/verify_solution_format.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Verify that solutions are stored correctly for different corpora
|
||||||
|
- V1/V2/evaluation: Single grid (one test case)
|
||||||
|
- ConceptArc: Array of grids (multiple test cases)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_env_config():
    """Return the keyword arguments used for ``pymysql.connect``.

    After calling ``load_dotenv()``, the host, user, password and database
    name are taken from DB_HOST, DB_USER, DB_PASSWORD and DB_NAME (``None``
    when a variable is unset). The port comes from DB_PORT, defaulting to
    3306, and is converted to ``int``. The charset is fixed to ``utf8mb4``.
    """
    load_dotenv()

    port = int(os.getenv('DB_PORT', 3306))
    host = os.getenv('DB_HOST')
    user = os.getenv('DB_USER')
    password = os.getenv('DB_PASSWORD')
    database = os.getenv('DB_NAME')

    return {
        'host': host,
        'user': user,
        'password': password,
        'database': database,
        'port': port,
        'charset': 'utf8mb4',
    }
|
||||||
|
|
||||||
|
def _describe_solution(sol):
    """Return ``(grid_count, description)`` for a decoded solution value.

    A non-empty list whose first element is a list of lists is treated as an
    array of grids; a non-empty list whose first element is a plain list is
    treated as a single grid. Anything else (empty list, non-list value, or
    a flat list such as ``[1, 2]``) is reported as an unknown format with a
    grid count of 0.
    """
    if not isinstance(sol, list) or not sol:
        return 0, "Unknown format"

    first = sol[0]
    if not isinstance(first, list):
        # A flat list has no rows to measure; calling len() on its first
        # element would raise TypeError, so classify it explicitly.
        return 0, "Unknown format"

    if first and isinstance(first[0], list):
        # Array of grids (ConceptArc style)
        return len(sol), f"Array of {len(sol)} grids"

    # Single grid (regular ARC style)
    return 1, f"Single grid ({len(sol)}x{len(first)})"


def check_solution_format(cursor, corpora_name, expected_test_count):
    """Check that stored solutions match each puzzle's number of test cases.

    Fetches up to five ``arc_jsons`` rows for puzzles whose ``corpora``
    equals ``corpora_name``. For each row it compares the number of entries
    in the puzzle JSON's ``test`` list with the number of grids found in the
    stored solution, printing one line per puzzle and a summary of any
    mismatches. Rows without a solution are reported and skipped.

    Args:
        cursor: Cursor object providing ``execute(sql, params)`` and
            ``fetchall()``; the query uses a ``%s`` placeholder.
        corpora_name: Value matched against ``arc_puzzles.corpora``.
        expected_test_count: Accepted for interface compatibility. It is not
            consulted: every puzzle is compared against its own ``test``
            list.

    Returns:
        None. All results are printed to stdout.
    """
    print(f"\n{'='*50}")
    print(f"Checking {corpora_name} puzzles:")
    print('='*50)

    # The corpus name is passed as a bound parameter, not interpolated.
    cursor.execute("""
        SELECT aj.arc_puzzle_id, aj.json, aj.solution
        FROM arc_jsons aj
        JOIN arc_puzzles ap ON aj.arc_puzzle_id = ap.id
        WHERE ap.corpora = %s
        LIMIT 5
    """, (corpora_name,))

    results = cursor.fetchall()

    if not results:
        print(f"No {corpora_name} puzzles found")
        return

    mismatches = []

    for puzzle_id, json_str, solution in results:
        puzzle_data = json.loads(json_str)
        test_count = len(puzzle_data.get('test', []))

        if not solution:
            print(f"⚠ {puzzle_id}: No solution stored!")
            continue

        sol_count, structure = _describe_solution(json.loads(solution))

        match = "✓" if sol_count == test_count else "✗"
        print(f"{match} {puzzle_id}: {test_count} tests, {structure}")

        if sol_count != test_count:
            mismatches.append((puzzle_id, test_count, sol_count))

    if mismatches:
        print(f"\n⚠ Found {len(mismatches)} mismatches:")
        for pid, expected, actual in mismatches:
            print(f" {pid}: Expected {expected} solutions, got {actual}")
    else:
        print(f"\n✓ All solutions match their test counts!")
|
||||||
|
|
||||||
|
def main():
    """Run the solution-format checks for every corpus and print a summary.

    Calls ``check_solution_format`` for V1, V2, evaluation and ConceptArc,
    then prints, per corpus, how many joined ``arc_jsons`` rows have a
    non-NULL solution.

    Returns:
        0 when every step completed, 1 when an exception was caught (its
        message and traceback are printed). The connection is closed in
        both cases.
    """
    print("Verifying Solution Formats")
    print("=" * 50)

    connection = pymysql.connect(**load_env_config())
    cursor = connection.cursor()

    # (corpora name, expected number of test cases) pairs, checked in order.
    corpora_to_check = (
        ("V1", 1),
        ("V2", 1),
        ("evaluation", 1),
        ("ConceptArc", 3),
    )

    exit_code = 0
    try:
        # Check different corpora
        for corpora_name, expected_tests in corpora_to_check:
            check_solution_format(cursor, corpora_name, expected_tests)

        # Summary stats
        print(f"\n{'='*50}")
        print("Summary by corpora:")
        print('='*50)

        cursor.execute("""
            SELECT ap.corpora,
                   COUNT(*) as total,
                   COUNT(aj.solution) as with_solution
            FROM arc_puzzles ap
            JOIN arc_jsons aj ON ap.id = aj.arc_puzzle_id
            GROUP BY ap.corpora
            ORDER BY ap.corpora
        """)

        for summary_row in cursor.fetchall():
            corpora, total, with_sol = summary_row
            print(f" {corpora}: {with_sol}/{total} have solutions")

    except Exception as exc:
        print(f"\n✗ Error: {exc}")
        import traceback
        traceback.print_exc()
        exit_code = 1
    finally:
        connection.close()
        print(f"\n✓ Database connection closed")

    return exit_code
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    import sys

    # Use main()'s return value (0 or 1) as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|
||||||
12
todo.md
12
todo.md
@ -1,4 +1,14 @@
|
|||||||
Add ConceptARC corpus
|
Add ConceptARC Corpus
|
||||||
|
- to the Repository
|
||||||
|
- To the DB
|
||||||
|
- Categorize them in the DB
|
||||||
|
- Fix the solution extraction method, which differs from the one used for the other corpora
|
||||||
|
|
||||||
|
Interface:
|
||||||
|
- Remove the Header frame, make it a single frame interface to increase the
|
||||||
|
|
||||||
|
DB:
|
||||||
|
- Create the view with Skills, Category, etc.
|
||||||
|
|
||||||
user Inputs
|
user Inputs
|
||||||
Puzzle Assignment
|
Puzzle Assignment
|
||||||
|
|||||||
Reference in New Issue
Block a user