initial
This commit is contained in:
76
scripts/generate_v2_csv.py
Normal file
76
scripts/generate_v2_csv.py
Normal file
@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate CSV and JSON files listing ARC V2 task IDs with their set and difficulty
|
||||
"""
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
def _read_ids(path):
    """Return the non-blank lines of *path*, each stripped of surrounding whitespace."""
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def main():
    """Export the ARC-AGI-2 task IDs as a CSV file and a JSON file.

    Reads ``arc_v2_training_ids.txt`` and ``arc_v2_evaluation_ids.txt`` (one
    task ID per line; blank lines are skipped) relative to the current
    working directory, then writes two files next to them:

    * ``arc_v2_task_ids.csv`` -- header ``id,set,difficulty`` followed by one
      row per task: training tasks first (labeled ``easy``), then evaluation
      tasks (labeled ``hard``), each in input-file order.
    * ``arc_v2_official_task_ids.json`` -- the same IDs grouped per set with
      counts, plus a sorted ``all_task_ids`` list.

    A summary and a short preview are printed to stdout.

    Raises:
        OSError: if an input file cannot be read or an output file cannot be
            written (e.g. ``FileNotFoundError`` for a missing ID list).
    """
    # Read the training and evaluation IDs
    training_ids = _read_ids('arc_v2_training_ids.txt')
    evaluation_ids = _read_ids('arc_v2_evaluation_ids.txt')

    n_training = len(training_ids)
    n_evaluation = len(evaluation_ids)
    n_total = n_training + n_evaluation

    # Create CSV file (newline='' lets the csv module control line endings)
    with open('arc_v2_task_ids.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Write header
        writer.writerow(['id', 'set', 'difficulty'])

        # Write training tasks (training = easy per official labeling)
        writer.writerows([task_id, 'training', 'easy'] for task_id in training_ids)

        # Write evaluation tasks (evaluation = hard per official labeling)
        writer.writerows([task_id, 'evaluation', 'hard'] for task_id in evaluation_ids)

    print("=" * 80)
    print("ARC-AGI-2 Task IDs - CSV Export")
    print("=" * 80)
    print(f"\nTraining Tasks: {n_training} (labeled: easy)")
    print(f"Evaluation Tasks: {n_evaluation} (labeled: hard)")
    print(f"Total: {n_total}")

    # The preview shows up to five IDs from each set, so both labels are
    # visible; it is not literally the first ten rows of the CSV file.
    print("\nFirst 10 rows (preview):")
    print("-" * 50)
    print("id,set,difficulty")
    for task_id in training_ids[:5]:
        print(f"{task_id},training,easy")
    for task_id in evaluation_ids[:5]:
        print(f"{task_id},evaluation,hard")

    print("\n✓ Saved to: arc_v2_task_ids.csv")

    # Also create a JSON version
    data = {
        "version": "ARC-AGI-2",
        "source": "https://github.com/arcprize/ARC-AGI-2",
        # Derived from the actual counts so the text can never disagree with
        # the "count" / "total_tasks" fields below.
        "description": (
            f"ARC-AGI Version 2 task IDs - {n_training} training + "
            f"{n_evaluation} evaluation tasks"
        ),
        "total_tasks": n_total,
        "training": {
            "count": n_training,
            "difficulty": "easy",
            "task_ids": training_ids,
        },
        "evaluation": {
            "count": n_evaluation,
            "difficulty": "hard",
            "task_ids": evaluation_ids,
        },
        "all_task_ids": sorted(training_ids + evaluation_ids),
    }

    with open('arc_v2_official_task_ids.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

    print("✓ Saved to: arc_v2_official_task_ids.json")
    print("=" * 80)
|
||||
|
||||
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user