78 lines
2.3 KiB
Python
78 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate a list of all ARC V1 task IDs from the local dataset and save it as JSON and plain-text files.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
|
|
def get_task_ids_from_directory(directory):
    """Return the task IDs found in *directory*.

    A task ID is the name of a directory entry ending in ``.json`` with that
    extension removed.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of task-ID strings, ordered by sorted filename. The list is
        empty when *directory* does not exist or is not a directory.
    """
    # isdir (rather than exists) so that a path naming a regular file yields
    # an empty list instead of a NotADirectoryError from os.listdir.
    if not os.path.isdir(directory):
        return []

    suffix = '.json'
    return [
        filename[:-len(suffix)]  # strip the extension to get the bare ID
        for filename in sorted(os.listdir(directory))
        if filename.endswith(suffix)
    ]
|
|
|
|
def main():
    """Collect the ARC V1 task IDs and save them as JSON and text lists.

    Reads the task IDs from ``arc_data/training`` and ``arc_data/evaluation``
    (paths relative to the current working directory), prints a summary,
    writes ``arc_v1_task_ids.json`` and ``arc_v1_task_ids.txt`` into the
    current working directory, and finally prints the first 10 IDs of each
    set as a preview.
    """
    # Get task IDs from training and evaluation directories
    training_dir = 'arc_data/training'
    evaluation_dir = 'arc_data/evaluation'

    # Output locations, named once so the open() calls and the status
    # messages cannot drift apart.
    json_path = 'arc_v1_task_ids.json'
    text_path = 'arc_v1_task_ids.txt'

    training_ids = get_task_ids_from_directory(training_dir)
    evaluation_ids = get_task_ids_from_directory(evaluation_dir)
    total_tasks = len(training_ids) + len(evaluation_ids)

    print("ARC V1 Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} tasks")
    print(f"Evaluation Tasks: {len(evaluation_ids)} tasks")
    print(f"Total: {total_tasks} tasks")

    # Save to JSON file
    output_data = {
        "version": "ARC-AGI-1",
        "total_tasks": total_tasks,
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids,
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids,
        },
        "all_task_ids": sorted(training_ids + evaluation_ids),
    }

    # Explicit UTF-8 so the result does not depend on the platform's
    # locale encoding.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    print(f"\n✓ Saved complete list to: {json_path}")

    # Also save a simple text list: one ID per line under a header comment
    # for each set.
    text_lines = ["# ARC V1 Training Task IDs\n"]
    text_lines.extend(f"{task_id}\n" for task_id in training_ids)
    text_lines.append("\n# ARC V1 Evaluation Task IDs\n")
    text_lines.extend(f"{task_id}\n" for task_id in evaluation_ids)

    with open(text_path, 'w', encoding='utf-8') as f:
        f.writelines(text_lines)

    print(f"✓ Saved text list to: {text_path}")

    # Display first 10 from each set as preview
    print("\n" + "-" * 80)
    print("Preview - First 10 Training Task IDs:")
    for task_id in training_ids[:10]:
        print(f" {task_id}")

    print("\nPreview - First 10 Evaluation Task IDs:")
    for task_id in evaluation_ids[:10]:
        print(f" {task_id}")

    print("\n" + "=" * 80)
|
|
|
|
# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|