Files
arc-humans-interface-db/scripts/list_v1_task_ids.py
bmachado b8856c0660 initial
2025-11-05 00:24:05 +00:00

78 lines
2.3 KiB
Python

#!/usr/bin/env python3
"""
Generate a list of all ARC V1 task IDs from the local dataset and save it
as JSON and plain-text files.
"""
import os
import json
def get_task_ids_from_directory(directory):
    """Return the task IDs found in ``directory``.

    A task ID is the name of an entry ending in ``.json`` with that
    extension removed.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of task ID strings, ordered by sorted filename. The list is
        empty when ``directory`` does not exist or is not a directory.
    """
    suffix = '.json'
    # isdir() rather than exists(): a path that exists but is a regular file
    # would otherwise reach os.listdir() and raise NotADirectoryError.
    if not os.path.isdir(directory):
        return []
    return [
        filename[:-len(suffix)]
        for filename in sorted(os.listdir(directory))
        if filename.endswith(suffix)
    ]
def main():
    """Collect ARC V1 task IDs, save them to disk and print a summary.

    Scans ``arc_data/training`` and ``arc_data/evaluation`` (relative to the
    current working directory), prints the task counts, writes
    ``arc_v1_task_ids.json`` and ``arc_v1_task_ids.txt`` into the current
    working directory, and prints the first 10 IDs of each set as a preview.
    """
    # Get task IDs from training and evaluation directories
    training_dir = 'arc_data/training'
    evaluation_dir = 'arc_data/evaluation'
    training_ids = get_task_ids_from_directory(training_dir)
    evaluation_ids = get_task_ids_from_directory(evaluation_dir)
    # Computed once so the printed total and the saved total cannot diverge.
    total_tasks = len(training_ids) + len(evaluation_ids)

    print("ARC V1 Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} tasks")
    print(f"Evaluation Tasks: {len(evaluation_ids)} tasks")
    print(f"Total: {total_tasks} tasks")

    # Save to JSON file
    output_data = {
        "version": "ARC-AGI-1",
        "total_tasks": total_tasks,
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids,
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids,
        },
        "all_task_ids": sorted(training_ids + evaluation_ids),
    }
    # Explicit encoding so the output does not depend on the platform's
    # default locale encoding.
    with open('arc_v1_task_ids.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)
    print("\n✓ Saved complete list to: arc_v1_task_ids.json")

    # Also save a simple text list: one ID per line under a header per set
    with open('arc_v1_task_ids.txt', 'w', encoding='utf-8') as f:
        f.write("# ARC V1 Training Task IDs\n")
        f.writelines(f"{task_id}\n" for task_id in training_ids)
        f.write("\n# ARC V1 Evaluation Task IDs\n")
        f.writelines(f"{task_id}\n" for task_id in evaluation_ids)
    print("✓ Saved text list to: arc_v1_task_ids.txt")

    # Display first 10 from each set as preview
    print("\n" + "-" * 80)
    print("Preview - First 10 Training Task IDs:")
    for task_id in training_ids[:10]:
        print(f" {task_id}")
    print("\nPreview - First 10 Evaluation Task IDs:")
    for task_id in evaluation_ids[:10]:
        print(f" {task_id}")
    print("\n" + "=" * 80)
if __name__ == "__main__":
    # Run the generator only when executed as a script, not when imported.
    main()