initial
This commit is contained in:
77
scripts/list_v1_task_ids.py
Normal file
77
scripts/list_v1_task_ids.py
Normal file
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
"""
Generate a list of all ARC V1 task IDs from the local dataset.
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
def get_task_ids_from_directory(directory):
    """Return the task IDs found in ``directory``.

    A task ID is the name of an entry ending in ``.json`` with that
    extension removed.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of task-ID strings, ordered by sorted filename. The list is
        empty when ``directory`` does not exist or is not a directory.
    """
    # isdir (rather than exists) also covers a path that points at a regular
    # file, which would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(directory):
        return []

    suffix = '.json'
    return [
        filename[:-len(suffix)]  # Remove .json extension
        for filename in sorted(os.listdir(directory))
        if filename.endswith(suffix)
    ]
|
||||
|
||||
def main():
    """Write the ARC V1 task-ID lists to disk and print a summary.

    Collects task IDs from ``arc_data/training`` and ``arc_data/evaluation``
    (paths relative to the current working directory), writes
    ``arc_v1_task_ids.json`` and ``arc_v1_task_ids.txt`` into the current
    working directory, and prints the counts plus a preview of the first
    ten IDs of each set.
    """
    # Get task IDs from training and evaluation directories
    training_dir = 'arc_data/training'
    evaluation_dir = 'arc_data/evaluation'

    training_ids = get_task_ids_from_directory(training_dir)
    evaluation_ids = get_task_ids_from_directory(evaluation_dir)
    total_tasks = len(training_ids) + len(evaluation_ids)

    print("ARC V1 Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} tasks")
    print(f"Evaluation Tasks: {len(evaluation_ids)} tasks")
    print(f"Total: {total_tasks} tasks")

    # Save to JSON file
    output_data = {
        "version": "ARC-AGI-1",
        "total_tasks": total_tasks,
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids,
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids,
        },
        "all_task_ids": sorted(training_ids + evaluation_ids),
    }

    # Explicit UTF-8 keeps the output independent of the platform locale.
    with open('arc_v1_task_ids.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    print("\n✓ Saved complete list to: arc_v1_task_ids.json")

    # Also save a simple text list
    with open('arc_v1_task_ids.txt', 'w', encoding='utf-8') as f:
        f.write("# ARC V1 Training Task IDs\n")
        f.writelines(f"{task_id}\n" for task_id in training_ids)
        f.write("\n# ARC V1 Evaluation Task IDs\n")
        f.writelines(f"{task_id}\n" for task_id in evaluation_ids)

    print("✓ Saved text list to: arc_v1_task_ids.txt")

    # Display first 10 from each set as preview
    print("\n" + "-" * 80)
    print("Preview - First 10 Training Task IDs:")
    for task_id in training_ids[:10]:
        print(f" {task_id}")

    print("\nPreview - First 10 Evaluation Task IDs:")
    for task_id in evaluation_ids[:10]:
        print(f" {task_id}")

    print("\n" + "=" * 80)
|
||||
|
||||
# Generate the task-ID files only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user