#!/usr/bin/env python3
"""
Generate a list of all ARC V1 task IDs from the local dataset
"""

import os
import json


def get_task_ids_from_directory(directory):
    """Return the sorted task IDs found directly inside ``directory``.

    A task ID is the name of an entry ending in ``.json`` with that
    extension removed. Entries with any other name are ignored.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of task ID strings ordered by filename. The list is empty
        when ``directory`` does not exist or is not a directory.
    """
    # isdir (rather than exists) so that a path pointing at a regular file
    # yields an empty list instead of os.listdir raising NotADirectoryError.
    if not os.path.isdir(directory):
        return []

    suffix = '.json'
    return [
        filename[:-len(suffix)]
        for filename in sorted(os.listdir(directory))
        if filename.endswith(suffix)
    ]


def main():
    """Collect the ARC V1 task IDs and write them to disk.

    Scans ``arc_data/training`` and ``arc_data/evaluation`` (relative to the
    current working directory), prints a summary with a short preview, and
    writes two files into the current working directory:

    * ``arc_v1_task_ids.json`` - counts and ID lists per split plus a
      combined sorted list.
    * ``arc_v1_task_ids.txt`` - one ID per line, grouped under a comment
      header for each split.
    """
    # Get task IDs from training and evaluation directories
    training_dir = 'arc_data/training'
    evaluation_dir = 'arc_data/evaluation'

    training_ids = get_task_ids_from_directory(training_dir)
    evaluation_ids = get_task_ids_from_directory(evaluation_dir)
    total_tasks = len(training_ids) + len(evaluation_ids)

    print("ARC V1 Task IDs")
    print("=" * 80)
    print(f"\nTraining Tasks: {len(training_ids)} tasks")
    print(f"Evaluation Tasks: {len(evaluation_ids)} tasks")
    print(f"Total: {total_tasks} tasks")

    # Save to JSON file
    output_data = {
        "version": "ARC-AGI-1",
        "total_tasks": total_tasks,
        "training": {
            "count": len(training_ids),
            "task_ids": training_ids,
        },
        "evaluation": {
            "count": len(evaluation_ids),
            "task_ids": evaluation_ids,
        },
        "all_task_ids": sorted(training_ids + evaluation_ids),
    }

    # Explicit encoding keeps the output independent of the platform locale.
    with open('arc_v1_task_ids.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    print("\n✓ Saved complete list to: arc_v1_task_ids.json")

    # Also save a simple text list
    with open('arc_v1_task_ids.txt', 'w', encoding='utf-8') as f:
        f.write("# ARC V1 Training Task IDs\n")
        for task_id in training_ids:
            f.write(f"{task_id}\n")

        f.write("\n# ARC V1 Evaluation Task IDs\n")
        for task_id in evaluation_ids:
            f.write(f"{task_id}\n")

    print("✓ Saved text list to: arc_v1_task_ids.txt")

    # Display first 10 from each set as preview
    print("\n" + "-" * 80)
    print("Preview - First 10 Training Task IDs:")
    for task_id in training_ids[:10]:
        print(f" {task_id}")

    print("\nPreview - First 10 Evaluation Task IDs:")
    for task_id in evaluation_ids[:10]:
        print(f" {task_id}")

    print("\n" + "=" * 80)


if __name__ == '__main__':
    main()