import json
import os
def process_coco_json(json_file, image_folder):
    """Drop un-annotated images from a COCO-style JSON file and from disk.

    The JSON file is rewritten in place so that its ``"images"`` list only
    keeps entries whose ``"id"`` is referenced by the ``"image_id"`` of at
    least one entry in ``"annotations"``. The files named by the dropped
    entries are then deleted from ``image_folder``.

    A file is never deleted while a retained image entry still refers to the
    same ``file_name``; otherwise the rewritten JSON would point at a file
    that no longer exists.

    Args:
        json_file: Path of the JSON file; it is read and then overwritten.
        image_folder: Directory that each ``file_name`` is joined onto.

    Raises:
        KeyError: If ``"annotations"`` or ``"images"`` is missing, or an
            entry lacks a key this function reads. A missing
            ``"annotations"`` key is deliberately not treated as "no
            annotations", because that would delete every image.
    """
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    annotated_ids = {annotation["image_id"] for annotation in data["annotations"]}

    # Single pass over the images: split them into kept and dropped entries.
    kept_images = []
    kept_filenames = set()
    dropped_filenames = []
    for image in data["images"]:
        if image["id"] in annotated_ids:
            kept_images.append(image)
            # .get(): kept entries were never required to carry a file_name.
            kept_filenames.add(image.get("file_name"))
        else:
            dropped_filenames.append(image["file_name"])

    data["images"] = kept_images
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(data, f)

    for filename in dropped_filenames:
        if filename in kept_filenames:
            # Still referenced by an annotated image entry; keep the file.
            continue
        try:
            os.remove(os.path.join(image_folder, filename))
        except (FileNotFoundError, NotADirectoryError):
            # Already absent: removal is best-effort, as it was with the
            # previous exists() check, but without the check-then-act race.
            pass
def main():
    """Clean the train split and then the val split, using fixed paths.

    Each split is a JSON file name paired with the image folder that is
    passed alongside it to ``process_coco_json``.
    """
    splits = (
        ("train.json", "train"),
        ("val.json", "val"),
    )
    for annotation_file, image_dir in splits:
        process_coco_json(annotation_file, image_dir)
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()