关于为什么最好把 with open 写在循环外面（否则更新数据集时会出错，例如只保留最后一个子文件夹的数据，原有数据丢失）

import os
import json
import csv
import re

# Root folder; every subfolder of it is scanned for JSON files.
parent_folder_path = "captionAimage"

# The CSV is opened in append mode INSIDE the subfolder loop, so the file is
# reopened and the header row is written again once per subfolder.
for folder_name in os.listdir(parent_folder_path):
    folder_path = os.path.join(parent_folder_path, folder_name)
    if not os.path.isdir(folder_path):
        continue  # only directories are processed

    csv_filename = 'check_apiOutput.csv'
    with open(csv_filename, "a+", encoding='utf-8', newline="") as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(["Image Index", "Title", "Image Path", "Image Caption","Page Index"])

        # Walk every JSON file directly inside the subfolder.
        for entry_name in os.listdir(folder_path):
            if not entry_name.endswith(".json"):
                continue  # not a JSON file

            with open(os.path.join(folder_path, entry_name), "r", encoding="utf-8") as src:
                data = json.load(src)

                # Only the "image" entries of the non_text list produce rows.
                for item in data["non_text"]:
                    if item["type"] != "image":
                        continue

                    full_img_path = os.path.join(parent_folder_path, folder_name, item["img_path"])

                    # Use the first caption; an empty caption list becomes "error".
                    captions = item["img_caption"]
                    img_caption = captions[0] if captions else "error"

                    # Pull the figure number out of the start of the caption.
                    match = re.match(r"(?:Figure|Fig)\.? (\d+):?", img_caption, re.IGNORECASE)
                    img_index = match.group(1) if match else "error"

                    # A document without a title is recorded as "error".
                    title = data["title"] if "title" in data else "error"
                    page_index = item["page_idx"]

                    csv_out.writerow([img_index, title, full_img_path, img_caption, page_index])

以上是原来的数据处理代码。我在更新正则表达式后重新运行，发现原有数据集没有发生变化；后来发现可能是读写模式的问题（"a+" 只会在文件末尾追加，不会覆盖旧数据），于是将 with open 的模式改为 "w"。但此时 with open 位于子文件夹循环内部，每处理一个子文件夹都会把文件清空重写，结果只剩下最后一次循环（最后一个子文件夹）的数据，原有数据集的数据全部丢失。

import os
import json
import csv
import re

# Parent folder that contains one subfolder per parsed document.
parent_folder_path = "captionAimage"

# Output CSV file that receives one row per image entry.
csv_filename = 'check_apiOutput.csv'

# Header row of the output CSV.
CSV_HEADER = ["Image Index", "Title", "Image Path", "Image Caption", "Page Index"]

# Matches captions that start with e.g. "Figure 3:", "Fig. 12" or "fig 7";
# compiled once because it is applied to every image caption.
FIGURE_INDEX_PATTERN = re.compile(r"(?:Figure|Fig)\.? (\d+):?", re.IGNORECASE)


def _image_rows(data, image_root):
    """Yield one CSV row for every image entry of a decoded JSON document.

    Args:
        data: Decoded JSON object. Its "non_text" list is scanned; an
            optional "title" key supplies the title column.
        image_root: Folder that each entry's "img_path" is joined onto.

    Yields:
        ``[img_index, title, full_img_path, img_caption, page_index]``.
        ``img_index``, ``title`` and ``img_caption`` fall back to the string
        "error" when they cannot be determined.

    Raises:
        KeyError: If "non_text" is missing, or an entry lacks "type", or an
            image entry lacks "img_path", "img_caption" or "page_idx".
    """
    title = data.get("title", "error")
    for item in data["non_text"]:
        if item["type"] != "image":
            continue
        full_img_path = os.path.join(image_root, item["img_path"])

        # Use the first caption; an empty caption list is recorded as "error"
        # (the row is still written, not skipped).
        captions = item["img_caption"]
        img_caption = captions[0] if captions else "error"

        # The figure number must appear at the very start of the caption.
        match = FIGURE_INDEX_PATTERN.match(img_caption)
        img_index = match.group(1) if match else "error"

        yield [img_index, title, full_img_path, img_caption, item["page_idx"]]


def build_caption_csv(parent_folder=parent_folder_path, output_csv=csv_filename):
    """Rebuild the image/caption CSV from every JSON file under *parent_folder*.

    Each subfolder of *parent_folder* is scanned for ``*.json`` files and every
    "image" entry found becomes one CSV row.

    The output file is opened exactly once, in "w" mode, after all rows have
    been collected:

    * "w" (instead of "a+") makes a re-run replace the dataset, so changes such
      as an updated regex actually show up and rows are never duplicated;
    * opening once, outside the loops, means no subfolder's rows are lost to a
      later truncation;
    * collecting first means a missing folder or malformed JSON raises before
      the existing CSV is touched.

    Folder and file names are sorted so the row order is reproducible
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        parent_folder: Folder containing the per-document subfolders.
        output_csv: Path of the CSV file to (re)write.

    Raises:
        FileNotFoundError: If *parent_folder* does not exist.
        KeyError: If a JSON document lacks a required key.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    rows = [CSV_HEADER]
    for subfolder_name in sorted(os.listdir(parent_folder)):
        subfolder_path = os.path.join(parent_folder, subfolder_name)
        if not os.path.isdir(subfolder_path):
            continue  # only directories hold documents

        for json_filename in sorted(os.listdir(subfolder_path)):
            if not json_filename.endswith(".json"):
                continue
            json_path = os.path.join(subfolder_path, json_filename)
            # Close the JSON file as soon as it is decoded.
            with open(json_path, "r", encoding="utf-8") as json_file:
                data = json.load(json_file)
            rows.extend(_image_rows(data, subfolder_path))

    with open(output_csv, "w", encoding='utf-8', newline="") as csvfile:
        csv.writer(csvfile).writerows(rows)


if __name__ == "__main__":
    build_caption_csv()

之后将 with open 移到循环外面，整个过程只打开一次输出文件，问题就解决了。