Excel文件有两列,循环读取这两列并赋值到字典列表。每个字典有两个key,分别为question和answer。最后将结果追加到JSON文件。

import pandas as pd
import json
import os

def _cell_to_text(value):
    """Return an Excel cell as text, mapping missing cells (NaN/None/NaT) to ""."""
    # str(float("nan")) would otherwise leak the literal text "nan" into the JSON.
    return "" if pd.isna(value) else str(value)


def excel_to_json_append(excel_path, json_path, *, dedupe: bool = False) -> None:
    """Append the question/answer rows of an Excel sheet to a JSON list file.

    Args:
        excel_path: Workbook to read; it must contain ``question`` and
            ``answer`` columns.
        json_path: JSON file holding a list of records. It is created when it
            does not exist. If it exists but cannot be parsed as JSON, a
            warning is printed and the file is overwritten with the new rows.
        dedupe: When true, skip new rows whose question already occurs in the
            existing file or earlier in the same sheet. Records already in
            the JSON file are never dropped.

    Raises:
        TypeError: If the existing file is valid JSON but its top-level value
            is not a list, so there is nothing to append to. The file is left
            untouched in that case.
    """
    # 1. Read the two columns into a list of dicts. Zipping the columns
    #    (instead of iterrows) keeps each column's own dtype, so an integer
    #    cell is not rendered as "1.0" because the other column holds floats.
    df = pd.read_excel(excel_path, usecols=["question", "answer"])
    new_data = [
        {"question": _cell_to_text(question), "answer": _cell_to_text(answer)}
        for question, answer in zip(df["question"], df["answer"])
    ]

    # 2. Load the records already stored in the JSON file, if there is one.
    existing_data = []
    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                existing_data = json.load(f)
        except json.JSONDecodeError:
            print("警告:JSON文件内容格式异常,将覆盖写入")
        if not isinstance(existing_data, list):
            # Fail before writing anything, with a message that names the problem.
            raise TypeError(
                f"{json_path} must contain a JSON list, "
                f"found {type(existing_data).__name__}"
            )

    # 3. Optionally drop new rows whose question has been seen before.
    if dedupe:
        seen = {
            item["question"]
            for item in existing_data
            if isinstance(item, dict) and isinstance(item.get("question"), str)
        }
        unique_rows = []
        for record in new_data:
            if record["question"] not in seen:
                seen.add(record["question"])
                unique_rows.append(record)
        new_data = unique_rows

    # 4. Write the merged list back to the JSON file.
    combined_data = existing_data + new_data
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, ensure_ascii=False, indent=2)

    print(f"成功追加{len(new_data)}条数据到{json_path}")

# Example usage: run the conversion only when executed as a script, so that
# importing this module does not read or write any files.
if __name__ == "__main__":
    excel_to_json_append("input.xlsx", "train_qa.json")
# The code is shown below (an Open3DVQA evaluation script, reformatted from a
# paste that had been collapsed onto a few lines).
import argparse
import base64
import io
import json
import mimetypes
import os
import pickle

import numpy as np
import pandas as pd
import PIL
from openai import OpenAI
from PIL import Image

# Single source of truth for the model: used for the request and for the
# name of the result directory.
# NOTE(review): confirm that the configured endpoint accepts image input for
# this model name.
MODEL_NAME = "gpt-3.5-turbo-1106"

SYSTEM_PROMPT = (
    "You are an assistant who perfectly answer question in urban environment. "
    "Only based on the image, you should directly answer the height, width, "
    "volume and distance question with exact number Answer the distance "
    "without output intermediate process. You should answer the direction "
    "question in the direction of the clock with taking your front as "
    "12 o 'clock, your left as 9 o 'clock, and your right as 3 o 'clock."
)


class Open3DVQA:
    """Small client that sends an image plus a question to a chat endpoint."""

    def __init__(self):
        self.client = OpenAI(
            # Read the key from the environment so it is not stored in the
            # source; falls back to the original empty placeholder.
            api_key=os.environ.get("OPENAI_API_KEY", ""),
            base_url="https://free.v36.cm",
        )

    def image_to_base64_data_uri(self, image_input):
        """Encode an image as a base64 ``data:`` URI.

        Args:
            image_input: Either a file path (``str``) or a ``PIL.Image.Image``.

        Returns:
            The data URI string.

        Raises:
            ValueError: If ``image_input`` is neither a path nor a PIL image.
        """
        if isinstance(image_input, str):
            with open(image_input, "rb") as img_file:
                raw = img_file.read()
            # Label the payload with the file's real image type instead of
            # always claiming PNG; fall back to PNG when it cannot be guessed.
            guessed, _ = mimetypes.guess_type(image_input)
            mime_type = guessed if guessed and guessed.startswith("image/") else "image/png"
        elif isinstance(image_input, Image.Image):
            buffer = io.BytesIO()
            image_input.save(buffer, format="PNG")  # in-memory images are sent as PNG
            raw = buffer.getvalue()
            mime_type = "image/png"
        else:
            raise ValueError("Unsupported input type. Input must be a file path or a PIL.Image.Image instance.")

        base64_data = base64.b64encode(raw).decode('utf-8')
        return f"data:{mime_type};base64,{base64_data}"

    def query(self, question, image):
        """Ask ``question`` about ``image`` and return the reply text (a ``str``)."""
        image_uri = self.image_to_base64_data_uri(image)
        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": image_uri}},
                        {"type": "text", "text": question},
                    ],
                },
            ],
        )
        resp = response.choices[0].message.content
        print(resp)
        return resp


def main():
    """Run every item of ``valid_qa.json`` through the model and save the replies."""
    vqa_agent = Open3DVQA()

    # Read the JSON file.
    base_dir = os.path.dirname(os.path.abspath(r"D:\BaiduNetdiskDownload\Open3DVQA\data\open3dvqa\open3dvqa"))
    json_path = os.path.join(base_dir, 'valid_qa.json')
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    gpt_responses = []

    # Walk through the JSON records.
    for item in data:
        item_id = item.get('id')
        image_info = item.get('image_info', {})
        qa_info = item.get('qa_info', {})
        conversation = item.get('conversation', [])

        # Derive the folder number from the file name (assumed to look like
        # "RGBVis_0.png"): take the numeric part and zero-pad it to 3 digits.
        image_filename = os.path.basename(image_info['image_path'])
        file_num = image_filename.split('_')[-1].split('.')[0]
        folder_num = f"{int(file_num):03d}"

        # Build the corrected paths.
        image_rel_path = os.path.join(folder_num, 'rgb', image_filename)
        depth_rel_path = os.path.join(folder_num, 'depth', os.path.basename(image_info['depth_path']))
        image_path = os.path.join(base_dir, image_rel_path)
        depth_path = os.path.join(base_dir, depth_rel_path)

        # Make sure both files exist.
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        if not os.path.exists(depth_path):
            raise FileNotFoundError(f"Depth file not found: {depth_path}")

        # Try to open the image file.
        try:
            image = Image.open(image_path)
        except Exception as e:
            raise IOError(f"Failed to open image file: {image_path}. Error: {e}") from e

        # Try to load the depth file. The result is not sent to the model;
        # loading it only verifies that the file is readable.
        try:
            depth_map = np.load(depth_path)
            depth_map = Image.fromarray(depth_map.squeeze())
        except Exception as e:
            raise IOError(f"Failed to load depth file: {depth_path}. Error: {e}") from e

        # The reference answer is the second conversation turn, when present.
        answer = conversation[1].get('value') if len(conversation) > 1 else None

        question = item.get('query_question')
        # query() already returns the reply text, so it is stored directly.
        resp = vqa_agent.query(question, image)

        gpt_responses.append({
            "id": item_id,
            "qa_info": qa_info,
            "question": question,
            "answer": answer,
            "response": resp,
        })

    # Save the list as a JSON file, creating the result directory if needed.
    result_path = os.path.join(base_dir, 'response_result', MODEL_NAME, 'gpt_responses.json')
    os.makedirs(os.path.dirname(result_path), exist_ok=True)
    with open(result_path, 'w', encoding='utf-8') as f:
        json.dump(gpt_responses, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()
05-29
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值