import openpyxl
import json
# Convert every worksheet of an Excel workbook into instruction-tuning
# records and dump all of them into one JSON file.
workbook = openpyxl.load_workbook(r"C:\Users\19604\Desktop\ambar.xlsx")
# Single source of truth for the output path, so the final message cannot
# drift away from the file that is actually written.
output_path = 'issue_data.json'
# Accumulates the records collected from all worksheets.
all_json_data = []
# Visit every worksheet in the workbook.
for sheet_name in workbook.sheetnames:
    sheet = workbook[sheet_name]
    # Rows start at 2 (row 1 is presumably a header - confirm against the
    # workbook); column 2 is read as the question, column 3 as the answer.
    for question, answer in sheet.iter_rows(
        min_row=2, min_col=2, max_col=3, values_only=True
    ):
        # Skip rows where either the question or the answer cell is empty.
        if question is None or answer is None:
            continue
        all_json_data.append(
            {
                "instruction": "回答问题这一要求",
                "input": question,
                "output": answer,
            }
        )
# ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX escapes.
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(all_json_data, f, indent=2, ensure_ascii=False)
print(f"JSON data saved to '{output_path}'")
2.2. 计算生成的 JSON 数据的 SHA-1 值，后续将数据写入 datainfo.json 时需要填入该值
import hashlib
def calculate_sha1(file_path, chunk_size=8192):
    """Return the SHA-1 hex digest of the file at *file_path*.

    The file is opened in binary mode and hashed chunk by chunk, so large
    files never have to be held in memory in full.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must be positive.

    Returns:
        The 40-character hexadecimal SHA-1 digest, or the literal string
        "File not found." when the file does not exist (kept as a return
        value rather than an exception so existing callers keep working).

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    # read(0) would look like end-of-file and read(-1) would slurp the whole
    # file, so reject non-positive sizes up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")
    sha1 = hashlib.sha1()
    try:
        with open(file_path, 'rb') as file:
            # iter() with a sentinel stops at the first empty read (EOF).
            for data in iter(lambda: file.read(chunk_size), b""):
                sha1.update(data)
    except FileNotFoundError:
        return "File not found."
    return sha1.hexdigest()
# Usage example: hash the generated dataset file and print the digest.
# NOTE: calculate_sha1 returns the string "File not found." for a missing
# path, so that text is what gets printed if file_path is wrong.
file_path = r'E:\研究生\工作\浩瀚深度\工作\code\LLM\issue_data.json' # Replace with the path to your own file
sha1_hash = calculate_sha1(file_path)
print("SHA-1 Hash:", sha1_hash)
3.单卡训练微调
3.1.启动web版本的训练
(llm) PS E:\llm-train\LLaMA-Factory> $env:no_proxy = "192.168.1.0,localhost,127.0.0.1"
(llm) PS E:\llm-train\LLaMA-Factory> $env:CUDA_VISIBLE_DEVICES = "0"
(llm) PS E:\llm-train\LLaMA-Factory> python src/train_web.py
Running on local URL: http://0.0.0.0:7860
To create a public link, set `share=True` in `launch()`