SQLite combined source file splitter

本文介绍了一个用于解析SQLite源代码的简易程序。该程序能够读取SQLite源文件,并将其拆分成多个独立文件,同时标记出包含文件的开始与结束位置。通过对源代码的逐行扫描,程序能准确识别并处理各种预定义的注释标签。



This program splits the combined (amalgamation) version of the SQLite source code into its individual files. See http://www.sqlite.org/ or http://www.sqlite.org/download.html for more about SQLite.


#include <stdio.h>
#include <string.h>

typedef enum
{
Error = -1,
None = 0,
Common,
BeginInc,
EndInc,
BeginFile,
EndFile,
} LineType;

/* Boolean type: holds the 0/1 result of a comparison (see nextEq()). */
typedef int Bool;

/* One entry of the output-file stack kept by parse(). */
typedef struct
{
FILE *file;   /* stream that writeToCur()/writeIncToCur() write to */
char *fname;  /* not assigned or read anywhere in the visible code */
} Writer;

/* Append `line` verbatim to the output stream of the writer `cur`. */
void writeToCur (Writer *cur, char *line)
{
    fputs(line, cur->file);
}

/*
 * Write an `#include "<chbuf>"` directive to the output stream of `cur`.
 *
 * The quotes around the file name are escaped with backslashes (the
 * previous `/"` sequences terminated the string literal early), and the
 * directive is terminated with a newline so that the next line copied to
 * the same file does not end up on the directive's line.
 */
void writeIncToCur (Writer *cur, char *chbuf)
{
    fprintf(cur->file, "#include \"%s\"\n", chbuf);
}

/*
 * Test whether the string at *s starts with the prefix `t`.
 *
 * On a match *s is advanced past the prefix and 1 is returned;
 * otherwise *s is left untouched and 0 is returned.
 */
Bool nextEq (char **s, const char *t)
{
    const size_t prefixLen = strlen(t);

    if (strncmp(*s, t, prefixLen) != 0)
    {
        return 0;
    }
    *s += prefixLen;
    return 1;
}

/*
 * Copy the file name that starts at `p` into `fname`.
 *
 * Copying stops at the first '*', space or end of string; `fname` is
 * always NUL-terminated. No bound is applied, so `fname` must be large
 * enough for the longest name in the input.
 */
void getFileName (char *fname, const char *p)
{
    for (;;)
    {
        const char ch = *p;

        if (ch == '*' || ch == ' ' || ch == '\0')
        {
            break;
        }
        *fname++ = ch;
        p++;
    }
    *fname = '\0';
}

#define LineBufSize 1024

/*
 * Read the next line of `file` into `line` and classify it.
 *
 * `line` must provide at least LineBufSize bytes. For the marker types
 * (BeginFile, EndFile, BeginInc, EndInc) the relevant file name is stored
 * in `fname` via getFileName(), which applies no bound, so `fname` must
 * be large enough for the longest name in the input.
 *
 * Returns:
 *   None      - fgets() returned no data (end of file or read error)
 *   Error     - the line filled the read window without a newline, i.e.
 *               it is longer than the buffer allows
 *   BeginFile - "Begin file <name>" marker, fname = <name>
 *   EndFile   - "End of <name>" marker, fname = <name>
 *   BeginInc  - "Include <name> in the middle of <other>" marker,
 *               fname = <name> (the file being included)
 *   EndInc    - "Continuing where we left off in <name>" marker,
 *               fname = <name>
 *   Common    - anything else
 *
 * A marker is a line longer than 20 characters that starts with a comment
 * opener followed by at least eight '*' or ' ' characters.
 */
LineType readLine (FILE *file, char *line, char *fname)
{
    size_t lenLine;
    char *p;

    if (!fgets(line, LineBufSize - 1, file))
    {
        return None;
    }
    lenLine = strlen(line);
    /* fgets() stores at most LineBufSize - 2 characters here; a full
     * window that does not end in a newline means the line was cut. */
    if (lenLine == LineBufSize - 2 && line[lenLine - 1] != '\n')
    {
        return Error;
    }
    if (lenLine <= 20 || line[0] != '/' || line[1] != '*')
    {
        return Common;
    }

    /* Skip the run of '*' and ' ' that decorates marker comments. */
    p = line + 2;
    while (*p == '*' || *p == ' ')
    {
        p++;
    }
    if (p - line < 10)
    {
        return Common;
    }

    if (nextEq(&p, "Begin file "))
    {
        getFileName(fname, p);
        return BeginFile;
    }
    if (nextEq(&p, "End of "))
    {
        getFileName(fname, p);
        return EndFile;
    }
    if (nextEq(&p, "Continuing where we left off in "))
    {
        getFileName(fname, p);
        return EndInc;
    }
    if (nextEq(&p, "Include "))
    {
        /* Accept any included file, not only pager.h, and report the
         * included file so the caller can emit #include "<name>". */
        char *rest = p;

        while (*rest != ' ' && *rest != '*' && *rest != '\0')
        {
            rest++;
        }
        if (rest != p && nextEq(&rest, " in the middle of "))
        {
            getFileName(fname, p);
            return BeginInc;
        }
    }
    return Common;
}

void parse (char *sfname)
{
#define StackSize 32
char chbuf[128];
char fnbuf[256];
char line[LineBufSize];
Writer stack[StackSize];
Writer *cur;
int depth = 0;
FILE *srcFile = fopen(sfname, "r");

cur = stack + depth;
sprintf(fnbuf, "out/%s", sfname);
cur->file = fopen(fnbuf, "w");

while (1)
{
LineType type = readLine(srcFile, line, chbuf);
if (type == Error)
{
printf("An error has occurred during parsing./n");
break;
}
else if (type == None)
{
break;
}
switch (type)
{
case Common:
writeToCur(cur, line);
break;
case BeginInc:
writeIncToCur(cur, chbuf);
break;
case EndInc:
break;
case BeginFile:
printf("Found file embedded %s/n", chbuf);
depth++;
cur = stack + depth;
sprintf(fnbuf, "out/%s", chbuf);
cur->file = fopen(fnbuf, "w");
writeToCur(cur, line);
break;
case EndFile:
writeToCur(cur, line);
fclose(cur->file);
depth--;
cur = stack + depth;
break;
}
if (depth < 0)
{
break;
}
}
for ( ; depth >= 0; depth--)
{
cur = stack + depth;
fclose(cur->file);
}
}

/* Program entry point: split "sqlite3.c" from the working directory. */
int main (void)
{
    char source[] = "sqlite3.c";

    parse(source);
    return 0;
}
import os
import sys

import requests
from PyQt5.QtWidgets import (
    QApplication,
    QMainWindow,
    QFileDialog,
    QLabel,
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QTextCursor
from docx import Document
from 智能问答系统_ui import Ui_aiWindow

# LangChain modules
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings
from langchain_community.document_loaders import TextLoader, Docx2txtLoader
from langchain_community.vectorstores import FAISS

# Bookkeeping file (inside the knowledge directory) listing indexed files.
PROCESSED_RECORD_NAME = ".processed_files.txt"
# File written by FAISS.save_local() with the default index name.
INDEX_FILE_NAME = "index.faiss"
SUPPORTED_SUFFIXES = (".txt", ".docx")
EMBEDDING_MODEL = "nomic-embed-text"
CHAT_MODEL = "gemma3:1b"
OLLAMA_URL = "http://127.0.0.1:11434"
# Seconds to wait for the local Ollama service to accept a connection.
CONNECT_TIMEOUT = 5

# Toasts currently on screen. A parentless widget with no Python reference
# may be garbage-collected right after show(), so they are kept here.
_ACTIVE_TOASTS = set()


def _index_exists(persist_directory):
    """Return True if a saved FAISS index file exists in the directory.

    The directory itself is created at start-up, so its mere existence
    says nothing about whether an index has been saved yet.
    """
    return os.path.isfile(os.path.join(persist_directory, INDEX_FILE_NAME))


def _is_knowledge_file(name):
    """Return True for supported documents, excluding the bookkeeping file."""
    return name != PROCESSED_RECORD_NAME and name.endswith(SUPPORTED_SUFFIXES)


class aiWindow(QMainWindow):
    """Main window: manages knowledge files and answers questions."""

    def __init__(self):
        super().__init__()
        self.ui = Ui_aiWindow()
        self.ui.setupUi(self)
        self.resize(700, 500)

        # Storage locations, relative to the working directory.
        self.knowledge_dir = os.path.abspath("./knowledge_files")
        self.persist_directory = os.path.abspath("./faiss_index")
        os.makedirs(self.knowledge_dir, exist_ok=True)
        os.makedirs(self.persist_directory, exist_ok=True)

        self.knowledge_db = None
        self.build_thread = None
        self.qa_thread = None
        # True once an automatic build has been started by a failed load.
        self._is_building = False

        # Page-switching buttons
        self.ui.stackedWidget.setCurrentIndex(0)
        self.ui.pushButton.clicked.connect(
            lambda: self.ui.stackedWidget.setCurrentIndex(0))
        self.ui.pushButton_2.clicked.connect(
            lambda: self.ui.stackedWidget.setCurrentIndex(1))

        # Knowledge-file list
        self.load_existing_files()
        self.ui.listWidget.itemClicked.connect(self.preview_file)

        # Load the vector database (builds it when none is usable)
        self.load_vector_db()

        # Build-database button
        self.ui.pushButton_5.clicked.connect(self.start_build_vector_db)

        # Submit-question button
        self.ui.pushButton_6.clicked.connect(self.ask_question)

        # Upload / delete file buttons
        self.ui.pushButton_3.clicked.connect(self.add_file)
        self.ui.pushButton_4.clicked.connect(self.remove_file)

        self.show()

    def load_existing_files(self):
        """Fill the list widget with the supported files in the knowledge dir."""
        self.ui.listWidget.clear()
        for file in os.listdir(self.knowledge_dir):
            if _is_knowledge_file(file):
                self.ui.listWidget.addItem(file)

    def add_file(self):
        """Copy user-selected files into the knowledge dir and list them."""
        file_paths, _ = QFileDialog.getOpenFileNames(
            self, "选择知识库文件", "", "文档文件 (*.txt *.docx)")
        for path in file_paths:
            filename = os.path.basename(path)
            dest = os.path.join(self.knowledge_dir, filename)
            if not os.path.exists(dest):
                with open(path, "rb") as src, open(dest, "wb") as dst:
                    dst.write(src.read())
                self.ui.listWidget.addItem(filename)

    def preview_file(self):
        """Show the text of the currently selected file in the preview pane."""
        item = self.ui.listWidget.currentItem()
        if not item:
            return
        filename = item.text()
        path = os.path.join(self.knowledge_dir, filename)
        try:
            if filename.endswith(".txt"):
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            elif filename.endswith(".docx"):
                doc = Document(path)
                content = "\n".join([p.text for p in doc.paragraphs])
            else:
                content = "不支持的格式"
        except Exception as e:
            content = f"读取失败: {e}"
        self.ui.textEdit_2.setPlainText(content)

    def _forget_processed_file(self, filename):
        """Drop ``filename`` from the processed-files record, if present.

        Without this, a file that is deleted and later added again under
        the same name would be skipped by the next build.
        """
        record_path = os.path.join(self.knowledge_dir, PROCESSED_RECORD_NAME)
        if not os.path.exists(record_path):
            return
        with open(record_path, "r", encoding="utf-8") as f:
            names = f.read().splitlines()
        remaining = [name for name in names if name != filename]
        if len(remaining) != len(names):
            with open(record_path, "w", encoding="utf-8") as f:
                f.write("\n".join(remaining))

    def remove_file(self):
        """Delete the selected file and its entries in the vector store."""
        selected_item = self.ui.listWidget.currentItem()
        if not selected_item:
            return
        filename = selected_item.text()
        path = os.path.join(self.knowledge_dir, filename)
        try:
            if os.path.exists(path):
                os.remove(path)
            self._forget_processed_file(filename)
            # Remove the vector-store entries that came from this file.
            if _index_exists(self.persist_directory):
                embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
                db = FAISS.load_local(
                    self.persist_directory, embeddings,
                    allow_dangerous_deserialization=True)
                # NOTE(review): reads the private docstore mapping because
                # entries are matched on the "source" metadata set at build.
                to_delete = [
                    doc_id
                    for doc_id, doc in db.docstore._dict.items()
                    if doc.metadata.get("source") == filename
                ]
                if to_delete:
                    db.delete(to_delete)
                    db.save_local(self.persist_directory)
                    # Keep the in-memory store used for answering in sync.
                    self.knowledge_db = db
                    show_toast(
                        f"✅ 已从向量库中删除 {len(to_delete)} 条与 {filename} 相关的记录。")
        except Exception as e:
            show_toast(f"❌ 删除向量记录或文件时出错:{e}")
        self.ui.listWidget.takeItem(self.ui.listWidget.row(selected_item))
        self.ui.textEdit_2.clear()

    def _start_initial_build(self):
        """Start one automatic build; repeated failures do not re-trigger it."""
        if not self._is_building:
            self._is_building = True
            self.start_build_vector_db()

    def load_vector_db(self):
        """Load the saved FAISS index, or start a build when none is usable."""
        print("🔍 开始加载向量数据库...")
        if not _index_exists(self.persist_directory):
            print("🟡 向量库目录不存在,跳过加载")
            self._start_initial_build()
            return
        try:
            embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
            print("✅ 已初始化 OllamaEmbeddings")
            print("📥 正在加载向量数据库...")
            self.knowledge_db = FAISS.load_local(
                self.persist_directory,
                embeddings,
                allow_dangerous_deserialization=True
            )
            print("✅ 向量数据库加载成功")
        except Exception as e:
            print("❌ 加载向量数据库失败:", e)
            self._start_initial_build()

    def handle_build_success(self, msg):
        """Report a finished build and reload the freshly saved index."""
        self.ui.textEdit.append(msg)
        self._is_building = False
        self.load_vector_db()

    def start_build_vector_db(self):
        """Run the index build in a background thread (one at a time)."""
        if self.build_thread is not None:
            # Two concurrent builds would write the same index files.
            print("⏳ 向量数据库正在构建中,跳过重复请求")
            return
        print("🚀 开始构建向量数据库线程...")
        self.build_thread = BuildVectorDBThread(
            self.knowledge_dir, persist_directory=self.persist_directory)
        self.build_thread.status_signal.connect(self.update_status)
        # Reload the database once the build has completed.
        self.build_thread.result_signal.connect(self.handle_build_success)
        self.build_thread.error_signal.connect(self.show_error)
        self.build_thread.finished.connect(self._on_build_finished)
        self.build_thread.start()

    def _on_build_finished(self):
        """Release the finished build thread so a new build may start."""
        thread, self.build_thread = self.build_thread, None
        if thread is not None:
            thread.deleteLater()

    def update_status(self, msg):
        """Append a status line at the end of the output pane."""
        cursor = self.ui.textEdit.textCursor()
        cursor.movePosition(QTextCursor.End)
        cursor.insertText(msg + "\n")
        self.ui.textEdit.setTextCursor(cursor)

    def show_result(self, result):
        """Append a result message and reload the database."""
        self.ui.textEdit.append(result)
        self.load_vector_db()

    def show_error(self, error):
        """Append an error message to the output pane."""
        self.ui.textEdit.append(error)

    def ask_question(self):
        """Answer the question in the line edit using a background thread."""
        question = self.ui.lineEdit.text()
        if not question or not self.knowledge_db:
            return

        self.ui.textEdit.append("⏳ 正在获取答案,请稍候...")
        self.ui.pushButton_6.setEnabled(False)

        self.qa_thread = QAWorkerThread(question, self.knowledge_db)
        self.qa_thread.status_signal.connect(self.update_status)
        self.qa_thread.result_signal.connect(self.show_ask_result)
        self.qa_thread.error_signal.connect(self.show_error)
        self.qa_thread.finished.connect(self._on_qa_finished)
        self.qa_thread.start()

    def _on_qa_finished(self):
        """Re-enable the ask button (on success or error) and free the thread."""
        self.ui.pushButton_6.setEnabled(True)
        thread, self.qa_thread = self.qa_thread, None
        if thread is not None:
            thread.deleteLater()

    def show_ask_result(self, result):
        """Replace the output pane content with the answer."""
        self.ui.textEdit.setPlainText(result)


class BuildVectorDBThread(QThread):
    """Background thread that indexes new knowledge files into FAISS.

    Signals:
        status_signal: progress messages.
        result_signal: emitted once after the index was saved successfully.
        error_signal: emitted when the build cannot proceed.
    """

    status_signal = pyqtSignal(str)
    result_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    def __init__(self, knowledge_dir, parent=None, persist_directory=None):
        super().__init__(parent)
        self.knowledge_dir = knowledge_dir
        # Defaults to the location the original code hard-coded.
        self.persist_directory = (
            persist_directory or os.path.abspath("./faiss_index"))

    def _load_new_documents(self, processed_files):
        """Load every supported file that is not in ``processed_files``.

        Returns a ``(documents, file_names)`` pair for the files that
        loaded successfully; failures are reported and skipped.
        """
        documents = []
        loaded_names = []
        print("📂 开始遍历知识库目录中的文件...")
        for file in os.listdir(self.knowledge_dir):
            file_path = os.path.join(self.knowledge_dir, file)
            print(f"🔎 检查文件: {file}")
            # The bookkeeping record ends in ".txt" but is not knowledge.
            if not _is_knowledge_file(file):
                print(f"🚫 跳过非支持格式文件: {file}")
                continue
            if file in processed_files:
                continue
            try:
                print(f"⚙️ 正在加载文件: {file}")
                if file.endswith(".txt"):
                    loader = TextLoader(file_path, encoding="utf-8")
                else:
                    loader = Docx2txtLoader(file_path)
                docs = loader.load()
                print(f"📄 从 {file} 中加载了 {len(docs)} 个文档")
                # remove_file() matches on this bare file name.
                for doc in docs:
                    doc.metadata["source"] = file
                documents.extend(docs)
                loaded_names.append(file)
                self.status_signal.emit(f"✅ 文件 {file} 处理完成")
            except Exception as e:
                self.status_signal.emit(f"⚠️ 加载文件 {file} 出错:{e}")
                print(f"⚠️ 加载文件 {file} 出错:{e}")
        return documents, loaded_names

    def run(self):
        """Index new files, save the FAISS store and update the record."""
        print("📚 正在构建向量数据库...")

        print("🔍 检查知识库目录是否存在:", self.knowledge_dir)
        if not os.path.exists(self.knowledge_dir):
            self.error_signal.emit("❌ 知识库目录不存在")
            print("❌ 错误:知识库目录不存在")
            return

        index_exists = _index_exists(self.persist_directory)

        processed_files_path = os.path.join(
            self.knowledge_dir, PROCESSED_RECORD_NAME)
        print("📄 加载已处理文件记录:", processed_files_path)
        # A record without an index is stale: re-index everything.
        if index_exists and os.path.exists(processed_files_path):
            with open(processed_files_path, "r", encoding="utf-8") as f:
                processed_files = set(f.read().splitlines())
            print(f"✅ 已读取 {len(processed_files)} 个已处理文件名")
        else:
            processed_files = set()
            print("🆕 未找到已处理文件记录,将创建新的记录")

        documents, loaded_names = self._load_new_documents(processed_files)
        new_processed_files = list(processed_files) + loaded_names

        if not documents:
            self.status_signal.emit("✅ 没有新文件需要处理。")
            print("✅ 没有新文件需要处理,跳过构建")
            return

        print("✂️ 开始分割文档...")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500, chunk_overlap=50)
        docs = text_splitter.split_documents(documents)
        print(f"🧩 分割完成,共生成 {len(docs)} 个文本块")
        self.status_signal.emit(f"🧩 分割完成,共生成 {len(docs)} 个文本块")

        print("🧠 初始化 OllamaEmbeddings(模型:nomic-embed-text)")
        try:
            embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
            print("✅ embeddings 初始化成功")

            if index_exists:
                print("📥 正在加载现有向量数据库...")
                db = FAISS.load_local(
                    self.persist_directory, embeddings,
                    allow_dangerous_deserialization=True)
                print("🔄 正在添加新文档到向量数据库...")
                db.add_documents(docs)
            else:
                # from_documents() already stores `docs`; adding them again
                # would embed and store every chunk twice.
                print("🆕 正在创建新的向量数据库...")
                db = FAISS.from_documents(docs, embeddings)

            # NOTE(review): the attached log shows FAISS failing to open
            # index.faiss under a path with non-ASCII characters; the native
            # file I/O may not handle such paths on Windows. Confirm, and
            # consider an ASCII-only persist directory.
            print("💾 正在保存向量数据库...")
            os.makedirs(self.persist_directory, exist_ok=True)
            db.save_local(self.persist_directory)
            self.status_signal.emit("✅ 向量数据库保存成功")
            print("✅ 向量数据库保存成功")

            # Record the files only after the index is safely on disk.
            print("📝 正在更新已处理文件列表...")
            with open(processed_files_path, "w", encoding="utf-8") as f:
                f.write("\n".join(new_processed_files))
            print("✅ 已更新已处理文件列表")

            self.result_signal.emit("🎉 向量数据库构建完成")
            print("🎉 向量数据库构建完成")

        except Exception as e:
            self.error_signal.emit(f"❌ 构建数据库时发生错误:{e}")
            print(f"❌ 构建数据库时发生错误:{e}")


class QAWorkerThread(QThread):
    """Background thread that retrieves context and queries the chat model."""

    status_signal = pyqtSignal(str)
    result_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    def __init__(self, question, knowledge_db, parent=None):
        super().__init__(parent)
        self.question = question
        self.knowledge_db = knowledge_db

    def run(self):
        """Check the Ollama service, build the prompt and emit the answer."""
        try:
            # Is the Ollama service running?
            response = requests.get(
                f"{OLLAMA_URL}/api/version", timeout=CONNECT_TIMEOUT)
            if response.status_code != 200:
                raise Exception("❌ Ollama 服务未启动,请先启动服务。")

            # Is the chat model available?
            model_response = requests.get(
                f"{OLLAMA_URL}/api/tags", timeout=CONNECT_TIMEOUT)
            models = model_response.json().get("models", [])
            if not models:
                raise Exception("❌ 未找到可用模型,请先拉取模型。")
            available_models = [m["name"] for m in models]
            if CHAT_MODEL not in available_models:
                raise Exception("❌ 模型 gemma3:1b 不存在,请先拉取该模型。")

            # Retrieve the most similar chunks from the knowledge base.
            docs = self.knowledge_db.similarity_search(self.question, k=6)
            if not docs:
                raise Exception("❌ 未找到相关知识,请尝试其他问题。")

            # Strip asterisks so Markdown emphasis does not reach the prompt.
            context = "\n\n".join(
                "文档{}:\n{}".format(i + 1, doc.page_content.replace("*", ""))
                for i, doc in enumerate(docs)
            )

            prompt = (
                "请根据以下知识内容,用自己的话总结并回答问题。"
                "请确保回答清晰、准确,并基于提供的资料,不要编造内容。\n\n"
                f"{context}\n\n"
                f"问题:{self.question}\n"
                "答案:"
            )

            # Bound only the connect phase; generation time is open-ended.
            response = requests.post(
                f"{OLLAMA_URL}/api/chat",
                json={
                    "model": CHAT_MODEL,
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False
                },
                timeout=(CONNECT_TIMEOUT, None),
            )
            response.raise_for_status()

            answer = response.json().get("message", {}).get(
                "content", "未获取到有效回答")
            self.result_signal.emit(
                f"问题:{self.question}\n答案:{answer}\n{'-' * 30}")

        except Exception as e:
            self.error_signal.emit(f"❌ 处理过程中发生错误:{str(e)}")


class ToastLabel(QLabel):
    """Small frameless notification that closes itself after ``duration`` ms."""

    def __init__(self, text, duration=2000):
        super().__init__(text)
        self.setStyleSheet('''
            background-color: #333;
            color: #fff;
            padding: 10px;
            border-radius: 5px;
        ''')
        self.setAlignment(Qt.AlignCenter)
        self.setWordWrap(True)
        self.setFixedSize(250, 80)
        self.setWindowFlags(
            Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint | Qt.ToolTip)
        self.setAttribute(Qt.WA_ShowWithoutActivating)
        QTimer.singleShot(0, self.center_on_screen)
        QTimer.singleShot(duration, self._dismiss)

    def center_on_screen(self):
        """Move the toast to the centre of the primary screen's usable area."""
        screen = QApplication.primaryScreen()
        if screen is None:
            return
        self.move(screen.availableGeometry().center() - self.rect().center())

    def _dismiss(self):
        """Hide the toast, drop the keep-alive reference and delete it."""
        self.hide()
        _ACTIVE_TOASTS.discard(self)
        self.deleteLater()


def show_toast(message, duration=2000):
    """Show ``message`` in a toast for ``duration`` milliseconds."""
    toast = ToastLabel(message, duration)
    _ACTIVE_TOASTS.add(toast)
    toast.show()


if __name__ == '__main__':
    from PyQt5 import QtCore
    from PyQt5 import QtGui

    # High-DPI settings must be applied before the application is created.
    QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling, True)
    QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_UseHighDpiPixmaps, True)
    # The rounding policy is not an application attribute; it has its own
    # setter, which is absent from Qt releases before 5.14.
    if hasattr(QtGui.QGuiApplication, "setHighDpiScaleFactorRoundingPolicy"):
        QtGui.QGuiApplication.setHighDpiScaleFactorRoundingPolicy(
            QtCore.Qt.HighDpiScaleFactorRoundingPolicy.PassThrough)

    app = QApplication(sys.argv)

    # Scale the default font with the screen DPI.
    screen = app.screens()[0]
    dpi = screen.logicalDotsPerInch()
    base_dpi = 96.0
    scale_factor = dpi / base_dpi
    font_size = max(12, int(12 * scale_factor))
    font = app.font()
    font.setPointSize(font_size)
    app.setFont(font)

    window = aiWindow()
    sys.exit(app.exec())

# Trailing request from the original post, translated: "Add the modification
# from just now into my code."
08-11
🔍 开始加载向量数据库... ✅ 已初始化 OllamaEmbeddings 📥 正在加载向量数据库... ❌ 加载向量数据库失败: Error in __cdecl faiss::FileIOReader::FileIOReader(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:68: Error: 'f' failed: could not open D:\python草稿\智能中医系统\门诊\faiss_index\index.faiss for reading: No such file or directory 🚀 开始构建向量数据库线程... 📚 正在构建向量数据库... 🔍 检查知识库目录是否存在: D:\python草稿\智能中医系统\门诊\knowledge_files 📄 加载已处理文件记录: D:\python草稿\智能中医系统\门诊\knowledge_files\.processed_files.txt 🆕 未找到已处理文件记录,将创建新的记录 📂 开始遍历知识库目录中的文件... 🔎 检查文件: 2016康康中医诊断学.docx ⚙️ 正在加载文件: 2016康康中医诊断学.docx 📄 从 2016康康中医诊断学.docx 中加载了 1 个文档 ✂️ 开始分割文档... 🧩 分割完成,共生成 50 个文本块 🧠 初始化 OllamaEmbeddings(模型:nomic-embed-text) ✅ embeddings 初始化成功 🆕 正在创建新的向量数据库... 🔄 正在添加新文档到向量数据库... 💾 正在保存向量数据库... ❌ 构建数据库时发生错误:Error in __cdecl faiss::FileIOWriter::FileIOWriter(const char *) at D:\a\faiss-wheels\faiss-wheels\faiss\faiss\impl\io.cpp:102: Error: 'f' failed: could not open D:\python草稿\智能中医系统\门诊\faiss_index\index.faiss for writing: No such file or directory
08-11
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值