import re
import sqlite3
from collections import Counter
from tkinter import *
from tkinter import ttk, messagebox
import tkinter.scrolledtext as scrolledtext
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
import threading
# Make sure the NLTK resources used by this module are installed.
# Each resource is looked up locally first and downloaded only when missing,
# so ordinary start-ups do not need network access.
# 'punkt_tab' is the tokenizer data newer NLTK releases load for
# word_tokenize(); 'punkt' is kept for older releases.
for _nltk_path, _nltk_package in (
    ('corpora/stopwords', 'stopwords'),
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
):
    try:
        nltk.data.find(_nltk_path)
    except LookupError:
        # nltk.data.find raises LookupError when the resource is not installed.
        nltk.download(_nltk_package)
class AcademicPaperManager:
    """Tkinter application that imports BibTeX articles into SQLite and queries them.

    The window offers three areas: an import bar (file path, progress bar,
    status label), a query bar (keyword / author / year filters, collaborator
    lookup, abstract word statistics) and a result list with a detail pane.

    Only ``@article`` entries are imported. Authors and keywords are stored in
    their own tables and linked to papers through ``paper_author`` and
    ``paper_keyword``.
    """

    # Start of a BibTeX entry: "@type{".
    _ENTRY_START = re.compile(r'@(\w+)\s*\{')
    # A field name followed by "=", e.g. "title = ".
    _FIELD_NAME = re.compile(r'([\w-]+)\s*=\s*')
    # An undelimited field value such as a number or a month macro.
    _BARE_VALUE = re.compile(r'[^,\s{}"]+')
    # BibTeX separates authors with the word "and" surrounded by whitespace
    # (which may include a line break).
    _AUTHOR_SEPARATOR = re.compile(r'\s+and\s+')

    def __init__(self, root, db_path='papers.db'):
        """Set up the window, the database and the widgets.

        Args:
            root: The Tk root window that hosts the application.
            db_path: Path of the SQLite database file (``':memory:'`` works too).
        """
        self.root = root
        self.root.title("学术论文管理系统")
        self.root.geometry("1000x700")
        self.init_db(db_path)
        self.create_widgets()
        # English stop words are excluded from the abstract word statistics.
        self.stop_words = set(stopwords.words('english'))
        # Close the database connection when the window is closed.
        self.root.protocol("WM_DELETE_WINDOW", self._on_close)

    def _on_close(self):
        """Close the database connection and destroy the window."""
        self.conn.close()
        self.root.destroy()

    def init_db(self, db_path='papers.db'):
        """Open the SQLite database and create the tables if they do not exist.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS papers (
                id INTEGER PRIMARY KEY,
                title TEXT,
                year INTEGER,
                journal TEXT,
                volume TEXT,
                number TEXT,
                pages TEXT,
                abstract TEXT,
                doi TEXT,
                issn TEXT,
                month TEXT
            )
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS authors (
                id INTEGER PRIMARY KEY,
                name TEXT UNIQUE
            )
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS paper_author (
                paper_id INTEGER,
                author_id INTEGER,
                PRIMARY KEY (paper_id, author_id),
                FOREIGN KEY (paper_id) REFERENCES papers(id),
                FOREIGN KEY (author_id) REFERENCES authors(id)
            )
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS keywords (
                id INTEGER PRIMARY KEY,
                keyword TEXT UNIQUE
            )
        ''')
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS paper_keyword (
                paper_id INTEGER,
                keyword_id INTEGER,
                PRIMARY KEY (paper_id, keyword_id),
                FOREIGN KEY (paper_id) REFERENCES papers(id),
                FOREIGN KEY (keyword_id) REFERENCES keywords(id)
            )
        ''')
        self.conn.commit()

    def create_widgets(self):
        """Build the import bar, the query bar, the result list and the detail pane."""
        # Top frame: BibTeX import controls.
        top_frame = Frame(self.root)
        top_frame.pack(pady=10, fill=X)
        Label(top_frame, text="BibTeX文件路径:").grid(row=0, column=0, padx=5)
        self.file_path = Entry(top_frame, width=50)
        self.file_path.grid(row=0, column=1, padx=5)
        Button(top_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, padx=5)
        Button(top_frame, text="导入数据", command=self.start_import).grid(row=0, column=3, padx=5)

        # Progress bar and status line.
        self.progress = ttk.Progressbar(top_frame, orient=HORIZONTAL, length=200, mode='determinate')
        self.progress.grid(row=1, column=0, columnspan=4, pady=5, sticky='ew')
        self.status = Label(top_frame, text="就绪")
        self.status.grid(row=2, column=0, columnspan=4)

        # Middle frame: query filters and action buttons.
        middle_frame = Frame(self.root)
        middle_frame.pack(pady=10, fill=X)
        Label(middle_frame, text="关键词:").grid(row=0, column=0, padx=5)
        self.keyword_entry = Entry(middle_frame, width=20)
        self.keyword_entry.grid(row=0, column=1, padx=5)
        Label(middle_frame, text="作者:").grid(row=0, column=2, padx=5)
        self.author_entry = Entry(middle_frame, width=20)
        self.author_entry.grid(row=0, column=3, padx=5)
        Label(middle_frame, text="年份:").grid(row=0, column=4, padx=5)
        self.year_entry = Entry(middle_frame, width=10)
        self.year_entry.grid(row=0, column=5, padx=5)
        Button(middle_frame, text="查询", command=self.search_papers).grid(row=0, column=6, padx=5)
        Button(middle_frame, text="查询合作关系", command=self.search_collaborators).grid(row=0, column=7, padx=5)
        Button(middle_frame, text="统计高频词", command=self.show_top_words).grid(row=0, column=8, padx=5)

        # Bottom frame: result list with a vertical scrollbar.
        bottom_frame = Frame(self.root)
        bottom_frame.pack(pady=10, fill=BOTH, expand=True)
        # (column id, heading text, width in pixels)
        column_specs = (
            ('ID', 'ID', 50),
            ('Title', '标题', 300),
            ('Authors', '作者', 200),
            ('Year', '年份', 50),
            ('Journal', '期刊', 150),
        )
        self.tree = ttk.Treeview(
            bottom_frame,
            columns=tuple(spec[0] for spec in column_specs),
            show='headings',
        )
        for column_id, heading, width in column_specs:
            self.tree.heading(column_id, text=heading)
            self.tree.column(column_id, width=width)
        self.tree.pack(side=LEFT, fill=BOTH, expand=True)
        scrollbar = ttk.Scrollbar(bottom_frame, orient=VERTICAL, command=self.tree.yview)
        scrollbar.pack(side=RIGHT, fill=Y)
        self.tree.configure(yscrollcommand=scrollbar.set)

        # Detail pane below the result list.
        self.details = scrolledtext.ScrolledText(self.root, width=100, height=10)
        self.details.pack(pady=10, fill=BOTH, expand=False)

        # Show the details of a paper when its row is selected.
        self.tree.bind('<<TreeviewSelect>>', self.show_paper_details)

    def browse_file(self):
        """Let the user pick a file and put its path into the path entry."""
        from tkinter.filedialog import askopenfilename
        filepath = askopenfilename(
            filetypes=[("BibTeX files", "*.bib"), ("Text files", "*.txt"), ("All files", "*.*")]
        )
        if filepath:
            self.file_path.delete(0, END)
            self.file_path.insert(0, filepath)

    def start_import(self):
        """Validate the chosen path, reset the progress display and run the import."""
        filepath = self.file_path.get()
        if not filepath:
            messagebox.showerror("错误", "请选择BibTeX文件")
            return
        self.update_progress(0, "正在导入...")
        # The import runs on the calling (GUI) thread: the sqlite3 connection
        # was created on this thread and is used by every insert.
        self.import_data(filepath)

    def import_data(self, filepath):
        """Import every ``@article`` entry of a BibTeX file into the database.

        The whole import is one transaction: it is committed once at the end
        and rolled back if anything fails, so a failed import leaves the
        database unchanged. Progress and the final outcome are reported through
        the progress bar, the status label and a message box.

        Args:
            filepath: Path of the UTF-8 encoded BibTeX file.
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
            entries = self.parse_bibtex(content)
            total = len(entries)
            for index, entry in enumerate(entries, start=1):
                self._store_entry(entry)
                # Update synchronously: deferred callbacks would not run until
                # this method returns, and would all see the final loop index.
                self.update_progress(index / total * 100, f"正在导入 {index}/{total}")
            self.conn.commit()
        except Exception as e:
            self.conn.rollback()
            # Build the message now; the exception name is unbound once the
            # except block ends.
            message = f"导入失败: {str(e)}"
            self.update_progress(0, "导入失败")
            messagebox.showerror("错误", message)
            return
        self.update_progress(100, f"导入完成,共导入 {total} 篇论文")
        messagebox.showinfo("成功", f"成功导入 {total} 篇论文")

    def _store_entry(self, entry):
        """Insert one parsed entry with its authors and keywords, without committing.

        Args:
            entry: Field dictionary as produced by ``parse_bibtex``.

        Returns:
            The id of the inserted paper row.
        """
        paper_id = self.insert_paper(entry, commit=False)
        for author in self.parse_authors(entry.get('author', '')):
            author_id = self.insert_author(author, commit=False)
            self.insert_paper_author(paper_id, author_id, commit=False)
        for raw_keyword in entry.get('keywords', '').split(';'):
            # Collapse line breaks / repeated blanks and skip empty items
            # (for example after a trailing ';').
            keyword = " ".join(raw_keyword.split())
            if keyword:
                keyword_id = self.insert_keyword(keyword, commit=False)
                self.insert_paper_keyword(paper_id, keyword_id, commit=False)
        return paper_id

    def update_progress(self, value, text):
        """Set the progress bar value and status text, then redraw the window.

        Args:
            value: Progress bar value (0-100).
            text: Text for the status label.
        """
        self.progress['value'] = value
        self.status.config(text=text)
        # Redraw without dispatching user events, so buttons cannot be
        # triggered re-entrantly while an import is running.
        self.root.update_idletasks()

    @staticmethod
    def _find_closing_brace(text, open_pos):
        """Return the index of the brace closing the one at ``open_pos``, or -1."""
        depth = 0
        for index in range(open_pos, len(text)):
            char = text[index]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return index
        return -1

    @staticmethod
    def _find_closing_quote(text, open_pos):
        """Return the index of the quote closing the one at ``open_pos``, or -1.

        A quote nested inside braces does not terminate the value.
        """
        depth = 0
        for index in range(open_pos + 1, len(text)):
            char = text[index]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
            elif char == '"' and depth <= 0:
                return index
        return -1

    def _strip_outer_braces(self, value):
        """Strip whitespace and any brace pair that encloses the whole value."""
        value = value.strip()
        while value.startswith('{') and self._find_closing_brace(value, 0) == len(value) - 1:
            value = value[1:-1].strip()
        return value

    def _parse_fields(self, body):
        """Parse the ``name = value`` pairs of one entry body.

        Values may be brace-delimited (with nested braces), double-quoted, or
        bare tokens such as numbers. Field names are lower-cased; when a field
        occurs more than once the last occurrence wins.

        Args:
            body: Entry text after the citation key and its comma.

        Returns:
            A dict mapping field names to cleaned values.
        """
        fields = {}
        pos = 0
        length = len(body)
        while True:
            name_match = self._FIELD_NAME.search(body, pos)
            if name_match is None:
                break
            value_start = name_match.end()
            if value_start >= length:
                break
            opener = body[value_start]
            if opener == '{':
                value_end = self._find_closing_brace(body, value_start)
                if value_end == -1:
                    break
                raw_value = body[value_start + 1:value_end]
                pos = value_end + 1
            elif opener == '"':
                value_end = self._find_closing_quote(body, value_start)
                if value_end == -1:
                    break
                raw_value = body[value_start + 1:value_end]
                pos = value_end + 1
            else:
                bare_match = self._BARE_VALUE.match(body, value_start)
                raw_value = bare_match.group(0) if bare_match else ''
                pos = bare_match.end() if bare_match else value_start
            fields[name_match.group(1).lower()] = self._strip_outer_braces(raw_value)
        return fields

    def parse_bibtex(self, content):
        """Parse BibTeX text and return its ``@article`` entries.

        Entries are delimited by brace matching, so nested braces and ``@``
        characters inside field values are handled. Entries of other types,
        entries without a ``key,`` part and entries whose braces never close
        are skipped.

        Args:
            content: Text of a BibTeX file.

        Returns:
            A list of dicts, one per article, holding the citation key under
            ``'key'`` plus one item per field (lower-cased field name).
        """
        entries = []
        pos = 0
        while True:
            start = self._ENTRY_START.search(content, pos)
            if start is None:
                break
            open_pos = start.end() - 1
            close_pos = self._find_closing_brace(content, open_pos)
            if close_pos == -1:
                # Unterminated entry: skip its header and keep scanning.
                pos = start.end()
                continue
            pos = close_pos + 1
            if start.group(1).lower() != 'article':
                continue
            key, comma, body = content[open_pos + 1:close_pos].partition(',')
            if not comma:
                continue
            entry = {'key': key.strip()}
            entry.update(self._parse_fields(body))
            entries.append(entry)
        return entries

    def parse_authors(self, author_str):
        """Split a BibTeX author field into display names.

        ``"Chen, Zheyi and Liang, Jie"`` becomes ``["Zheyi Chen", "Jie Liang"]``.
        Names written as ``Last, First`` are reordered to ``First Last``; runs
        of whitespace are collapsed and empty names are dropped.

        Args:
            author_str: Raw value of the ``author`` field.

        Returns:
            A list of author names in their original order.
        """
        authors = []
        for raw_name in self._AUTHOR_SEPARATOR.split(author_str.strip()):
            name = raw_name.strip()
            if ',' in name:
                last, first = name.split(',', 1)
                name = f"{first.strip()} {last.strip()}"
            name = " ".join(name.split())
            if name:
                authors.append(name)
        return authors

    @staticmethod
    def _parse_year(value):
        """Convert a year field to ``int``; return ``None`` when it holds no year."""
        if not value:
            return None
        try:
            return int(value.strip())
        except ValueError:
            # Tolerate decorated values such as "2020a" or "c. 2019".
            found = re.search(r'\d{4}', value)
            return int(found.group(0)) if found else None

    def insert_paper(self, entry, *, commit=True):
        """Insert one paper row.

        Args:
            entry: Field dictionary as produced by ``parse_bibtex``; missing
                text fields are stored as empty strings, a missing or
                unparseable year as NULL.
            commit: Commit immediately; pass ``False`` inside a larger
                transaction.

        Returns:
            The id of the inserted row.
        """
        self.cursor.execute('''
            INSERT INTO papers (title, year, journal, volume, number, pages, abstract, doi, issn, month)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            entry.get('title', ''),
            self._parse_year(entry.get('year')),
            entry.get('journal', ''),
            entry.get('volume', ''),
            entry.get('number', ''),
            entry.get('pages', ''),
            entry.get('abstract', ''),
            entry.get('doi', ''),
            entry.get('issn', ''),
            entry.get('month', ''),
        ))
        paper_id = self.cursor.lastrowid
        if commit:
            self.conn.commit()
        return paper_id

    def insert_author(self, author_name, *, commit=True):
        """Insert an author if not yet present and return the author's id.

        Args:
            author_name: Display name of the author.
            commit: Commit immediately; pass ``False`` inside a larger
                transaction.
        """
        self.cursor.execute('''
            INSERT OR IGNORE INTO authors (name) VALUES (?)
        ''', (author_name,))
        if commit:
            self.conn.commit()
        self.cursor.execute('SELECT id FROM authors WHERE name = ?', (author_name,))
        return self.cursor.fetchone()[0]

    def insert_paper_author(self, paper_id, author_id, *, commit=True):
        """Link a paper to an author; an existing link is left untouched."""
        self.cursor.execute('''
            INSERT OR IGNORE INTO paper_author (paper_id, author_id) VALUES (?, ?)
        ''', (paper_id, author_id))
        if commit:
            self.conn.commit()

    def insert_keyword(self, keyword, *, commit=True):
        """Insert a keyword if not yet present and return the keyword's id.

        Args:
            keyword: Keyword text.
            commit: Commit immediately; pass ``False`` inside a larger
                transaction.
        """
        self.cursor.execute('''
            INSERT OR IGNORE INTO keywords (keyword) VALUES (?)
        ''', (keyword,))
        if commit:
            self.conn.commit()
        self.cursor.execute('SELECT id FROM keywords WHERE keyword = ?', (keyword,))
        return self.cursor.fetchone()[0]

    def insert_paper_keyword(self, paper_id, keyword_id, *, commit=True):
        """Link a paper to a keyword; an existing link is left untouched."""
        self.cursor.execute('''
            INSERT OR IGNORE INTO paper_keyword (paper_id, keyword_id) VALUES (?, ?)
        ''', (paper_id, keyword_id))
        if commit:
            self.conn.commit()

    def _query_papers(self, keyword='', author='', year=''):
        """Return ``(id, title, authors, year, journal)`` rows matching the filters.

        Filters are combined with AND; an empty filter is ignored. Keyword and
        author filters are substring matches. Filtering uses EXISTS
        sub-queries, so the authors column always lists every author of the
        paper (in insertion order) and papers without authors are included.

        Args:
            keyword: Substring to look for in the paper's keywords.
            author: Substring to look for in the paper's author names.
            year: Publication year to match.

        Returns:
            A list of row tuples ordered by paper id.
        """
        query = '''
            SELECT p.id,
                   p.title,
                   COALESCE((
                       SELECT GROUP_CONCAT(name, ', ')
                       FROM (
                           SELECT a.name AS name
                           FROM paper_author pa
                           JOIN authors a ON a.id = pa.author_id
                           WHERE pa.paper_id = p.id
                           ORDER BY pa.rowid
                       )
                   ), ''),
                   p.year,
                   p.journal
            FROM papers p
        '''
        conditions = []
        params = []
        if keyword:
            conditions.append('''EXISTS (
                SELECT 1 FROM paper_keyword pk
                JOIN keywords k ON k.id = pk.keyword_id
                WHERE pk.paper_id = p.id AND k.keyword LIKE ?
            )''')
            params.append(f"%{keyword}%")
        if author:
            conditions.append('''EXISTS (
                SELECT 1 FROM paper_author pa2
                JOIN authors a2 ON a2.id = pa2.author_id
                WHERE pa2.paper_id = p.id AND a2.name LIKE ?
            )''')
            params.append(f"%{author}%")
        if year:
            conditions.append("p.year = ?")
            params.append(year)
        if conditions:
            query += " WHERE " + " AND ".join(conditions)
        query += " ORDER BY p.id"
        self.cursor.execute(query, params)
        return self.cursor.fetchall()

    def search_papers(self):
        """Run the query described by the filter entries and list the results."""
        papers = self._query_papers(
            self.keyword_entry.get().strip(),
            self.author_entry.get().strip(),
            self.year_entry.get().strip(),
        )
        # Replace the previous result list.
        self.tree.delete(*self.tree.get_children())
        for paper in papers:
            self.tree.insert('', 'end', values=paper)
        self.status.config(text=f"找到 {len(papers)} 篇论文")

    def _find_collaborators(self, author_name):
        """Return the co-authors of every author whose name contains ``author_name``.

        Args:
            author_name: Substring of the author name to look up.

        Returns:
            ``None`` when no author matches; otherwise the sorted list of
            distinct co-author names (possibly empty).
        """
        pattern = f"%{author_name}%"
        self.cursor.execute('SELECT 1 FROM authors WHERE name LIKE ? LIMIT 1', (pattern,))
        if self.cursor.fetchone() is None:
            return None
        self.cursor.execute('''
            SELECT DISTINCT co.name
            FROM authors target
            JOIN paper_author mine ON mine.author_id = target.id
            JOIN paper_author theirs ON theirs.paper_id = mine.paper_id
            JOIN authors co ON co.id = theirs.author_id
            WHERE target.name LIKE ? AND co.id != target.id
            ORDER BY co.name
        ''', (pattern,))
        return [row[0] for row in self.cursor.fetchall()]

    def search_collaborators(self):
        """Show the co-authors of the author typed into the author entry."""
        author_name = self.author_entry.get().strip()
        if not author_name:
            messagebox.showwarning("警告", "请输入作者名")
            return
        collaborators = self._find_collaborators(author_name)
        if collaborators is None:
            messagebox.showinfo("提示", "未找到该作者")
            return
        if not collaborators:
            messagebox.showinfo("提示", "未找到合作者")
            return
        result = f"作者 {author_name} 的合作者:\n\n" + "\n".join(collaborators)
        self.details.delete('1.0', END)
        self.details.insert(END, result)
        self.status.config(text=f"找到 {len(collaborators)} 位合作者")

    def show_top_words(self):
        """Show the 20 most frequent non-stop-words across all stored abstracts."""
        self.cursor.execute('SELECT abstract FROM papers')
        abstracts = [row[0] for row in self.cursor.fetchall() if row[0]]
        if not abstracts:
            messagebox.showinfo("提示", "没有可用的摘要数据")
            return
        # Count alphabetic, lower-cased tokens that are not stop words.
        word_counts = Counter(
            token
            for abstract in abstracts
            for token in word_tokenize(abstract.lower())
            if token.isalpha() and token not in self.stop_words
        )
        lines = [f"{word}: {count}\n" for word, count in word_counts.most_common(20)]
        result = "摘要高频词Top 20:\n\n" + "".join(lines)
        self.details.delete('1.0', END)
        self.details.insert(END, result)
        self.status.config(text="已统计摘要高频词")

    def show_paper_details(self, event=None):
        """Show all stored fields of the focused paper in the detail pane.

        Bound to ``<<TreeviewSelect>>``. Does nothing when no row is focused,
        when the focused row carries no values, or when the paper is no longer
        in the database.

        Args:
            event: The Tk event passed by the binding (unused).
        """
        selected = self.tree.focus()
        if not selected:
            return
        values = self.tree.item(selected, 'values')
        if not values:
            return
        paper_id = values[0]

        self.cursor.execute('''
            SELECT title, year, journal, volume, number, pages, abstract, doi, issn, month
            FROM papers WHERE id = ?
        ''', (paper_id,))
        paper = self.cursor.fetchone()
        if paper is None:
            return
        title, year, journal, volume, number, pages, abstract, doi, issn, month = paper

        self.cursor.execute('''
            SELECT a.name FROM authors a
            JOIN paper_author pa ON a.id = pa.author_id
            WHERE pa.paper_id = ?
            ORDER BY pa.rowid
        ''', (paper_id,))
        authors = ", ".join(row[0] for row in self.cursor.fetchall())

        self.cursor.execute('''
            SELECT k.keyword FROM keywords k
            JOIN paper_keyword pk ON k.id = pk.keyword_id
            WHERE pk.paper_id = ?
        ''', (paper_id,))
        keywords = ", ".join(row[0] for row in self.cursor.fetchall())

        # The text starts and ends with a line break.
        details = "\n".join([
            "",
            f"标题: {title}",
            f"作者: {authors}",
            f"年份: {year}",
            f"期刊: {journal}",
            f"卷: {volume} 期: {number} 页: {pages}",
            f"DOI: {doi}",
            f"ISSN: {issn}",
            f"月份: {month}",
            f"关键词: {keywords}",
            "摘要:",
            f"{abstract}",
            "",
        ])
        self.details.delete('1.0', END)
        self.details.insert(END, details)
def main():
    """Create the Tk root window, attach the paper manager and run the event loop."""
    window = Tk()
    # Keep a reference to the manager for as long as the event loop runs.
    manager = AcademicPaperManager(window)
    window.mainloop()
    return manager


if __name__ == "__main__":
    main()
# NOTE: the two lines below are leftover web-page text, not part of the program.
# 详细解析此段代码
# 最新发布