I. Characters with Hash

本文介绍了一种通过比较种子字符与一系列输入字符的差异来计算最小编辑距离的算法实现。重点在于如何根据字符间的相似度调整计数逻辑,并特别考虑了所有输入字符与种子字符完全相同这一特殊情况。

// Note for this problem: when every input character equals the seed, output 1 instead of 0.

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cmath>
#include<queue>
#include <set>
using namespace std;
typedef unsigned long long ull;
// Reads T test cases. Each case: a count N, a seed character c, then a run of
// N characters; prints a score based on how each character compares with the
// seed. NOTE(review): the precise scoring rules come from the original judge
// problem ("Characters with Hash"), which is not visible here — the comments
// below describe only what the code literally does.
int main()
{
    int T;              // number of test cases
    int N;              // number of input characters in the current case
    char c,ch;          // c = seed character, ch = current input character
    scanf("%d",&T);
    while(T--)
    {
        int coun=0;     // accumulated score for this case
        scanf("%d",&N);
        getchar();      // consume the newline left after reading N
        scanf("%c",&c); // read the seed character
        getchar();      // consume the separator/newline after the seed
        for(int i=0; i<N; i++)
        {
            // presumably the N characters are adjacent on one line — confirm
            // against the problem's input format
            scanf("%c",&ch);
            // While the score is still 0, characters equal to the seed cost nothing.
            if(c==ch&&coun==0)
                continue;
            // The first "close" mismatch (ASCII distance < 10) costs 1;
            if(c!=ch&&abs(c-ch)<10&&coun==0)
                coun+=1;
            else
                coun+=2;
            // NOTE(review): once coun != 0, even characters equal to the seed
            // take the +2 branch — looks intentional, verify vs. the problem.
        }
        // Special case (see the header note): if every character equalled the
        // seed, coun stays 0 and the required answer is 1, not 0.
        if(coun!=0)
            printf("%d\n",coun);
        else
            printf("1\n");
        getchar();      // consume the trailing newline of the character line
    }
    return 0;
}
 

"""《乱世佳人》(Gone with the Wind) text analysis & visualization — tkinter GUI.

Assignment: read gone_with_the_wind.txt and provide, behind a friendly GUI:
text input (file or paste), basic statistics (total / unique words),
word-frequency analysis, a word cloud, a frequency listing, sentiment
analysis, character recognition, and character-relation analysis with a
co-occurrence network visualization.
"""
import tkinter as tk
import tkinter.font  # FIX: tk.font.Font is used in render_result; tkinter.font must be imported explicitly
from tkinter import ttk, scrolledtext, filedialog, messagebox
import re
from collections import Counter
import threading
import hashlib
import os

# === third-party libraries ===
try:
    from ttkthemes import ThemedTk
except ImportError:
    raise ImportError("请安装 ttkthemes: pip install ttkthemes")
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
from textblob import TextBlob
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import networkx as nx

# === configuration ===
CHARACTERS = ["scarlett", "rhett", "melanie", "ashley", "ellen", "gerald",
              "pitty", "archie", "frank", "bonnie", "suellen", "mammy"]
ANALYZERS = [
    {"name": "basic", "title": "📊 基本统计"},
    {"name": "freq", "title": "📈 词频分析"},
    {"name": "sentiment", "title": "😊 情感分析"},
    {"name": "chars", "title": "👥 人物识别"},
    {"name": "relations", "title": "🔗 人物关系"},
]


# === utility functions ===
def clean_text(text):
    """Lowercase, keep letters only, tokenize, drop stop words / short tokens."""
    text = re.sub(r'[^a-z\s]', ' ', text.lower())
    # FIX: build the stop-word set once; the original called
    # stopwords.words('english') again for every single token.
    stop = set(stopwords.words('english'))
    return [w for w in word_tokenize(text) if w not in stop and len(w) > 2]


def split_sentences(text):
    """Split lowercased text into sentences."""
    return sent_tokenize(text.lower())


def validate_input(text):
    """Accept only reasonably long, mostly-English text (>=10 ASCII letters,
    and letters are at least 30% English among all alphabetic characters)."""
    if not (text := text.strip()):
        return False
    if len(text) < 10:
        return False
    en_chars = sum(1 for c in text if 'a' <= c.lower() <= 'z')
    return en_chars >= 10 and en_chars / max(len([c for c in text if c.isalpha()]), 1) >= 0.3


def read_file_smart(path):
    """Try several encodings in order; raise ValueError if none decodes."""
    for enc in ['utf-8-sig', 'utf-8', 'gbk', 'latin1']:
        try:
            with open(path, encoding=enc) as f:
                return f.read()
        except (UnicodeDecodeError, UnicodeError):
            # FIX: only swallow decode errors — the original bare `except`
            # also hid FileNotFoundError/PermissionError behind an
            # encoding-related message.
            continue
    raise ValueError("无法读取文件编码,请使用纯文本(.txt)文件。")


def clean_markup(text):
    """Strip HTML tags and markdown links/images, collapse whitespace."""
    text = re.sub(r'<[^>]+>', ' ', text)
    text = re.sub(r'\[.*?\]\(.*?\)|!\[.*?\]\(.*?\)', ' ', text)
    return re.sub(r'\s+', ' ', text).strip()


# === cache ===
class Cache:
    """Tiny in-memory result cache keyed by (text-hash, analyzer-name)."""

    def __init__(self):
        self._cache = {}

    def key(self, text):
        # 16-hex-digit md5 prefix is enough to distinguish pasted texts
        return hashlib.md5(text.encode('utf-8', errors='ignore')).hexdigest()[:16]

    def get(self, k, name):
        return self._cache.get((k, name))

    def set(self, k, name, data):
        self._cache[(k, name)] = data


CACHE = Cache()


# === analysis logic ===
def basic_stats(text):
    """Total and unique word counts (after stop-word filtering)."""
    words = clean_text(text)
    return {"total": len(words), "unique": len(set(words))}


def word_freq(text):
    """Top-30 word frequencies plus the full Counter (for the word cloud)."""
    words = clean_text(text)
    counter = Counter(words)
    return {"top": counter.most_common(30), "counter": counter}


def sentiment(text):
    """TextBlob polarity/subjectivity mapped to a coarse mood label."""
    blob = TextBlob(text)
    p, s = blob.sentiment.polarity, blob.sentiment.subjectivity
    if p > 0.2:
        mood, desc = "积极乐观 😊", "传递希望、喜悦或赞赏。"
    elif p < -0.2:
        mood, desc = "消极悲观 😞", "带有悲伤、愤怒或批评倾向。"
    else:
        mood, desc = "中性平和 😐", "情绪克制,偏向叙述。"
    return {"mood": mood, "desc": desc, "p": round(p, 3), "s": round(s, 3)}


def characters(text):
    """Mention counts for the preset character names.

    NOTE: plain substring counting — e.g. "ellen" also matches inside longer
    words; acceptable for this assignment's preset names.
    """
    low = text.lower()
    return {c: low.count(c) for c in CHARACTERS if c in low}


def relations(text):
    """Count per-sentence co-occurrences of each character pair."""
    pairs = Counter()
    for s in split_sentences(text):
        mentioned = [c for c in CHARACTERS if c in s]
        for i in range(len(mentioned)):
            for j in range(i + 1, len(mentioned)):
                pairs[tuple(sorted([mentioned[i], mentioned[j]]))] += 1
    return {"pairs": pairs}


# === summary generator ===
def generate_summary(name, data):
    """Return a short, **-bold-marked Chinese summary for one analyzer result."""
    if name == "basic":
        total, unique = data["total"], data["unique"]
        div = unique / total if total else 0
        level = '较高' if div > 0.3 else '中等' if div > 0.2 else '较低'
        return (
            f"📌 这段文本共包含 **{total:,} 个单词**,其中 **{unique:,} 个不重复单词**。\n"
            f"词汇丰富度为 **{div:.1%}** —— **{level}**。\n"
            f"📚 小知识:经典文学作品通常在 20%~40% 之间。"
        )
    elif name == "freq":
        top10 = [w for w, _ in data["top"][:10]]
        return (
            f"🔤 **高频词前10名**:{' · '.join(top10)}\n"
            f"这些词反映核心主题(已过滤停用词)。\n"
            f"若出现人名(如 scarlett),说明聚焦人物互动。"
        )
    elif name == "sentiment":
        return (
            f"🎭 **情绪基调**:{data['mood']}\n"
            f"{data['desc']}\n"
            f"📊 极性值:{data['p']}(-1~1,越正越积极)\n"
            f"💭 主观性:{data['s']}(0~1,越高越主观)"
        )
    elif name == "chars":
        if not data:
            return "🔍 **未检测到预设人物**。\n请确保包含如 Scarlett、Rhett 等名字(不区分大小写)。"
        main = max(data, key=data.get)
        total_mentions = sum(data.values())
        return (
            f"👑 **核心人物**:**{main.capitalize()}**(被提及 {data[main]} 次)\n"
            f"共识别 {len(data)} 位角色,总计 {total_mentions} 次提及。\n"
            f"当前段落可能围绕 {main.capitalize()} 展开情节。"
        )
    elif name == "relations":
        pairs = [(a, b, w) for (a, b), w in data["pairs"].items() if w >= 2]
        if not pairs:
            return "🕸️ **未发现强人物关联**。\n建议分析更长段落(如一整章)以捕捉互动。"
        a, b, w = max(pairs, key=lambda x: x[2])
        return (
            f"💞 **最紧密关系**:**{a.capitalize()} ↔ {b.capitalize()}**(共现 {w} 次)\n"
            f"两人可能频繁互动(情侣/对手/亲属)。\n"
            f"点击下方按钮查看完整关系网络图。"
        )
    return str(data)


# === rendering ===
def render_result(tab, name, data, app):
    """Render one analyzer's summary card plus its detail widgets into `tab`."""
    for w in tab.winfo_children():
        w.destroy()
    # summary card
    card = tk.Frame(tab, bg="#f8f9fa", relief="groove", bd=1)
    card.pack(fill=tk.X, padx=10, pady=(10, 5))
    summary = generate_summary(name, data)
    txt = tk.Text(card, wrap=tk.WORD, bg="#f8f9fa", relief="flat", height=4,
                  font=("Segoe UI", 10))
    txt.insert(tk.END, summary)
    # Turn **...** spans into bold text.
    # FIX: the original restarted each search from f"1.{start}", which becomes
    # an invalid Tk index such as "1.2.15" after the first match and raised
    # TclError; search forward from the end of the previous bold span instead.
    pos = "1.0"
    while True:
        start = txt.search("**", pos, tk.END)
        if not start:
            break
        end = txt.search("**", f"{start}+2c", tk.END)
        if not end:
            break
        txt.delete(start, f"{start}+2c")
        # after deleting the opening marker the closing "**" has shifted two
        # characters left, so [end-2c, end) is exactly the closing marker
        txt.delete(f"{end}-2c", end)
        txt.tag_add("bold", start, f"{end}-2c")
        pos = txt.index(f"{end}-2c")
    bold_font = tk.font.Font(txt, weight="bold")
    txt.tag_configure("bold", font=bold_font)
    txt.config(state='disabled')
    txt.pack(padx=12, pady=8, fill=tk.X)
    # detail area
    if name == "freq":
        list_frame = ttk.Frame(tab)
        list_frame.pack(fill=tk.BOTH, expand=True, padx=10)
        txt = scrolledtext.ScrolledText(list_frame, width=30, font=("Consolas", 9))
        for w, c in data["top"]:
            txt.insert(tk.END, f"{w:<15} : {c}\n")
        txt.config(state='disabled')
        txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        btn = ttk.Button(list_frame, text="🖼️ 生成词云",
                         command=lambda: app.show_wordcloud(data["counter"]))
        btn.pack(side=tk.RIGHT, padx=10, pady=10)
    elif name == "chars" and data:
        txt = scrolledtext.ScrolledText(tab, height=6, font=("Consolas", 9))
        for c, n in sorted(data.items(), key=lambda x: -x[1]):
            txt.insert(tk.END, f"{c.capitalize():<12} : {n} 次\n")
        txt.config(state='disabled')
        txt.pack(fill=tk.BOTH, expand=True, padx=10)
    elif name == "relations":
        pairs = [(a, b, w) for (a, b), w in data["pairs"].items() if w >= 2]
        if pairs:
            btn = ttk.Button(tab, text="🌐 查看人物关系图",
                             command=lambda: app.show_relations(data["pairs"]))
            btn.pack(pady=10)


# === main application ===
class App:
    """Main window: input area, analyzer tabs, export and visualizations."""

    def __init__(self, root):
        self.root = root
        self.is_dark = False
        root.title("《乱世佳人》文本分析工具 — 最终增强版")
        root.geometry("900x780")
        self.setup_ui()

    def setup_ui(self):
        """Build menu bar, control row, input area, and result notebook."""
        # menu bar
        menubar = tk.Menu(self.root)
        view_menu = tk.Menu(menubar, tearoff=0)
        view_menu.add_command(label="切换深色/浅色主题", command=self.toggle_theme)
        menubar.add_cascade(label="视图", menu=view_menu)
        self.root.config(menu=menubar)
        style = ttk.Style()
        style.configure("TButton", font=("Segoe UI", 9))
        style.configure("TNotebook.Tab", padding=[12, 6])
        # control row
        ctrl = ttk.Frame(self.root)
        ctrl.pack(pady=8, padx=10, fill=tk.X)
        ttk.Button(ctrl, text="📂 加载文件", command=self.load_file).pack(side=tk.LEFT)
        ttk.Button(ctrl, text="🧹 清空", command=self.clear).pack(side=tk.LEFT, padx=5)
        self.analyze_btn = ttk.Button(ctrl, text="▶️ 分析全部", state='disabled',
                                      command=self.start_analysis)
        self.analyze_btn.pack(side=tk.LEFT, padx=5)
        self.export_btn = ttk.Button(ctrl, text="📤 导出报告", state='disabled',
                                     command=self.export_report)
        self.export_btn.pack(side=tk.LEFT, padx=5)
        self.status = ttk.Label(ctrl, text="📝 请粘贴英文小说原文或加载 .txt 文件",
                                foreground="gray")
        self.status.pack(side=tk.RIGHT)
        # input area
        ttk.Label(self.root, text="📖 英文小说原文(建议 ≥100 字):").pack(anchor='w', padx=10)
        self.text_area = scrolledtext.ScrolledText(self.root, height=5, font=("Consolas", 10))
        self.text_area.pack(padx=10, fill=tk.BOTH)
        self.text_area.bind('<KeyRelease>', self.on_text_change)
        # result area
        self.notebook = ttk.Notebook(self.root)
        self.notebook.pack(padx=10, pady=5, fill=tk.BOTH, expand=True)
        self.tabs = {}
        for a in ANALYZERS:
            frame = ttk.Frame(self.notebook)
            self.notebook.add(frame, text=a["title"])
            self.tabs[a["name"]] = frame

    def toggle_theme(self):
        """Flip between light and dark themes and recolor the summary cards."""
        self.is_dark = not self.is_dark
        theme = "azure-dark" if self.is_dark else "azure"
        try:
            self.root.set_theme(theme)
        except tk.TclError:
            # NOTE(review): "azure"/"azure-dark" are not bundled with
            # ttkthemes; without the extra theme installed set_theme raised
            # and killed the handler — keep the UI alive and still recolor.
            pass
        bg = "#2d2d2d" if self.is_dark else "#f8f9fa"
        fg = "white" if self.is_dark else "black"
        for tab in self.tabs.values():
            for child in tab.winfo_children():
                if isinstance(child, tk.Frame) and child.cget("relief") == "groove":
                    child.config(bg=bg)
                    for widget in child.winfo_children():
                        if isinstance(widget, tk.Text):
                            widget.config(bg=bg, fg=fg)

    def on_text_change(self, event=None):
        """Validate the pasted text and enable/disable the action buttons."""
        raw = self.text_area.get(1.0, tk.END)
        text = clean_markup(raw)
        valid = validate_input(text)
        state = 'normal' if valid else 'disabled'
        self.analyze_btn.config(state=state)
        self.export_btn.config(state=state)
        self.status.config(
            text="✅ 内容有效 · 可点击「分析全部」" if valid
            else "📝 请粘贴英文小说原文或加载文件" if not raw.strip()
            else "⚠️ 当前内容暂不符合分析要求",
            foreground="green" if valid else "gray" if not raw.strip() else "orange"
        )

    def load_file(self):
        """Pick a .txt file and load it on a background thread."""
        path = filedialog.askopenfilename(filetypes=[("Text files", "*.txt")])
        if not path:
            return
        threading.Thread(target=self._load_bg, args=(path,), daemon=True).start()

    def _load_bg(self, path):
        """Background file read; UI updates are marshalled via root.after."""
        try:
            content = read_file_smart(path)
            self.root.after(0, lambda: (self.text_area.delete(1.0, tk.END),
                                        self.text_area.insert(tk.END, content)))
        except Exception as e:
            # FIX: capture the message now — `e` is unbound once the except
            # block exits, so the original deferred lambda raised NameError.
            msg = str(e)
            self.root.after(0, lambda: messagebox.showerror("错误", msg))

    def clear(self):
        """Empty the input area and refresh button/status state."""
        self.text_area.delete(1.0, tk.END)
        self.on_text_change()

    def start_analysis(self):
        """Validate, dedupe by content hash, then analyze on a worker thread."""
        raw = self.text_area.get(1.0, tk.END)
        text = clean_markup(raw)
        if not validate_input(text):
            messagebox.showwarning("输入无效", "请确保输入足够长的英文文本。")
            return
        new_hash = CACHE.key(text)
        if getattr(self, 'current_hash', None) == new_hash:
            self.status.config(text="ℹ️ 内容未变,跳过重复分析", foreground="blue")
            return
        self.current_hash = new_hash
        self.status.config(text="⏳ 分析中...(界面仍可操作)", foreground="blue")
        self.analyze_btn.config(state='disabled')
        threading.Thread(target=self._analyze_bg, args=(text,), daemon=True).start()

    def _analyze_bg(self, text):
        """Run every analyzer (cache-aware), then push results to the UI thread."""
        # FIX: dispatch table hoisted out of the loop (was rebuilt per analyzer)
        func_map = {
            "basic": basic_stats,
            "freq": word_freq,
            "sentiment": sentiment,
            "chars": characters,
            "relations": relations,
        }
        results = {}
        for a in ANALYZERS:
            cached = CACHE.get(self.current_hash, a["name"])
            if cached is not None:
                results[a["name"]] = cached
            else:
                res = func_map[a["name"]](text)
                CACHE.set(self.current_hash, a["name"], res)
                results[a["name"]] = res
        self.root.after(0, lambda: self._update_ui(results))

    def _update_ui(self, results):
        """Render all results and restore the button states (UI thread)."""
        for name, data in results.items():
            render_result(self.tabs[name], name, data, self)
        self.status.config(text="🎉 分析完成!切换标签页查看结果", foreground="green")
        self.analyze_btn.config(state='normal')
        self.export_btn.config(state='normal')

    def export_report(self):
        """Export the cached analyzer summaries as a Markdown report."""
        raw = self.text_area.get(1.0, tk.END)
        text = clean_markup(raw)
        if not validate_input(text):
            messagebox.showwarning("导出失败", "请先输入有效文本并完成分析。")
            return
        path = filedialog.asksaveasfilename(
            defaultextension=".md",
            filetypes=[("Markdown 文件", "*.md"), ("所有文件", "*.*")]
        )
        if not path:
            return
        try:
            md = f"# 《乱世佳人》文本分析报告\n\n> 基于 {len(text)} 字符的英文文本\n\n---\n"
            # FIX: self.current_hash does not exist before the first analysis;
            # the export button can be enabled by mere text validity, so the
            # original crashed here with AttributeError.
            cur = getattr(self, 'current_hash', None)
            for a in ANALYZERS:
                data = CACHE.get(cur, a["name"]) if cur is not None else None
                if data is not None:
                    summary = generate_summary(a["name"], data)
                    # strip ** markers for plain-text compatibility
                    summary_clean = summary.replace("**", "")
                    md += f"\n## {a['title']}\n\n{summary_clean}\n"
            with open(path, 'w', encoding='utf-8') as f:
                f.write(md)
            messagebox.showinfo("导出成功", f"报告已保存至:\n{os.path.abspath(path)}")
        except Exception as e:
            messagebox.showerror("导出失败", f"保存时出错:\n{str(e)}")

    # === visualization methods ===
    def show_wordcloud(self, counter):
        """Render a word cloud from the frequency Counter in a matplotlib window."""
        try:
            wc = WordCloud(width=800, height=400,
                           background_color='white').generate_from_frequencies(counter)
            plt.figure(figsize=(10, 5))
            plt.imshow(wc, interpolation='bilinear')
            plt.axis("off")
            plt.title("高频词云图")
            plt.show()
        except Exception as e:
            messagebox.showerror("错误", f"词云生成失败:{e}")

    def show_relations(self, pairs):
        """Draw the character co-occurrence network (edges with weight >= 2)."""
        G = nx.Graph()
        for (a, b), w in pairs.items():
            if w >= 2:
                G.add_edge(a.capitalize(), b.capitalize(), weight=w)
        if not G.nodes():
            messagebox.showinfo("提示", "无足够共现数据(需 ≥2 次)")
            return
        plt.figure(figsize=(10, 8))
        pos = nx.spring_layout(G, k=3, iterations=100)
        nx.draw_networkx_nodes(G, pos, node_size=1200, node_color='lightcoral')
        nx.draw_networkx_labels(G, pos, font_size=11, font_weight='bold')
        weights = [d['weight'] for u, v, d in G.edges(data=True)]
        nx.draw_networkx_edges(G, pos, width=[w * 1.2 for w in weights], alpha=0.6)
        plt.title("人物共现关系网络(阈值:≥2 次)")
        plt.axis('off')
        plt.tight_layout()
        plt.show()


# === entry point ===
if __name__ == "__main__":
    # NOTE(review): the "azure" theme is not bundled with ttkthemes; ThemedTk
    # will raise at startup unless it is installed separately — confirm the
    # deployment environment provides it.
    root = ThemedTk(theme="azure")
    App(root)
    root.mainloop()
最新发布
12-10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值