from pypinyin import pinyin, Style
import itertools
import random
import json
import requests
# Near-rhyme table: each key is a pinyin final, and its list holds the finals
# that are treated as rhyming with it (the key itself is always included).
# NOTE(review): the "ü" key may never match if the pinyin backend spells that
# final as "v" -- confirm against the finals actually produced upstream.
near_rhyme_map = {
    "a": ["a", "ia", "ua"],
    "ve": ["ve", "ie", "ue", "an"],
    "o": ["o", "uo"],
    "e": ["e", "ie", "ve", "er"],
    "i": ["i"],
    "u": ["u", "ou", "uo", "iu"],
    "ü": ["ü", "ue"],
    "ai": ["ai", "uai"],
    "ei": ["ei", "uei", "ui"],
    "uei": ["uei", "ui", "ei"],
    "ui": ["ui", "uei", "ei"],
    "ao": ["ao", "iao", "ang", "iang", "uang"],
    "ou": ["ou", "o", "u"],
    "ang": ["ang", "iang", "uang", "ao", "iao"],
    "eng": ["eng", "ing", "ong"],
    "ong": ["ong", "eng", "iong"],
    "an": ["an", "ian", "uan", "en"],
    "en": ["en", "an", "in", "un"],
    "in": ["in", "en", "un", "ing"],
    "un": ["un", "uen", "en", "in"],
    "ian": ["ian", "an", "uan", "en"],
    "uan": ["uan", "an", "ian"],
    "iang": ["iang", "ang", "uang", "ao", "iao"],
    "uang": ["uang", "ang", "iang", "ao", "iao"],
    "iao": ["iao", "ao", "iang", "ang", "uang"],
    "iou": ["iou", "ou", "iu"],
    "ing": ["ing", "in", "eng", "ang"],
    "iong": ["iong", "ong", "eng", "ang"]
}

# Reverse lookup: final -> key of a group that lists it.
# A final listed under several keys keeps only the LAST key visited, so this
# mapping is lossy. It is kept unchanged because other code in this file uses
# it to pick a group for a final; it must not be used to decide whether two
# finals rhyme (see is_near_rhyme).
syllable_group = {}
for key, vals in near_rhyme_map.items():
    for v in vals:
        syllable_group[v] = key


def is_near_rhyme(s1, s2):
    """Return True when the finals ``s1`` and ``s2`` are near rhymes.

    Two finals rhyme when they are identical, or when either one appears in
    the other's entry of ``near_rhyme_map``. The check is symmetric because
    the table itself is not (a final may be listed under a key that has no
    matching entry of its own).

    The table is consulted directly instead of comparing ``syllable_group``
    keys: that reverse map stores a single group per final, which made
    explicitly listed pairs such as "an"/"en" compare unequal, and made any
    two unknown tokens compare equal (``None == None``).

    :param s1: first final (or any token without a table entry)
    :param s2: second final (or any token without a table entry)
    :return: bool
    """
    if s1 == s2:
        return True
    return s2 in near_rhyme_map.get(s1, ()) or s1 in near_rhyme_map.get(s2, ())
import time
def call_deepseek_chat(prompt, retries=3, delay=2):
    """Send ``prompt`` to the deepseek-chat endpoint and return the reply text.

    The request is attempted up to ``retries`` times. An attempt fails when
    the HTTP status is not 200, when the JSON body has no non-empty
    ``choices`` entry, or when any exception is raised while sending or
    decoding. The function waits ``delay`` seconds between attempts, but not
    after the last one, since nothing follows it.

    The API key is read from the ``DEEPSEEK_API_KEY`` environment variable;
    when that is unset the in-source placeholder is used, as before.

    :param prompt: text sent as the single user message
    :param retries: maximum number of attempts
    :param delay: seconds to wait between two attempts
    :return: content of the first choice's message, or None if every attempt failed
    """
    import os  # local import so this function does not depend on file-level edits

    api_key = os.environ.get("DEEPSEEK_API_KEY", "请在这里替换成自己的apikey_调用费率超低")
    api_url = "https://api.deepseek.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7
    }

    for attempt in range(1, retries + 1):
        try:
            resp = requests.post(api_url, headers=headers, data=json.dumps(payload), timeout=15)
            if resp.status_code == 200:
                data = resp.json()
                if "choices" in data and data["choices"]:
                    return data["choices"][0]["message"]["content"]
                print(f"调用 deepseek-chat 返回数据格式异常,重试 {attempt}/{retries}")
            else:
                print(f"调用 deepseek-chat 失败,HTTP {resp.status_code},重试 {attempt}/{retries}")
        except Exception as e:
            # Deliberately broad: this is a best-effort retry boundary, so any
            # failure (network, JSON decoding, unexpected payload shape) is
            # reported and retried instead of propagating.
            print(f"调用 deepseek-chat 异常: {e},重试 {attempt}/{retries}")
        # Back off only when another attempt will actually follow.
        if attempt < retries:
            time.sleep(delay)

    print("调用 deepseek-chat 多次失败,返回 None")
    return None
def _seq_contains(long_seq, short_seq):
    """Return True when ``short_seq`` occurs as a contiguous slice of ``long_seq``."""
    width = len(short_seq)
    return any(
        long_seq[i:i + width] == short_seq
        for i in range(len(long_seq) - width + 1)
    )


def _find_rhyme_runs(front, back, max_index_gap):
    """Collect runs of at least two consecutive near-rhyming finals between the halves."""
    runs = []
    for f_start in range(len(front)):
        for b_start in range(len(back)):
            # Only compare positions that sit at a similar offset in each half.
            if abs(f_start - b_start) > max_index_gap:
                continue
            length = 0
            while (
                f_start + length < len(front)
                and b_start + length < len(back)
                and is_near_rhyme(front[f_start + length], back[b_start + length])
            ):
                length += 1
            if length >= 2:
                runs.append((
                    (f_start, front[f_start:f_start + length]),
                    (b_start, back[b_start:b_start + length]),
                ))
    return runs


def _drop_contained_runs(runs):
    """Keep the longest runs; drop any whose back-half finals sit inside a longer kept run."""
    kept = []
    kept_back_sequences = []
    for run in sorted(runs, key=lambda m: len(m[1][1]), reverse=True):
        back_seq = run[1][1]
        contained = any(
            len(back_seq) < len(seq) and _seq_contains(seq, back_seq)
            for seq in kept_back_sequences
        )
        if not contained:
            kept.append(run)
            kept_back_sequences.append(back_seq)
    return kept


def _mark_sentence(text, runs, mid_point):
    """Wrap the matched spans of ``text`` in <...> and insert the half separator."""
    highlighted = set()
    for (f_idx, f_seq), (b_idx, b_seq) in runs:
        highlighted.update(range(f_idx, f_idx + len(f_seq)))
        highlighted.update(range(mid_point + b_idx, mid_point + b_idx + len(b_seq)))

    pieces = []
    i = 0
    while i < len(text):
        if i == mid_point:
            pieces.append(" ")
        if i in highlighted:
            end = i + 1
            # A highlighted span never crosses mid_point: a front-half span
            # touching a back-half span would otherwise swallow the separator
            # and glue both halves into a single bracket.
            while end < len(text) and end in highlighted and end != mid_point:
                end += 1
            pieces.append("<" + text[i:end] + ">")
            i = end
        else:
            pieces.append(text[i])
            i += 1
    return "".join(pieces)


def _shortest_rhyme_combos(seq):
    """Expand each final of ``seq`` to its rhyme group and keep the shortest combinations."""
    group_lists = []
    for syl in seq:
        group_key = syllable_group.get(syl)
        if group_key and group_key in near_rhyme_map:
            group_lists.append(near_rhyme_map[group_key])
        else:
            # Final without a known group: it can only stand for itself.
            group_lists.append([syl])
    all_combos = list(itertools.product(*group_lists))
    combo_lengths = [sum(len(s) for s in combo) for combo in all_combos]
    min_len = min(combo_lengths)
    return [
        list(combo)
        for combo, combo_len in zip(all_combos, combo_lengths)
        if combo_len == min_len
    ]


def _build_vocab_prompt(combos):
    """Build the vocabulary-generation prompt for one list of final combinations."""
    combos_text = "\n".join(
        f"{i + 1}. {c}"
        for i, c in enumerate(" → ".join(combo) for combo in combos)
    )
    return (
        f"请基于以下韵母组合序列,每个组合生成1个多音节连续押韵的中文词汇:\n"
        f"{combos_text}\n"
        f"要求:\n"
        f"1. 严格按照每个组合的韵母个数和顺序生成,不得增减音节,不得更改韵母顺序。\n"
        f"2. 每个词的拼音韵母部分必须依次为该组合中的韵母。\n"
        f"3. 为同一韵母位置选择不同的声母组合,避免重复。\n"
        f"4. 每个词汇后面用括号标注完整拼音,并在括号后加上“释义:”及简短解释。\n"
        f"输出格式示例:\n"
        f"盛夏(shèng xià),释义:夏天最炎热的时候。\n"
        f"烹虾(pēng xiā),释义:烹饪虾的一种方法。\n"
        f"请严格按照上述格式输出,不要添加额外说明。"
    )


def process_sentence(sentence, max_index_gap=2):
    """Find near-rhyming runs between the two halves of ``sentence`` and expand them.

    Steps:
      1. Remove whitespace, convert the text to pinyin finals and split the
         list of finals at its midpoint into a front and a back half.
      2. Find every run of two or more consecutive near-rhyming finals whose
         start offsets in the two halves differ by at most ``max_index_gap``,
         then drop runs whose back-half finals are contained in a longer
         kept run.
      3. Print the sentence with matched spans wrapped in ``<...>`` and a
         separator inserted between the halves.
      4. For each distinct back-half run, enumerate the rhyme-group
         combinations with the smallest total letter count and ask
         ``call_deepseek_chat`` for matching vocabulary.
      5. When exactly two runs produced vocabulary, ask for four connected
         sentences built from both lists.

    NOTE(review): the highlight step indexes characters using positions from
    the finals list, which assumes the pinyin conversion yields one item per
    character -- confirm for input containing non-Chinese text.
    NOTE(review): the earlier docstring described a two-space separator while
    the code emits the literal kept below -- confirm which is intended.

    :param sentence: input text; all whitespace is removed before analysis
    :param max_index_gap: largest allowed difference between a run's start
        offset in the front half and in the back half
    :return: dict mapping each back-half run (tuple of finals) to its list of
        shortest rhyme combinations (each a list of finals)
    """
    # split() with no argument also removes tabs, newlines and full-width
    # spaces, which would otherwise turn into bogus "finals".
    sentence_no_space = "".join(sentence.split())
    print(f"原句(去空格): {sentence_no_space}")

    pinyin_list = pinyin(sentence_no_space, style=Style.FINALS, strict=False)
    syllables = [item[0] for item in pinyin_list]
    print(f"拆解成韵母音节: {syllables}")

    mid_point = len(syllables) // 2
    front = syllables[:mid_point]
    back = syllables[mid_point:]
    print(f"前半部分: {front}")
    print(f"后半部分: {back}")

    matches = _drop_contained_runs(_find_rhyme_runs(front, back, max_index_gap))

    if matches:
        print("最终保留的连续近韵多韵母子序列:")
        for (f_idx, f_seq), (b_idx, b_seq) in matches:
            print(f"前半[{f_idx}:{f_idx+len(f_seq)}]={f_seq} ↔ 后半[{b_idx}:{b_idx+len(b_seq)}]={b_seq}")
        marked_sentence = _mark_sentence(sentence_no_space, matches, mid_point)
        print(f"标注后句子:{marked_sentence}")
    else:
        print("未找到符合条件的连续近韵多韵母子序列")

    # One entry per distinct back-half run, in sorted order for stable output.
    rhyme_sequences = {tuple(b_seq) for _, (_, b_seq) in matches}
    rhyme_dict = {seq: _shortest_rhyme_combos(seq) for seq in sorted(rhyme_sequences)}

    print("\n近韵词汇库(总字母数最少的组合):")
    for seq, words in rhyme_dict.items():
        print(f"{list(seq)} -> {words}")

    # First round: vocabulary for each run.
    seq_results = {}
    for seq, combos in rhyme_dict.items():
        result = call_deepseek_chat(_build_vocab_prompt(combos))
        seq_results[tuple(seq)] = result
        print(f"\nDeepseek-chat 基于韵母序列 {list(seq)} 的所有最短组合生成的词汇:\n{result}")

    # Second round: four connected sentences, only for exactly two runs.
    if len(seq_results) == 2:
        (seq1, words1), (seq2, words2) = seq_results.items()
        if words1 is None or words2 is None:
            # A failed vocabulary call would otherwise put the literal text
            # "None" into the prompt.
            print("词汇生成失败,跳过句子生成")
        else:
            prompt2 = (
                f"我有两个不同韵母序列的词汇列表:\n"
                f"第一类(韵母序列 {seq1}):\n{words1}\n\n"
                f"第二类(韵母序列 {seq2}):\n{words2}\n\n"
                "请你基于这两个词汇列表,按照匹配的前后顺序,生成4句有逻辑的中文句子。\n"
                "要求:\n"
                "1. 每句至少包含一个第一类词汇和一个第二类词汇。\n"
                "2. 句子之间要有连贯的故事或逻辑关系。\n"
                "3. 保持自然流畅,不要生硬拼接。\n"
                "4. 输出格式为:句子1\\n句子2\\n句子3\\n句子4\\n"
            )
            final_sentences = call_deepseek_chat(prompt2)
            print("\n基于两个韵母序列生成的4句有逻辑的句子:\n", final_sentences)

    return rhyme_dict
# Example: analyse a sample two-part lyric when the file is run as a script.
if __name__ == "__main__":
    user_input = (
        "我在街头成长从小时候就种下的象 "
        "生活种种苦难磨砺让我变得更加坚强"
    )
    process_sentence(sentence=user_input, max_index_gap=2)
# 智能体的定义:充分利用多种函数工具对数据进行处理后再喂给大模型,让大模型只做最简化的事情。
1436

被折叠的 条评论
为什么被折叠?



