import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import Callback
import json
import re
from collections import Counter
import random
class PoetryGenerator:
    """Character-level LSTM generator for Chinese poetry.

    Typical call order: ``load_data`` -> ``preprocess_data`` ->
    ``build_model`` -> ``train`` -> ``generate_poem``.

    Attributes are plain instance fields:
        model_type: label passed to the constructor (stored, not otherwise used).
        model: the compiled Keras model, or ``None`` before ``build_model``.
        tokenizer / reverse_tokenizer: char -> index and index -> char maps.
        max_sequence_len: length every input sequence is padded to.
        vocab_size: number of output classes, including the padding slot.
    """

    # Index 0 is what pad_sequences() pads with, so it is reserved and never
    # assigned to a real character; unknown characters also map to it.
    PAD_INDEX = 0
    # Separators between poem lines; both ASCII and full-width forms.
    _LINE_BREAK_PATTERN = re.compile('[,。!?,!?]')
    # Characters that may end a generated poem.
    _END_MARKS = frozenset('。!?!?')

    def __init__(self, model_type='lstm'):
        """Create an empty generator; no model or vocabulary exists yet."""
        self.model_type = model_type
        self.model = None
        self.tokenizer = {}
        self.reverse_tokenizer = {}
        self.max_sequence_len = 0
        self.vocab_size = 0

    def load_data(self, file_path, poem_type='tang'):
        """Return the built-in sample poems.

        Args:
            file_path: currently unused; kept for interface compatibility.
            poem_type: ``'tang'`` selects the classical samples, any other
                value selects the modern samples.

        Returns:
            A new list of poem strings.
        """
        print("正在加载诗歌数据...")
        if poem_type == 'tang':
            # Classical (Tang) samples.
            return [
                "春眠不觉晓,处处闻啼鸟。夜来风雨声,花落知多少。",
                "床前明月光,疑是地上霜。举头望明月,低头思故乡。",
                "白日依山尽,黄河入海流。欲穷千里目,更上一层楼。",
                "红豆生南国,春来发几枝。愿君多采撷,此物最相思。",
                "空山不见人,但闻人语响。返景入深林,复照青苔上。",
                "月落乌啼霜满天,江枫渔火对愁眠。姑苏城外寒山寺,夜半钟声到客船。",
                "朝辞白帝彩云间,千里江陵一日还。两岸猿声啼不住,轻舟已过万重山。",
                "好雨知时节,当春乃发生。随风潜入夜,润物细无声。",
                "君问归期未有期,巴山夜雨涨秋池。何当共剪西窗烛,却话巴山夜雨时。",
            ]
        # Modern samples.
        return [
            "你站在桥上看风景,看风景的人在楼上看你。明月装饰了你的窗子,你装饰了别人的梦。",
            "从明天起,做一个幸福的人,喂马,劈柴,周游世界。从明天起,关心粮食和蔬菜。",
            "黑夜给了我黑色的眼睛,我却用它寻找光明。",
            "面朝大海,春暖花开。",
            "轻轻的我走了,正如我轻轻的来;我轻轻的招手,作别西天的云彩。",
        ]

    def preprocess_data(self, poems):
        """Build the vocabulary and the (prefix -> next character) training set.

        Every proper prefix of every poem becomes one training sequence whose
        target is the character that follows it.

        Args:
            poems: iterable of poem strings.

        Returns:
            ``(X, y)``: the padded index sequences and the one-hot targets.

        Raises:
            ValueError: if no poem has at least two characters, so that no
                training pair can be formed.
        """
        print("正在预处理数据...")
        poems = [poem for poem in poems if poem]
        chars = sorted(set(''.join(poems)))
        # Real characters start at 1 so they can never collide with padding.
        self.tokenizer = {char: idx for idx, char in enumerate(chars, start=1)}
        self.reverse_tokenizer = {idx: char for char, idx in self.tokenizer.items()}
        self.vocab_size = len(chars) + 1

        sequences = []
        next_chars = []
        for poem in poems:
            encoded = [self.tokenizer[char] for char in poem]
            for end in range(1, len(encoded)):
                sequences.append(encoded[:end])
                next_chars.append(encoded[end])
        if not sequences:
            raise ValueError("训练数据为空:至少需要一首长度不少于两个字的诗歌")

        self.max_sequence_len = max(len(seq) for seq in sequences)
        X = pad_sequences(sequences, maxlen=self.max_sequence_len)
        y = tf.keras.utils.to_categorical(next_chars, num_classes=self.vocab_size)
        return X, y

    def build_model(self):
        """Build and compile the two-layer LSTM and store it in ``self.model``.

        Raises:
            ValueError: if ``preprocess_data`` has not set the vocabulary size
                and sequence length yet.
        """
        print("正在构建模型...")
        if self.vocab_size <= 0 or self.max_sequence_len <= 0:
            raise ValueError("请先调用 preprocess_data 再构建模型")
        model = Sequential([
            # An explicit Input replaces Embedding's deprecated `input_length`
            # and lets summary() report concrete shapes.
            tf.keras.Input(shape=(self.max_sequence_len,)),
            Embedding(self.vocab_size, 128),
            LSTM(256, return_sequences=True),
            Dropout(0.2),
            LSTM(256),
            Dropout(0.2),
            Dense(128, activation='relu'),
            Dropout(0.2),
            Dense(self.vocab_size, activation='softmax'),
        ])
        model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )
        self.model = model
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        model.summary()

    def train(self, X, y, epochs=100, batch_size=64):
        """Fit the model, printing a sample poem every tenth epoch.

        Args:
            X: padded input sequences from ``preprocess_data``.
            y: one-hot targets from ``preprocess_data``.
            epochs: number of training epochs.
            batch_size: mini-batch size.

        Returns:
            The value returned by ``model.fit``.

        Raises:
            ValueError: if ``build_model`` has not been called.
        """
        if self.model is None:
            raise ValueError("模型尚未构建,请先调用 build_model")
        print("开始训练模型...")

        # Defined here so the Keras base class is only needed when training.
        class PoetryCallback(Callback):
            """Prints one generated poem at epochs 0, 10, 20, ..."""

            def __init__(self, generator, temperature=0.8):
                super().__init__()
                self.generator = generator
                self.temperature = temperature

            def on_epoch_end(self, epoch, logs=None):
                if epoch % 10 == 0:
                    print(f"\nEpoch {epoch} - 生成示例:")
                    theme = random.choice(['春', '月', '山', '水', '花'])
                    poem = self.generator.generate_poem(theme, temperature=self.temperature)
                    print(f"主题 '{theme}': {poem}")

        history = self.model.fit(
            X, y,
            batch_size=batch_size,
            epochs=epochs,
            verbose=1,
            callbacks=[PoetryCallback(self)],
        )
        return history

    @staticmethod
    def _sample_index(probabilities, temperature, blocked=()):
        """Draw one index from ``probabilities`` after temperature scaling.

        Args:
            probabilities: 1-D array-like of non-negative weights.
            temperature: positive float; lower values sharpen the distribution.
            blocked: indices that must never be returned.

        Returns:
            The sampled index as an ``int``.

        Raises:
            ValueError: if ``temperature`` is not positive, or if no allowed
                index has a positive probability (this includes NaN output).
        """
        if temperature <= 0:
            raise ValueError("temperature 必须大于 0")
        probs = np.array(probabilities, dtype='float64')
        for idx in blocked:
            if 0 <= idx < probs.size:
                probs[idx] = 0.0
        positive = probs > 0
        if not positive.any():
            raise ValueError("模型输出的概率分布无效,无法采样")
        # Take the log of positive entries only, so zero probabilities stay
        # exactly zero instead of producing log(0) warnings.
        logits = np.full(probs.shape, -np.inf)
        logits[positive] = np.log(probs[positive]) / temperature
        logits -= logits.max()  # keeps exp() from overflowing
        weights = np.exp(logits)
        weights /= weights.sum()
        return int(np.argmax(np.random.multinomial(1, weights)))

    def generate_poem(self, theme, temperature=0.8, max_length=50):
        """Generate a poem that starts with ``theme``.

        Characters are sampled one at a time until an end mark is produced
        after the text is longer than 10 characters, or until ``max_length``
        characters have been added.

        Args:
            theme: starting text; characters outside the vocabulary are fed
                to the model as padding.
            temperature: sampling temperature, must be positive.
            max_length: maximum number of characters to append.

        Returns:
            The theme followed by the generated characters.

        Raises:
            ValueError: if no model exists, or if sampling fails (see
                ``_sample_index``).
        """
        if self.model is None:
            raise ValueError("模型尚未训练,请先训练模型")

        generated = list(theme)
        encoded = [self.tokenizer.get(char, self.PAD_INDEX) for char in generated]
        for _ in range(max_length):
            # pad_sequences keeps the most recent max_sequence_len tokens.
            batch = pad_sequences([encoded], maxlen=self.max_sequence_len)
            probabilities = self.model.predict(batch, verbose=0)[0]
            next_idx = self._sample_index(
                probabilities, temperature, blocked=(self.PAD_INDEX,)
            )
            next_char = self.reverse_tokenizer.get(next_idx, '')
            encoded.append(next_idx)
            generated.append(next_char)
            if next_char in self._END_MARKS and len(generated) > 10:
                break
        return ''.join(generated)

    def check_rhythm(self, poem):
        """Split a poem into lines and run a very simple rhyme check.

        NOTE: ``rhyme_score`` is only True when the last two lines end with
        the *same character*; it is a crude proxy, not a phonetic rhyme test.

        Args:
            poem: poem text with lines separated by punctuation.

        Returns:
            A dict with ``line_count`` (int), ``rhyme_score`` (bool) and
            ``rhyme_chars`` (final character of each line; empty when the
            poem has fewer than four lines).
        """
        lines = [
            line.strip()
            for line in self._LINE_BREAK_PATTERN.split(poem)
            if line.strip()
        ]
        if len(lines) < 4:
            return {'line_count': len(lines), 'rhyme_score': False, 'rhyme_chars': []}
        rhyme_chars = [line[-1] for line in lines]
        return {
            'line_count': len(lines),
            'rhyme_score': rhyme_chars[-1] == rhyme_chars[-2],
            'rhyme_chars': rhyme_chars,
        }
class PoetryEvaluation:
    """Heuristic scoring helpers for generated poems."""

    # Counted as punctuation; both ASCII and full-width forms are accepted so
    # that poems written with full-width marks are scored the same way.
    _PUNCTUATION_PATTERN = re.compile('[,。!?,!?]')

    @staticmethod
    def automatic_evaluation(poem):
        """Score a poem on length, character diversity and punctuation use.

        Args:
            poem: the poem text.

        Returns:
            A dict with ``length``, ``diversity``, ``punctuation`` and
            ``overall`` (the mean of the other three). Each score lies in
            the range 0 to 1.
        """
        # 20 characters is treated as the ideal length; longer poems cap at 1.
        length_score = min(len(poem) / 20, 1.0)
        # Share of distinct characters; an empty poem scores 0.
        diversity_score = len(set(poem)) / len(poem) if poem else 0
        # Three punctuation marks are treated as ideal; more caps at 1.
        punctuation_count = len(PoetryEvaluation._PUNCTUATION_PATTERN.findall(poem))
        punctuation_score = min(punctuation_count / 3, 1.0)
        return {
            'length': length_score,
            'diversity': diversity_score,
            'punctuation': punctuation_score,
            'overall': (length_score + diversity_score + punctuation_score) / 3,
        }

    @staticmethod
    def manual_evaluation(poem, theme):
        """Print a rating prompt and return placeholder scores.

        No input is read: the returned values are random numbers between 3
        and 5 that stand in for a real human rating interface.

        Args:
            poem: the poem shown to the rater.
            theme: the theme shown to the rater.

        Returns:
            A dict with ``relevance``, ``rhythm``, ``imagery`` and ``overall``.
        """
        print("\n请对以下诗歌进行评分:")
        print(f"主题: {theme}")
        print(f"诗歌: {poem}")
        print("\n请从以下几个方面评分 (1-5分):")
        print("1. 相关性 (与主题的相关程度)")
        print("2. 韵律 (诗歌的韵律美感)")
        print("3. 意境 (诗歌的意境和美感)")
        print("4. 整体质量")
        # Simulated scores; a real deployment would collect them from a person.
        return {
            'relevance': random.uniform(3, 5),
            'rhythm': random.uniform(3, 5),
            'imagery': random.uniform(3, 5),
            'overall': random.uniform(3, 5),
        }
def main():
    """Run the demo: load samples, train a model, then print scored poems."""
    # Set up the generator and its training data.
    generator = PoetryGenerator()
    poems = generator.load_data('', poem_type='tang')
    print(f"加载了 {len(poems)} 首诗歌")

    features, targets = generator.preprocess_data(poems)
    print(f"训练数据形状: X {features.shape}, y {targets.shape}")
    print(f"词汇表大小: {generator.vocab_size}")

    generator.build_model()

    # The sample set is tiny, so only a few epochs are run.
    print("\n开始训练...")
    generator.train(features, targets, epochs=50, batch_size=32)

    banner = "=" * 50
    print("\n" + banner)
    print("诗歌生成示例")
    print(banner)

    evaluator = PoetryEvaluation()
    for theme in ('春', '月', '山', '水', '花'):
        print(f"\n生成主题: {theme}")
        # Two poems per theme, each followed by its rhythm info and scores.
        for number in (1, 2):
            poem = generator.generate_poem(theme, temperature=0.7)
            rhythm_info = generator.check_rhythm(poem)
            auto_scores = evaluator.automatic_evaluation(poem)
            print(f"诗歌 {number}: {poem}")
            print(f"韵律检查: {rhythm_info}")
            print(f"自动评分: {auto_scores}")
            print("-" * 30)
# 交互式诗歌生成函数
def interactive_poetry_generation(generator=None, epochs=50):
    """Run an interactive prompt that generates a poem for each theme entered.

    Args:
        generator: an already trained ``PoetryGenerator``. When ``None``, a
            new generator is created from the built-in Tang samples and
            trained first, so poems are not sampled from untrained weights.
        epochs: number of training epochs used when ``generator`` is ``None``.

    The loop ends when the user enters '退出' or when input reaches EOF.
    """
    if generator is None:
        generator = PoetryGenerator()
        poems = generator.load_data('', poem_type='tang')
        X, y = generator.preprocess_data(poems)
        generator.build_model()
        generator.train(X, y, epochs=epochs, batch_size=32)

    evaluator = PoetryEvaluation()
    print("欢迎使用诗歌生成系统!")
    print("支持的主题示例:春、月、山、水、花、秋、雪、风等")
    while True:
        print("\n" + "="*40)
        try:
            theme = input("请输入主题(输入'退出'结束): ").strip()
        except EOFError:
            # Closed stdin (e.g. piped input or Ctrl-D) is treated as exit.
            print("\n感谢使用诗歌生成系统!")
            break
        if theme == '退出':
            print("感谢使用诗歌生成系统!")
            break
        if not theme:
            print("主题不能为空,请重新输入。")
            continue
        try:
            poem = generator.generate_poem(theme, temperature=0.7)
            rhythm_info = generator.check_rhythm(poem)
            print("\n生成的诗歌:")
            print(f"「{poem}」")
            print("\n韵律分析:")
            print(f"行数:{rhythm_info['line_count']}")
            print(f"押韵效果:{'良好' if rhythm_info['rhyme_score'] else '一般'}")
            print(f"韵脚:{rhythm_info['rhyme_chars']}")
            scores = evaluator.automatic_evaluation(poem)
            print(f"\n自动评估得分:{scores['overall']:.2f}/1.0")
        except Exception as e:
            # Prompt-loop boundary: report the failure and keep the session
            # alive for the next theme.
            print(f"生成诗歌时出现错误:{e}")
if __name__ == "__main__":
    # Batch demo first, then the interactive prompt.
    main()
    separator = "=" * 60
    print("\n" + separator)
    print("进入交互式诗歌生成模式")
    print(separator)
    interactive_poetry_generation()
找出该代码的缺点并进行优化。
生成的示例诗句要有符号分隔,保证每句字数相同。
诗歌生成系统不要在一行内输出全部诗句,应分多行输出,且每行字数相同。
诗句要分为五言绝句、七言律诗和现代诗。