A Python-Driven Pipeline for Batch-Generating Promotional Articles with Large Language Models
Below is a Python-based solution for efficiently generating promotional articles ("soft articles") in bulk. It integrates mainstream large language models (ChatGPT, Claude, ERNIE Bot, etc.) via their APIs to fully automate the content-creation workflow:
Core Architecture
graph LR
A[Keyword input] --> B(Prompt-engineering module)
C[Industry template library] --> B
B --> D{LLM API}
D --> E[Content generation]
E --> F(Post-processing engine)
F --> G[Sensitive-word filtering]
F --> H[SEO optimization]
F --> I[Rewriting / deduplication]
G & H & I --> J[Batch output]
J --> K[Format conversion]
Full Implementation
1. Installing Dependencies
pip install openai anthropic requests pandas numpy python-docx markdown
2. API Integration Module (hot-swappable providers)
# api_integration.py
import openai
import anthropic
import requests

class LLM_Generator:
    def __init__(self, provider="openai", model="gpt-4-turbo"):
        self.provider = provider
        self.model = model
        # Placeholder keys; in production, load these from environment
        # variables or a secrets manager rather than hard-coding them.
        self.api_keys = {
            "openai": "sk-your-openai-key",
            "anthropic": "your-claude-key",
            "baidu": "your-wenxin-key"
        }

    def generate(self, prompt, max_tokens=1200):
        if self.provider == "openai":
            return self._openai_generation(prompt, max_tokens)
        elif self.provider == "anthropic":
            return self._claude_generation(prompt, max_tokens)
        elif self.provider == "baidu":
            return self._wenxin_generation(prompt, max_tokens)
        raise ValueError(f"Unknown provider: {self.provider}")

    def _openai_generation(self, prompt, max_tokens):
        client = openai.OpenAI(api_key=self.api_keys["openai"])
        response = client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
            temperature=0.7
        )
        return response.choices[0].message.content

    def _claude_generation(self, prompt, max_tokens):
        client = anthropic.Anthropic(api_key=self.api_keys["anthropic"])
        response = client.messages.create(
            model="claude-3-opus-20240229",
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.content[0].text

    def _wenxin_generation(self, prompt, max_tokens):
        # ERNIE Bot (Wenxin) expects the access token as a query parameter.
        url = ("https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions"
               "?access_token=" + self.api_keys["baidu"])
        payload = {
            "messages": [{"role": "user", "content": prompt}],
            "max_output_tokens": max_tokens
        }
        response = requests.post(url, json=payload)
        response.raise_for_status()
        return response.json()["result"]
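A quick sanity check of the module, assuming valid API keys are configured (the prompt text here is purely illustrative):

# Minimal smoke test for LLM_Generator (illustrative prompt).
from api_integration import LLM_Generator

gen = LLM_Generator(provider="openai", model="gpt-4-turbo")
draft = gen.generate("Write a two-sentence teaser about cloud servers.", max_tokens=120)
print(draft)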
3. Prompt-Engineering Module
# prompt_engineering.py
import pandas as pd

class PromptEngine:
    def __init__(self, template_file="templates.csv"):
        self.templates = pd.read_csv(template_file)

    def craft_prompt(self, keywords, template_type="marketing", word_count=800):
        # Pick a random template matching the requested type and length.
        matches = self.templates[
            (self.templates["type"] == template_type) &
            (self.templates["word_count"] == word_count)
        ]
        if matches.empty:
            raise ValueError(f"No template for type={template_type}, word_count={word_count}")
        template = matches.sample().iloc[0]["prompt"]
        # Inject the keywords dynamically.
        prompt = template.replace("{keywords}", ", ".join(keywords))
        # Append formatting requirements.
        prompt += f"""
Generate the content strictly according to the following requirements:
- Structure: title + 5 paragraphs + a call to action
- Work in these 3 related keywords: {', '.join(keywords[-3:])}
- Use natural transitions between paragraphs
- Include industry terminology while staying accessible to lay readers
- Finish with 3 common user Q&As
"""
        return prompt
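A minimal sketch of how the engine behaves, building a throwaway templates.csv in place (the file contents here are illustrative, not the full template database from section 5):

# Create a tiny templates.csv for testing (illustrative content).
import pandas as pd
pd.DataFrame([{
    "type": "marketing",
    "word_count": 800,
    "prompt": "As a digital-marketing expert, write a promo article about {keywords}."
}]).to_csv("templates.csv", index=False)

from prompt_engineering import PromptEngine
engine = PromptEngine("templates.csv")
print(engine.craft_prompt(["cloud servers", "cost optimization"]))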
4. Batch Generation Engine
# batch_generator.py
import time
from concurrent.futures import ThreadPoolExecutor
from api_integration import LLM_Generator
from prompt_engineering import PromptEngine

class ContentFactory:
    def __init__(self, keywords_list, output_format="markdown"):
        self.keywords_list = keywords_list
        self.output_format = output_format
        self.generator = LLM_Generator(provider="anthropic")  # default to Claude
        self.prompt_engine = PromptEngine()

    def process_keywords(self, keywords):
        try:
            prompt = self.prompt_engine.craft_prompt(keywords)
            content = self.generator.generate(prompt)
            # Crude per-worker rate limiting. Sleeping in the main loop after
            # future.result() would not throttle the concurrent API calls.
            time.sleep(3)
            return self.post_process(content, keywords)
        except Exception as e:
            print(f"Generation failed: {e}")
            return None

    def post_process(self, content, keywords):
        # Content cleanup (simplified version)
        content = "\n".join(line for line in content.split("\n") if line.strip())
        content = content.replace("artificial intelligence", "AI")  # normalize terminology
        content += f"\n\nRelated keywords: {', '.join(keywords)}"
        return content

    def run_batch(self, max_workers=3):
        results = []
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(self.process_keywords, kw) for kw in self.keywords_list]
            for future in futures:
                result = future.result()
                if result:
                    results.append(result)
        return self.export(results)

    def export(self, contents):
        timestamp = time.strftime("%Y%m%d_%H%M")
        if self.output_format == "markdown":
            file_name = f"soft_articles_{timestamp}.md"
            with open(file_name, "w", encoding="utf-8") as f:
                for i, content in enumerate(contents):
                    f.write(f"## Article {i+1}\n\n")
                    f.write(content)
                    f.write("\n\n---\n\n")
            return file_name
        # Word, Excel, and other export formats can be added here
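The trailing comment hints at Word export. A minimal sketch using python-docx (already in the dependency list above) might look like this; the export_docx helper name is an assumption, not part of the original module:

# Hypothetical Word export helper built on python-docx.
from docx import Document

def export_docx(contents, file_name="soft_articles.docx"):
    doc = Document()
    for i, content in enumerate(contents):
        doc.add_heading(f"Article {i+1}", level=2)
        for para in content.split("\n"):
            doc.add_paragraph(para)
        doc.add_page_break()
    doc.save(file_name)
    return file_name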
5. Industry Template Database (templates.csv)
| type | word_count | prompt |
|---|---|---|
| marketing | 800 | As a senior digital-marketing expert, write a promotional article about {keywords}. Emphasize the product's innovation, address the target users' pain points, and include 3 real-world use cases... |
| tech | 1200 | As a technology columnist, write an in-depth analysis of {keywords}. Start from the technical principles, analyze the current state of the industry, and forecast future trends, for a professional readership... |
| finance | 1000 | Write a personal-finance article about {keywords} aimed at retail investors. Explain complex concepts in plain language, offer practical advice, and avoid overstating returns... |
6. Main Entry Point
# main.py
from batch_generator import ContentFactory

if __name__ == "__main__":
    # Batch keyword input (in practice, read from Excel or a database)
    topics = [
        ["cloud servers", "enterprise digital transformation", "cost optimization"],
        ["AI customer service", "customer experience", "intelligent upgrades"],
        ["blockchain", "supply-chain finance", "decentralization"]
    ]
    # Start the generation factory
    factory = ContentFactory(
        keywords_list=topics,
        output_format="markdown"
    )
    # Generate one article per keyword set
    output_file = factory.run_batch(max_workers=5)
    print(f"Done! Output saved to: {output_file}")
Advanced Enhancements
- SEO Enhancement Module
def seo_optimize(content, keywords):
    """Automatic SEO post-processing."""
    # Keyword density analysis (word-count based; crude but sufficient here)
    for kw in keywords:
        total_words = len(content.split())
        kw_density = content.lower().count(kw.lower()) / total_words * 100
        if kw_density < 1.5:  # boost an under-represented keyword
            paragraphs = content.split("\n\n")
            for i, para in enumerate(paragraphs):
                if i % 2 == 0 and kw_density < 1.5:
                    paragraphs[i] = f"Against the backdrop of {kw}'s wide adoption, {para}"
                    # Recompute density over the whole document, not one paragraph
                    content = "\n\n".join(paragraphs)
                    kw_density = content.lower().count(kw.lower()) / len(content.split()) * 100
            content = "\n\n".join(paragraphs)
    # Auto-generate a meta description
    meta_desc = f"An in-depth look at the innovations and practical value of {', '.join(keywords)}."
    return f"<meta name='description' content='{meta_desc}'>\n" + content
- Sensitive-Word Filtering System
# Load the sensitive-word list (~50k entries)
with open("sensitive_words.txt", encoding="utf-8") as f:
    sensitive_words = set(line.strip() for line in f if line.strip())

def filter_content(content):
    """Filter non-compliant terms and mask them."""
    for word in sensitive_words:
        if word in content:
            # Mask the middle, keeping the first and last characters.
            # Plain replace is used instead of regex \b boundaries, which
            # do not work for CJK text; single-character words get a bare "*".
            if len(word) >= 2:
                masked = word[0] + "*" * (len(word) - 2) + word[-1]
            else:
                masked = "*"
            content = content.replace(word, masked)
    return content
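Scanning a ~50k-term list one term at a time is O(terms x text length); at production volume, an Aho-Corasick automaton finds every term in a single pass. A sketch assuming the pyahocorasick package, which is not in the dependency list above:

# Hypothetical one-pass matcher (pip install pyahocorasick).
import ahocorasick

automaton = ahocorasick.Automaton()
for w in sensitive_words:
    automaton.add_word(w, w)
automaton.make_automaton()

def find_sensitive(content):
    # Yields (end_index, word) for every hit in one scan of the text.
    return [(end, w) for end, w in automaton.iter(content)]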
- Intelligent Rewriting for Deduplication
def rewrite_content(content, similarity=0.3):
    """LLM-assisted rewriting: preserve the meaning, change the wording."""
    from difflib import SequenceMatcher
    from api_integration import LLM_Generator

    prompt = f"""As a professional editor, rewrite the following text. Requirements:
- Keep the core information unchanged
- Keep similarity to the original below {int(similarity*100)}%
- Use new sentence structures and vocabulary
- Improve readability and professionalism

Original text:
{content[:1500]}..."""

    llm = LLM_Generator(provider="openai")
    rewritten = llm.generate(prompt)

    # Verify similarity against the original
    ratio = SequenceMatcher(None, content, rewritten).ratio()
    if ratio < similarity:
        return rewritten
    # Tighten the threshold and retry, but stop before it goes non-positive
    if similarity - 0.05 > 0:
        return rewrite_content(content, similarity - 0.05)
    return rewritten
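Typical usage chains the three enhancements after generation; the draft variable and keyword list here are illustrative:

# Illustrative post-processing chain over a generated draft.
draft = "..."  # output of ContentFactory / LLM_Generator
draft = filter_content(draft)
draft = seo_optimize(draft, ["cloud servers", "cost optimization"])
draft = rewrite_content(draft, similarity=0.3)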