#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import time
import json
import base64
import hmac
import hashlib
import requests
from datetime import datetime, timezone
import pandas as pd
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn
try:
    import ntplib
except ImportError:
    # ntplib is an optional dependency: sync_time() checks for None and
    # skips the NTP comparison when the package is not installed.
    ntplib = None
# ─────────── Configuration ───────────
# Input document to annotate, the spreadsheet whose first column lists the
# words to underline and translate, and the path of the generated document.
INPUT_DOC = '一词多义测试文本.docx'
WORD_LIST = '拟加线并翻译的单词表.xlsx'
OUTPUT_DOC = 'C_annotated.docx'
# Translation endpoint. HOST and REQUEST_URI are kept separate because the
# request signature uses each of them on its own line.
HOST = 'ntrans.xfyun.cn'
REQUEST_URI = '/v2/ots'
API_URL = f'https://{HOST}{REQUEST_URI}'
# NOTE(review): API credentials are hard-coded in source — consider loading
# them from the environment or an untracked config file and rotating these.
APPID = '7f0f910b'
APIKey = '3c49387f2827fd870860c017d3326970'
APISecret = 'MTM2OWMzNTA4YzY2MDk5N2NmMGMwMmNi'
# ────────────────────────────────
def sync_time():
    """Print the offset between the local clock and an NTP server.

    Purely diagnostic: nothing is returned and the system clock is not
    adjusted. When ntplib is unavailable, or the NTP query fails for any
    reason, a warning is printed and the function returns normally.
    """
    if ntplib is None:
        print("[WARNING] 未安装 ntplib,使用本地时间")
        return

    try:
        reply = ntplib.NTPClient().request('pool.ntp.org')
        ntp_now = datetime.fromtimestamp(reply.tx_time, timezone.utc)
        local_now = datetime.now(timezone.utc)
        skew = (ntp_now - local_now).total_seconds()
        print(f"[TIME SYNC] 本地 UTC:{local_now.isoformat()} | NTP UTC:{ntp_now.isoformat()} | 差值:{skew:.1f}s")
        # The request signature embeds the current date, so a skew beyond
        # five minutes is reported as a likely cause of signing failures.
        if abs(skew) > 300:
            print("[ERROR] 与 NTP 时间差超过 300 秒,可能导致签名失败!")
    except Exception as err:
        # Best effort only: a failed time check must not stop the run.
        print(f"[WARNING] 时间同步失败:{err}")
def get_gmt_date() -> str:
    """Return the current UTC time as an RFC 1123 date string.

    The result has the form ``Mon, 01 Jan 2024 00:00:00 GMT``.

    The string is produced by ``email.utils.format_datetime`` rather than
    ``strftime('%a, %d %b ...')``: strftime's day and month names follow the
    process locale, whereas the HTTP Date header (which is also part of the
    signed text) must always use the English abbreviations.
    """
    # Imported here so the helper carries its own dependency.
    from email.utils import format_datetime

    return format_datetime(datetime.now(timezone.utc), usegmt=True)
def make_niutrans_headers(body: str) -> dict:
    """Build the signed HTTP headers for one translation request.

    Args:
        body: The JSON request body; its UTF-8 encoding is what gets hashed
            for the Digest header and counted for Content-Length.

    Returns:
        A dict with Content-Type, Accept, Host, Date, Digest, Authorization
        and Content-Length headers. Authorization carries a base64
        HMAC-SHA256 signature computed with APISecret.
    """
    payload_bytes = body.encode('utf-8')
    body_hash = base64.b64encode(hashlib.sha256(payload_bytes).digest()).decode()
    digest_header = "SHA-256=" + body_hash
    date = get_gmt_date()

    # The signed text lists host, date, request line and digest in the same
    # order as the `headers="..."` field of the Authorization header below.
    sign_str = (
        f"host: {HOST}\n"
        f"date: {date}\n"
        f"POST {REQUEST_URI} HTTP/1.1\n"
        f"digest: {digest_header}"
    )
    # Normalize any stray CR/CRLF so the signed text uses bare LF only.
    sign_str = sign_str.replace("\r\n", "\n")
    sign_str = sign_str.replace("\r", "\n")

    mac = hmac.new(
        APISecret.encode('utf-8'),
        sign_str.encode('utf-8'),
        digestmod=hashlib.sha256,
    )
    signature_b64 = base64.b64encode(mac.digest()).decode()
    authorization = (
        f'api_key="{APIKey}", algorithm="hmac-sha256", '
        f'headers="host date request-line digest", signature="{signature_b64}"'
    )

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json,version=1.0",
        "Host": HOST,
        "Date": date,
        "Digest": digest_header,
        "Authorization": authorization,
        "Content-Length": str(len(payload_bytes)),
    }
    return headers
def _result_to_list(result) -> list:
    """Coerce the API's ``data.result`` value into a list of results."""
    if isinstance(result, list):
        return result
    if isinstance(result, dict):
        # NOTE(review): the service appears to answer with
        # {"from": ..., "to": ..., "trans_result": {"src": ..., "dst": ...}};
        # confirm against the API documentation. It is mapped to the
        # src_text/dst_text item shape that process_line() looks for.
        trans = result.get("trans_result")
        if isinstance(trans, dict) and trans.get("dst"):
            return [{"src_text": trans.get("src", ""), "dst_text": trans["dst"]}]
        return []
    if isinstance(result, str) and result:
        return [result]
    return []


def translate_line(line: str) -> list:
    """Translate one line of text from English to Chinese.

    The text is base64-encoded into a JSON body, signed with
    make_niutrans_headers() and POSTed to API_URL with a 10 second timeout.

    Args:
        line: The text to translate.

    Returns:
        A list of translation results (strings, or dicts with ``src_text``
        and ``dst_text`` keys). An empty list is returned when the request
        fails or the response carries no usable result; failures are
        printed, never raised.
    """
    payload = {
        "common": {"app_id": APPID},
        "business": {"from": "en", "to": "zh"},
        "data": {"text": base64.b64encode(line.encode('utf-8')).decode()}
    }
    body = json.dumps(payload, ensure_ascii=False)
    headers = make_niutrans_headers(body)
    try:
        print(f"[DEBUG] 翻译行前20字符: {line[:20]!r}")
        # Send the same UTF-8 bytes that the Digest and Content-Length
        # headers were computed from, so the transmitted body cannot differ
        # from the signed one.
        resp = requests.post(
            API_URL, headers=headers, data=body.encode('utf-8'), timeout=10
        )
        resp.raise_for_status()
        js = resp.json()
        return _result_to_list(js.get("data", {}).get("result", []))
    except Exception as e:
        print(f"[ERROR] 翻译失败: {e}")
        # requests exceptions always carry a `response` attribute, but it is
        # None for timeouts and connection errors; reading `.text` on it
        # would raise inside this handler and abort the whole run.
        failed = getattr(e, "response", None)
        if failed is not None:
            print(f"响应内容: {failed.text}")
        return []
def _apply_font(run, name, size_pt):
    """Set the font name (Latin and East Asian slots) and size on *run*."""
    run.font.name = name
    # The East Asian font slot is set directly on the run's rFonts element,
    # which exists once font.name has been assigned.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), name)
    run.font.size = Pt(size_pt)


def _pick_note_text(results, word):
    """Choose the annotation text for lower-cased *word* from *results*.

    Prefers the first plain-string item, or the first dict whose
    ``src_text`` equals *word*; otherwise falls back to the first result.
    Always returns a str (possibly empty) so it can be written to a run.
    """
    chosen = ""
    for item in results:
        if isinstance(item, str):
            chosen = item
            break
        if isinstance(item, dict) and str(item.get("src_text") or "").lower() == word:
            chosen = item.get("dst_text") or ""
            break
    if not chosen and results:
        first = results[0]
        chosen = first.get("dst_text") if isinstance(first, dict) else first
    return "" if chosen is None else str(chosen)


def process_line(line, para, uncommon):
    """Write *line* into *para*, underlining and annotating listed words.

    The line is translated once (skipped for blank lines) and split into
    word and non-word tokens. Every token is appended as a 12 pt Georgia
    run. A purely alphabetic token whose lower-case form is in *uncommon*
    is also underlined and followed by an 8 pt SimSun run that holds a
    line break and then the translation text.

    Args:
        line: Source text of one line.
        para: The python-docx paragraph that receives the runs.
        uncommon: Collection of lower-case words to annotate.
    """
    results = translate_line(line) if line.strip() else []
    for tok in re.split(r'(\W+)', line):
        if not tok:
            continue
        run = para.add_run(tok)
        _apply_font(run, "Georgia", 12)

        low = tok.lower()
        if not (tok.isalpha() and low in uncommon):
            continue  # ordinary text: nothing more to do

        run.font.underline = True

        note_run = para.add_run()
        note_run.add_break()
        # Append with add_text(): assigning `note_run.text` replaces the
        # run's existing content and would silently drop the break above.
        note_run.add_text(_pick_note_text(results, low))
        _apply_font(note_run, "SimSun", 8)
def annotate_doc():
    """Build the annotated output document from the input document.

    Reads the word list from the first column of WORD_LIST, then copies
    every non-blank line of every paragraph in INPUT_DOC into a new
    document through process_line(), one output paragraph per line, with an
    empty paragraph between consecutive lines of the same source paragraph.
    The result is saved to OUTPUT_DOC.
    """
    # Word list: first spreadsheet column, lower-cased.
    # NOTE(review): astype(str) turns blank cells into the word "nan" —
    # confirm the sheet has no empty cells in this column.
    first_column = pd.read_excel(WORD_LIST, header=None).iloc[:, 0]
    uncommon = set(map(str.lower, first_column.astype(str)))

    source = Document(INPUT_DOC)
    target = Document()

    # Paragraph spacing for the default style: 6 pt after, none before.
    normal_format = target.styles["Normal"].paragraph_format
    normal_format.space_after = Pt(6)
    normal_format.space_before = Pt(0)

    for src_para in source.paragraphs:
        # splitlines() copes with the different newline conventions.
        stripped = (raw.strip() for raw in src_para.text.splitlines())
        lines = [text for text in stripped if text]
        last_index = len(lines) - 1
        for index, text in enumerate(lines):
            process_line(text, target.add_paragraph(), uncommon)
            # Empty spacer paragraph after every line except the last one.
            if index < last_index:
                target.add_paragraph()

    target.save(OUTPUT_DOC)
    print(f"✅ 处理完成:行间距已精确控制 → {OUTPUT_DOC}")
if __name__ == "__main__":
    # Report clock skew first: the API signature depends on the current date.
    sync_time()
    started_at = datetime.now(timezone.utc)
    print(f"[TIME] {started_at.isoformat()}")
    annotate_doc()
# 我需要在每一行的代码下添加一行空行(可以加进去中文的空行),把中文词义对应放在单词的下划线下方。记住最重要的一点:其他的内容不要改动和删减,只修改这个地方的内容就行。
最新发布