记单词017

paw
手掌,爪子

hillside
山腰,山坡

pillar
柱子

photographic
摄影的

swift
快速的

interference
冲突,干涉

disposal
处理,消除

squirrel
松鼠

cap
帽子

stain
污染

hobby
业余爱好

avenue
林荫道

mainland
大陆

leisure
闲暇

blossom
花,开花

sincere
真挚的

princess
公主

preferable
更好的

jewel
宝石

imitate
模仿

flock
一群

flourish
繁荣

loaf
条,块

mathematical
数学的

awfully
可怕地

intermediate
中间的

lump
团,块

inquire
询问,查究

sausage
香肠

liberation
解放

scarce
缺乏的

voyage
航行

hammer
锤子

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Underline listed words in a .docx file and annotate them with a translation.

The script reads INPUT_DOC paragraph by paragraph, sends each non-empty line
to the translation endpoint configured below, and writes OUTPUT_DOC in which
every word found in WORD_LIST is underlined and followed, on the next line of
the same paragraph, by a small-font translation note.
"""
import base64
import hashlib
import hmac
import json
import re
import time
from datetime import datetime, timezone
from email.utils import formatdate

import pandas as pd
import requests
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Pt

try:
    import ntplib
except ImportError:
    ntplib = None

# ─────────── Configuration ───────────
INPUT_DOC = '一词多义测试文本.docx'
WORD_LIST = '拟加线并翻译的单词表.xlsx'
OUTPUT_DOC = 'C_annotated.docx'

HOST = 'ntrans.xfyun.cn'
REQUEST_URI = '/v2/ots'
API_URL = f'https://{HOST}{REQUEST_URI}'

# SECURITY NOTE(review): these credentials are hard-coded and were published
# together with the script. They should be rotated and loaded from the
# environment or a secrets store instead of living in source.
APPID = '7f0f910b'
APIKey = '3c49387f2827fd870860c017d3326970'
APISecret = 'MTM2OWMzNTA4YzY2MDk5N2NmMGMwMmNi'
# ──────────────────────────────────────


def sync_time():
    """Print the offset between the local clock and an NTP server.

    Purely diagnostic: nothing is adjusted. The request signature embeds the
    current date, so an offset above 300 seconds is reported as an error.
    If ``ntplib`` is not installed a warning is printed and the check is
    skipped.
    """
    if ntplib is None:
        print("[WARNING] 未安装 ntplib,使用本地时间")
        return
    try:
        c = ntplib.NTPClient()
        r = c.request('pool.ntp.org')
        net = datetime.fromtimestamp(r.tx_time, timezone.utc)
        loc = datetime.now(timezone.utc)
        diff = (net - loc).total_seconds()
        print(f"[TIME SYNC] 本地 UTC:{loc.isoformat()} | NTP UTC:{net.isoformat()} | 差值:{diff:.1f}s")
        if abs(diff) > 300:
            print("[ERROR] 与 NTP 时间差超过 300 秒,可能导致签名失败!")
    except Exception as e:
        # Best effort only: a failed time check must not stop the run.
        print(f"[WARNING] 时间同步失败:{e}")


def get_gmt_date() -> str:
    """Return the current time as an RFC 1123 date string ending in ``GMT``.

    ``email.utils.formatdate`` always emits English day and month names,
    unlike ``strftime('%a ... %b')``, whose output depends on the process
    locale.
    """
    return formatdate(timeval=None, localtime=False, usegmt=True)


def make_niutrans_headers(body: str) -> dict:
    """Build the signed HTTP headers for a POST of *body* to REQUEST_URI.

    Args:
        body: The JSON request body exactly as it will be sent.

    Returns:
        A header dict containing the SHA-256 ``Digest`` of the UTF-8 encoded
        body and an ``Authorization`` value carrying an HMAC-SHA256 signature
        (keyed with APISecret) over the host, date, request line and digest.
    """
    raw = body.encode('utf-8')
    digest_b64 = base64.b64encode(hashlib.sha256(raw).digest()).decode()
    digest_header = f"SHA-256={digest_b64}"
    date = get_gmt_date()

    # The line order here must match the `headers="..."` list declared in the
    # Authorization value below.
    sign_str = "\n".join([
        f"host: {HOST}",
        f"date: {date}",
        f"POST {REQUEST_URI} HTTP/1.1",
        f"digest: {digest_header}",
    ])
    sig = hmac.new(
        APISecret.encode('utf-8'),
        sign_str.encode('utf-8'),
        digestmod=hashlib.sha256,
    ).digest()
    signature_b64 = base64.b64encode(sig).decode()

    return {
        "Content-Type": "application/json",
        "Accept": "application/json,version=1.0",
        "Host": HOST,
        "Date": date,
        "Digest": digest_header,
        "Authorization": f'api_key="{APIKey}", algorithm="hmac-sha256", headers="host date request-line digest", signature="{signature_b64}"',
        "Content-Length": str(len(raw)),
    }


def _normalize_result(result) -> list:
    """Coerce the ``data.result`` payload into the list process_line expects."""
    if not result:
        return []
    if isinstance(result, dict):
        # NOTE(review): the vendor documentation appears to describe
        # `result` as an object holding `trans_result: {src, dst}` rather
        # than a list - confirm against a live response. Iterating such a
        # dict directly would yield its key names as "translations".
        trans = result.get("trans_result")
        if isinstance(trans, dict):
            return [{
                "src_text": trans.get("src", ""),
                "dst_text": trans.get("dst", ""),
            }]
        return []
    return list(result)


def translate_line(line: str) -> list:
    """Translate one line of text through the configured endpoint.

    Args:
        line: The source text to translate.

    Returns:
        A list of result items (dicts with ``src_text`` / ``dst_text`` keys,
        or plain strings). An empty list is returned on any failure; the
        error is printed rather than raised.
    """
    payload = {
        "common": {"app_id": APPID},
        # NOTE(review): verify that "zh" is the target-language code this
        # endpoint expects; the vendor's language table may use another code.
        "business": {"from": "en", "to": "zh"},
        "data": {"text": base64.b64encode(line.encode('utf-8')).decode()},
    }
    body = json.dumps(payload, ensure_ascii=False)
    headers = make_niutrans_headers(body)
    try:
        print(f"[DEBUG] 翻译行前20字符: {line[:20]!r}")
        # Send the same UTF-8 bytes the Digest header was computed over.
        resp = requests.post(
            API_URL, headers=headers, data=body.encode('utf-8'), timeout=10
        )
        resp.raise_for_status()
        js = resp.json()
        return _normalize_result((js.get("data") or {}).get("result"))
    except Exception as e:
        print(f"[ERROR] 翻译失败: {e}")
        # requests exceptions always have a `response` attribute, but it is
        # None for connection errors and timeouts.
        err_resp = getattr(e, "response", None)
        if err_resp is not None:
            print(f"响应内容: {err_resp.text}")
        return []


def _style_run(run, font_name: str, size_pt: int) -> None:
    """Apply *font_name* (Latin and East Asian slots) and size to *run*."""
    run.font.name = font_name
    # Setting font.name first guarantees that rPr/rFonts exist.
    run._element.rPr.rFonts.set(qn("w:eastAsia"), font_name)
    run.font.size = Pt(size_pt)


def _pick_translation(word_lower: str, results) -> str:
    """Return the translation to show under *word_lower*, or ``""``."""
    dst = ""
    for item in results:
        if isinstance(item, dict) and item.get("src_text", "").lower() == word_lower:
            dst = item.get("dst_text", "")
            break
        if isinstance(item, str):
            dst = item
            break
    if not dst and results:
        # No per-word match: fall back to the first result item.
        first = results[0]
        dst = first.get("dst_text", first) if isinstance(first, dict) else first
    return dst if isinstance(dst, str) else str(dst)


def process_line(line, para, uncommon):
    """Write *line* into *para*, underlining and annotating listed words.

    Args:
        line: The source text line.
        para: The python-docx paragraph that receives the runs.
        uncommon: Set of lower-cased words to underline and annotate.
    """
    results = translate_line(line) if line.strip() else []

    # The capturing group keeps the separators, so the text is reproduced
    # in full.
    for tok in re.split(r'(\W+)', line):
        if not tok:
            continue
        low = tok.lower()
        if tok.isalpha() and low in uncommon:
            # Underlined word.
            run = para.add_run(tok)
            run.font.underline = True
            _style_run(run, "Georgia", 12)

            # Translation note on the following line. The text is appended
            # with add_text(): assigning to `run.text` replaces the run's
            # content and would discard the line break just added.
            dst = _pick_translation(low, results)
            if dst:
                note_run = para.add_run()
                note_run.add_break()
                note_run.add_text(dst)
                _style_run(note_run, "SimSun", 8)
        else:
            # Ordinary text.
            run = para.add_run(tok)
            _style_run(run, "Georgia", 12)


def annotate_doc():
    """Read INPUT_DOC and WORD_LIST, then write the annotated OUTPUT_DOC."""
    # Load the word list from the first column. Empty cells are dropped so
    # they do not become the literal word "nan", and surrounding whitespace
    # is removed so padded cells still match.
    df = pd.read_excel(WORD_LIST, header=None)
    uncommon = {
        w.strip().lower()
        for w in df.iloc[:, 0].dropna().astype(str)
        if w.strip()
    }

    input_doc = Document(INPUT_DOC)
    output_doc = Document()

    # Paragraph spacing for the output document's default style.
    style = output_doc.styles["Normal"]
    style.paragraph_format.space_after = Pt(6)   # 6 pt after each paragraph
    style.paragraph_format.space_before = Pt(0)  # no space before

    for para in input_doc.paragraphs:
        # splitlines() handles every newline convention; blank lines are dropped.
        lines = [line.strip() for line in para.text.splitlines() if line.strip()]

        for i, line in enumerate(lines):
            new_para = output_doc.add_paragraph()
            process_line(line, new_para, uncommon)

            # Empty paragraph as a spacer between lines (not after the last).
            if i < len(lines) - 1:
                output_doc.add_paragraph()

    output_doc.save(OUTPUT_DOC)
    print(f"✅ 处理完成:行间距已精确控制 → {OUTPUT_DOC}")


if __name__ == "__main__":
    sync_time()
    print(f"[TIME] {datetime.now(timezone.utc).isoformat()}")
    annotate_doc()

# Request that accompanied this script in the original post (kept verbatim):
# 我需要在每一行的代码下添加一行空行可以加进去中文的空行把中文词义对应放在单词下的下划线下,住最重要的一点其他的内容不要改动和删减修改这个地方的内容就行
最新发布
05-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值