retrieveHTML, translateonly, xmlParse, clearRows, addRow, findlocid, setPara

本文介绍了一个基于JavaScript的天气应用程序的功能实现,包括城市搜索、参数设置及国际化语言支持等功能。文章详细解释了如何通过HTTP请求获取城市信息,并利用XML解析来展示搜索结果,同时还涉及了如何设置用户偏好的温度单位、风速单位等。

// JavaScript Document
// Result list of the most recent city search: findlocid() stores the array
// returned by xmlParse() here and confirmcity() reads it back.
var cities;
// Preference flags; setPara() writes 0 or 1 into each of these before saving
// them as cookies.
var tempunit;
var visbunit;
var presunit;
var windunit;
var locimg;
// Host name that findlocid() uses to build the city-search URL.
var ssn = "xml.weather.com";
// Language resource document; instantiated later in this file and queried by
// translateonly().
var xmlDoc = null;

/**
 * Stores a cookie that expires 365 days from now.
 *
 * @param {string} name  Cookie name, written as-is.
 * @param {*} value      Cookie value; encoded with escape(), which is the
 *                       counterpart of the unescape() call in getCookie().
 */
function setCookie(name, value) {
  var ONE_YEAR_MS = 1000 * 60 * 60 * 24 * 365;
  var expiry = new Date(new Date().getTime() + ONE_YEAR_MS);
  var pair = name + "=" + escape(value);
  document.cookie = pair + "; expires=" + expiry.toUTCString();
}

/**
 * Reads a cookie previously written by setCookie().
 *
 * The cookie string is split into its "name=value" pairs and the name is
 * compared against the start of each pair, so a cookie whose name merely ends
 * with the requested name (e.g. "XWT_loc_id" vs "WT_loc_id") is not matched.
 *
 * @param {string} Name  Exact cookie name to look up.
 * @returns {string} The unescape()-decoded value, or '' when no such cookie
 *                   exists.
 */
function getCookie(Name) {
  var search = Name + "=";
  var pairs = document.cookie.split(";");
  for (var k = 0; k < pairs.length; k++) {
    // Pairs are separated by "; ", so strip the leading blank before comparing.
    var pair = pairs[k].replace(/^\s+/, "");
    if (pair.indexOf(search) === 0) {
      return unescape(pair.substring(search.length));
    }
  }
  return '';
}

// Restore the saved location and unit preferences. getCookie() returns ''
// for anything that has not been stored yet.
var curlocid = getCookie("WT_loc_id");
var curcityname = getCookie("WT_loc_text");
var tempunit = getCookie("WT_T_unit");
var windunit = getCookie("WT_Wind_unit");
var presunit = getCookie("WT_P_unit");
var visbunit = getCookie("WT_Visb_unit");
var lan = getCookie("WT_Lan");

// Document that holds the UI strings looked up by translateonly().
xmlDoc = new ActiveXObject("Microsoft.XMLDOM");
xmlDoc.async = "false";

// WT_Lan holds the value saved by setPara() (configlan.selectedIndex).
// Any value other than "0", "1" or "2" loads no language file at all.
if (lan === "1") {
  xmlDoc.load("language/lan_cn_S.xml");
} else if (lan === "0") {
  xmlDoc.load("language/lan_en.xml");
} else if (lan === "2") {
  xmlDoc.load("language/lan_cn_T.xml");
}

/**
 * Downloads sURL with a blocking GET request and returns the response as the
 * value of a scratch ADODB.Recordset field.
 *
 * The raw responseBody is appended to a single-field recordset and read back
 * through the field's value property.
 * NOTE(review): 200 / 128 look like ADO's adVarChar / adFldLong constants and
 * 40000 the field's defined size - confirm against the ADO reference.
 *
 * @param {string} sURL  Address to fetch.
 * @returns {*} The value read back from the "txt" field.
 */
function retrieveHTML(sURL) {
  // Third argument false: send() does not return until the response arrived.
  var request = new ActiveXObject("Microsoft.XMLHTTP");
  request.open("GET", sURL, false);
  request.send();
  var rawBody = request.responseBody;

  var recordset = new ActiveXObject("ADODB.Recordset");
  recordset.Fields.Append("txt", 200, 40000, 128);
  recordset.Open();
  recordset.AddNew();
  var textField = recordset.Fields.item("txt");
  textField.AppendChunk(rawBody);
  var text = textField.value;

  // Close the recordset and drop both ActiveX references before returning.
  recordset.Close();
  recordset = null;
  request = null;
  return text;
}

/**
 * Looks up a UI string in the global language document (xmlDoc).
 *
 * @param {string} WTwords  Tag name of the string to look up.
 * @returns {string} Text of the first element with that tag name, or WTwords
 *                   itself when the language document has no such element.
 */
function translateonly(WTwords) {
  // Locals are declared with var so the lookup no longer leaks "objwords" and
  // "rewords" into the global scope.
  var matches = xmlDoc.getElementsByTagName(WTwords);
  if (matches.length > 0) {
    return matches.item(0).text;
  }
  return WTwords;
}

/**
 * Parses an XML string and flattens the element children of its root into an
 * array of [text, id, type] triples.
 *
 * @param {string} xmlText  XML source.
 * @returns {Array} One [text, id attribute, type attribute] entry per child
 *                  element of the root; an empty array when the text could
 *                  not be parsed into a document with a root element.
 */
function xmlParse(xmlText) {
  var nodes = [];
  // Named "parser" so it does not shadow the global language document xmlDoc.
  var parser = new ActiveXObject("Microsoft.XMLDOM");
  parser.async = false;
  parser.loadXML(xmlText);

  var root = parser.documentElement;
  if (root == null) {
    // Nothing usable was parsed; callers treat an empty list as "no results".
    return nodes;
  }

  var children = root.childNodes;
  for (var k = 0; k < children.length; k++) {
    var child = children.item(k);
    // Only element nodes (nodeType 1) carry attributes; skip text/comments.
    if (child.nodeType !== 1) {
      continue;
    }
    nodes.push([child.text, child.getAttribute("id"), child.getAttribute("type")]);
  }
  return nodes;
}

/**
 * Removes every row from a table.
 *
 * @param {Object} oTable  Table object exposing rows.length and deleteRow().
 */
function clearRows(oTable) {
  // The counter is local (no implicit global) and the row count is read once
  // up front; index -1 deletes the last row on each pass.
  for (var remaining = oTable.rows.length; remaining > 0; remaining--) {
    oTable.deleteRow(-1);
  }
}

/**
 * Appends a new single-cell row to a table and fills the cell with markup.
 *
 * @param {Object} oTable   Table object exposing insertRow().
 * @param {string} rowHTML  Markup assigned to the new cell's innerHTML;
 *                          callers are responsible for its content.
 */
function addRow(oTable, rowHTML) {
  // Index -1 appends. The cell is created directly on the new row instead of
  // re-fetching the row through the rows collection with call syntax, which
  // only works where collections are callable.
  var oRow = oTable.insertRow(-1);
  var oCell = oRow.insertCell(-1);
  oCell.innerHTML = rowHTML;
}

/**
 * Searches for the city typed into the "cityname" field and renders the
 * matches as a radio-button list inside the "resultcontent" table.
 *
 * The parsed result list is stored in the global "cities" so that
 * confirmcity() can read the chosen entry later.
 */
function findlocid() {
  // Escapes text for safe use inside HTML content and quoted attributes.
  function escapeHtml(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
  }

  // Encode the user's input so spaces, "&", "#" or non-ASCII characters
  // cannot corrupt the query string.
  var query = encodeURIComponent(cityname.value);
  var urlstr = "http://" + ssn + "/search/search?where=" + query;
  cities = xmlParse(retrieveHTML(urlstr));

  clearRows(resultcontent);
  if (cities.length === 0) {
    addRow(resultcontent, translateonly("WT_Noresult"));
    return;
  }

  addRow(resultcontent, "<B>" + translateonly("WT_Find1") + cities.length + translateonly("WT_Find2") + "</B>");
  for (var k = 0; k < cities.length; k++) {
    // City text and id come from the remote response, so they are escaped and
    // the attribute values quoted before being handed to innerHTML.
    var rowHTML = '<label><input class="radio" type="radio" name="listcities" value="' +
      escapeHtml(cities[k][1]) + '">&nbsp;' + escapeHtml(cities[k][0]) + "</label>";
    addRow(resultcontent, rowHTML);
  }
  // Quoted so that a button caption containing a space is kept whole.
  addRow(resultcontent, '<input class="button" type="button" value="' +
    escapeHtml(translateonly("WT_Confirm")) + '" onClick="confirmcity()">');
}

/**
 * Reads the preference controls, saves the choices as cookies and then
 * replaces the current page with weather.htm.
 *
 * Each flag global ends up as 1 when the first option of its control group is
 * checked and 0 otherwise; the language is saved as configlan.selectedIndex.
 */
function setPara() {
  tempunit = configtemp[0].checked ? 1 : 0;
  visbunit = configvisb[0].checked ? 1 : 0;
  presunit = configpres[0].checked ? 1 : 0;
  windunit = configwind[0].checked ? 1 : 0;
  locimg = configlocimg[0].checked ? 1 : 0;

  // Cookie name / value pairs, written in this order.
  var settings = [
    ["WT_T_unit", tempunit],
    ["WT_Wind_unit", windunit],
    ["WT_P_unit", presunit],
    ["WT_Visb_unit", visbunit],
    ["WT_Lan", configlan.selectedIndex],
    ["WT_loc_img", locimg]
  ];
  for (var k = 0; k < settings.length; k++) {
    setCookie(settings[k][0], settings[k][1]);
  }

  location.replace("weather.htm");
}

/**
 * Saves the chosen city from the last search as cookies and then replaces
 * the current page with config.htm.
 *
 * With exactly one search result that result is saved without consulting
 * the radio buttons; otherwise the entry whose "listcities" radio button is
 * checked is saved. The page is replaced even when nothing was checked.
 */
function confirmcity() {
  // Writes the id, type and display text of one [text, id, type] entry.
  function remember(city) {
    setCookie("WT_loc_id", city[1]);
    setCookie("WT_loc_type", city[2]);
    setCookie("WT_loc_text", city[0]);
  }

  if (cities.length === 1) {
    remember(cities[0]);
  } else {
    // Local counter (no implicit global); stop at the first checked button.
    for (var k = 0; k < cities.length; k++) {
      if (listcities[k].checked) {
        remember(cities[k]);
        break;
      }
    }
  }
  location.replace("config.htm");
}

function comment() {
 if(curlocid == '') {
  strHTML = translateonly("WT_Search_Error");
  document.write(strHTML);
 } else {
  strHTML = translateonly("WT_Search_Normal");
  document.write(strHTML);
 }

请将修改整合到以下代码中: import os import re import traceback from docx import Document from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING from docx.shared import Pt, Cm, RGBColor from docx.oxml.ns import qn from docx.oxml import parse_xml from docx.oxml.shared import OxmlElement def process_word_document(input_path, skip_pages): try: # 检查文件格式 if not input_path.lower().endswith(('.docx', '.doc')): raise ValueError("只支持 .docx 或 .doc 格式的文件") # 处理 .doc 文件(需要转换为 .docx) if input_path.lower().endswith('.doc'): # 实际应用中需要安装 win32com 并转换文件 # 这里简化为直接报错 raise NotImplementedError("请先将 .doc 文件转换为 .docx 格式") doc = Document(input_path) # 跳过前x页 if skip_pages > 0: page_break_count = 0 delete_index = 0 # 遍历段落查找分页符 for i, para in enumerate(doc.paragraphs): if page_break_count >= skip_pages: break # 检查分页符(w:lastRenderedPageBreak 或 w:br type="page") if para._element.xpath('.//w:lastRenderedPageBreak') or \ para._element.xpath('.//w:br[@w:type="page"]'): page_break_count += 1 delete_index = i + 1 # 删除封面和目录页的段落 for i in range(delete_index): if len(doc.paragraphs) > 0: p = doc.paragraphs[0] p_element = p._element p_element.getparent().remove(p_element) # 设置全局样式 for para in doc.paragraphs: # 跳过空段落 if not para.text.strip(): continue # 设置基本段落格式 set_paragraph_format(para) # 检查并设置标题样式 text = para.text.strip() if re.match(r'^[一二三四五六七八九十]+、', text): set_heading_style(para, level=1) elif re.match(r'^\d+\.\d+', text): set_heading_style(para, level=2) elif re.match(r'^$\d+$', text): set_heading_style(para, level=3) # 处理图片和表格 process_images(doc) process_tables(doc) # 保存处理后的文件 output_dir = os.path.dirname(input_path) filename = os.path.basename(input_path) name, ext = os.path.splitext(filename) output_path = os.path.join(output_dir, f"{name}_已处理.docx") doc.save(output_path) return f"文件处理成功,保存为: {output_path}" except Exception as e: # 获取出错时的上下文信息 error_context = "未知位置" if 'para' in locals(): error_context = f"段落内容: {para.text[:50] + '...' 
if para.text else '空段落'}" elif 'table' in locals(): error_context = f"表格位置: 第{len(doc.tables)}个表格" return f"处理出错: {str(e)}\n出错位置: {error_context}\n详细错误:\n{traceback.format_exc()}" def set_paragraph_format(para): """设置段落基本格式:仿宋三号,行间距28.95磅,首行缩进2字符,两端对齐""" # 设置字体 for run in para.runs: run.font.name = '仿宋' run.font.size = Pt(16) # 三号≈16磅 run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋') # 设置段落格式 p_format = para.paragraph_format p_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY p_format.line_spacing = Pt(28.95) p_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY # 设置首行缩进(2字符≈0.74cm) p_format.first_line_indent = Cm(0.74) # 清除原有制表位 p_prop = para._element.get_or_add_pPr() tabs = p_prop.find(qn('w:tabs')) if tabs is not None: p_prop.remove(tabs) # 添加新的制表位(用于首行缩进) tab_stop = OxmlElement('w:tab') tab_stop.set(qn('w:val'), 'left') tab_stop.set(qn('w:pos'), str(int(Cm(0.74).emu))) tabs = OxmlElement('w:tabs') tabs.append(tab_stop) p_prop.append(tabs) def set_heading_style(para, level): """设置标题样式""" # 清除首行缩进 para.paragraph_format.first_line_indent = Cm(0) # 根据级别设置样式 if level == 1: # 一级标题:黑体三号 font_name = '黑体' for run in para.runs: run.font.name = font_name run.font.size = Pt(16) run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER elif level == 2: # 二级标题:楷体三号 font_name = '楷体' for run in para.runs: run.font.name = font_name run.font.size = Pt(16) run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT elif level == 3: # 三级标题:仿宋三号 # 已在基础设置中 para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT def process_images(doc): """处理图片及其图注""" for i, para in enumerate(doc.paragraphs): # 检查是否包含图片 if any(run._element.xpath('.//pic:pic') for run in para.runs): # 检查是否为独立图片(整段只有图片) if not para.text.strip() and len(para.runs) == 1: # 独立图片:单倍行距、居中对齐 para.paragraph_format.line_spacing = Pt(12) # 单倍行距 para.alignment = WD_ALIGN_PARAGRAPH.CENTER # 检查下一段是否为图注 if i + 1 < 
len(doc.paragraphs): next_para = doc.paragraphs[i + 1] if next_para.text.strip().startswith('图'): set_figure_caption_style(next_para) else: # 内嵌图片:单倍行距、两端对齐 para.paragraph_format.line_spacing = Pt(12) # 单倍行距 para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY def set_figure_caption_style(para): """设置图注样式:楷体四号,行间距28.95磅,居中对齐""" # 设置字体 for run in para.runs: run.font.name = '楷体' run.font.size = Pt(14) # 四号≈14磅 run._element.rPr.rFonts.set(qn('w:eastAsia'), '楷体') # 设置段落格式 para.paragraph_format.line_spacing = Pt(28.95) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER para.paragraph_format.first_line_indent = Cm(0) def process_tables(doc): """处理表格及其表注(修复版)""" for table in doc.tables: # 表格整体居中 table.alignment = WD_ALIGN_PARAGRAPH.CENTER # 设置表头样式(第一行) if len(table.rows) > 0: header_cells = table.rows[0].cells for cell in header_cells: for para in cell.paragraphs: for run in para.runs: run.font.name = '黑体' run.font.size = Pt(14) run._element.rPr.rFonts.set(qn('w:eastAsia'), '黑体') para.paragraph_format.line_spacing = Pt(28.95) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER # 设置表格内容样式(其他行) for row in table.rows[1:]: for cell in row.cells: for para in cell.paragraphs: for run in para.runs: run.font.name = '仿宋' run.font.size = Pt(14) run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋') para.paragraph_format.line_spacing = Pt(28.95) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER # 修复:正确处理表注 table_element = table._element prev_element = table_element.getprevious() # 检查前一个元素是否是段落 if prev_element is not None and prev_element.tag.endswith('p'): # 在文档段落中查找匹配的元素 for para in doc.paragraphs: if para._element is prev_element: if para.text.strip().startswith('表'): set_table_caption_style(para) break def set_table_caption_style(para): """设置表注样式:楷体四号,行间距28.95磅,居中对齐""" # 设置字体 for run in para.runs: run.font.name = '楷体' run.font.size = Pt(14) # 四号≈14磅 run._element.rPr.rFonts.set(qn('w:eastAsia'), '楷体') # 设置段落格式 para.paragraph_format.line_spacing = 
Pt(28.95) para.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER para.paragraph_format.first_line_indent = Cm(0) # 主程序入口 if __name__ == "__main__": print("Word文档格式处理工具") print("=" * 50) input_path = input("请输入Word文档路径: ").strip() skip_pages = int(input("请输入要跳过的页数: ")) result = process_word_document(input_path, skip_pages) print("\n处理结果:") print("-" * 50) print(result) print("=" * 50)
07-03
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

rjzou2006

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值