1082. Read Number in Chinese (25)

1.需要分类考虑的情况比较多

2.123456789可以分为 1  2345  6789   ,其中2345和6789的处理方法相同,不同点就是2345处理完后需要加上Wan

3.需要处理0等特殊情况

4.下面有些特别的测试例子,可以参考:

/*
-123456789
 123400000
 123000000
 120000000
 100000000
 123000010
-23456789
 23400000
 23000000
 20000000
 00000000
 23000010
*/

//#include<string>
//#include <iomanip>
//#include<stack>
//#include<unordered_set>
//#include <sstream>
//#include "func.h"
//#include <list>
#include<unordered_map>
#include<set>
#include<queue>
#include<map>
#include<vector>
#include <algorithm>
#include<stdio.h>
#include<iostream>
#include<string>
#include<memory.h>
#include<limits.h>
#include<stack>
using namespace std;
/*
-123456789
 123400000
 123000000
 120000000
 100000000
 123000010
-23456789
 23400000
 23000000
 20000000
 00000000
 23000010
*/
// Pinyin word for each decimal digit, indexed by the digit's numeric value
// (callers index it with `ch - '0'`).
string num2Chinese[] = { "ling", "yi", "er", "san", "si", "wu", "liu", "qi", "ba", "jiu" };
// Returns `str` with every leading '0' character removed.
// A string made up only of zeros (or an empty string) yields "".
string deleteZero(string str)
{
	// Position of the first character that is not '0'; everything before it
	// is leading padding.
	const string::size_type firstKept = str.find_first_not_of('0');
	if (firstKept == string::npos)
		return "";
	return str.substr(firstKept);
}
// Reads a single digit: returns the pinyin word for str[0].
// An empty string is returned unchanged.
string digit1(string str)
{
	if (!str.empty())
		return num2Chinese[str[0] - '0'];
	return str;
}
// Reads a two-digit group, e.g. "10" -> "yi Shi", "11" -> "yi Shi yi".
// The ones digit is appended only when it is non-zero.
string digit2(string str)
{
	const string tens = num2Chinese[str[0] - '0'] + " Shi";
	// Whatever is left after dropping zeros that follow the tens digit.
	const string ones = deleteZero(str.substr(1));
	return ones.size() == 1 ? tens + " " + digit1(ones) : tens;
}
// Reads a three-digit group (100, 101, 110, 111, ...): "<d> Bai" followed by
// whatever remains once the zeros directly after the hundreds digit are dropped.
string digit3(string str)
{
	const string hundreds = num2Chinese[str[0] - '0'] + " Bai";
	const string rest = deleteZero(str.substr(1));
	switch (rest.size())
	{
	case 2:  // tens digit is non-zero
		return hundreds + " " + digit2(rest);
	case 1:  // tens digit is zero, ones digit is not: the gap is read as "ling"
		return hundreds + " ling " + digit1(rest);
	default: // nothing left after the hundreds digit
		return hundreds;
	}
}
// Reads a four-digit group (1000, 1001, 1011, 1111, ...): "<d> Qian" followed
// by whatever remains once the zeros directly after the thousands digit are
// dropped. A skipped run of zeros in the middle is read as a single "ling".
string digit4(string str)
{
	const string thousands = num2Chinese[str[0] - '0'] + " Qian";
	const string rest = deleteZero(str.substr(1));
	switch (rest.size())
	{
	case 3:  // hundreds digit is non-zero
		return thousands + " " + digit3(rest);
	case 2:  // hundreds digit is zero, tens digit is not
		// "ling" must be followed by a space; without it the output ran the
		// two words together ("lingyi Shi").
		return thousands + " ling " + digit2(rest);
	case 1:  // hundreds and tens are zero, ones digit is not
		return thousands + " ling " + digit1(rest);
	default: // nothing left after the thousands digit
		return thousands;
	}
}
// Dispatches a group of up to four digits to the reader for its length.
// Lengths 0 and 1 both go to digit1; anything longer than four digits
// produces an empty string.
string digitProc(string str)
{
	const string::size_type len = str.size();
	if (len <= 1)
		return digit1(str);
	if (len == 2)
		return digit2(str);
	if (len == 3)
		return digit3(str);
	if (len == 4)
		return digit4(str);
	return "";
}

// Reads one integer (optional leading '-') from stdin and prints its Chinese
// reading in pinyin, words separated by single spaces with no trailing space.
//
// The digits are split into an optional Yi digit (9-digit numbers), a Wan
// group and a low group of four digits; each group is read by digitProc.
// Inputs longer than 9 digits are not handled and produce no reading.
int main(void)
{
	string str;
	cin >> str;

	bool negative = false;
	if (!str.empty() && str[0] == '-')
	{
		negative = true;
		str = str.substr(1);
	}
	str = deleteZero(str);

	string ans;
	if (str.empty())
	{
		// The value is zero; zero has no sign, so "-0" reads as plain "ling".
		ans = "ling";
		negative = false;
	}
	else if (str.size() <= 4)
	{
		ans = digitProc(str);
	}
	else if (str.size() <= 9)
	{
		const bool hasYi = (str.size() == 9);
		string wan; // Wan group with its leading zeros removed
		if (hasYi)
		{
			ans = num2Chinese[str[0] - '0'] + " Yi";
			wan = deleteZero(str.substr(1, 4));
		}
		else
		{
			// str has no leading zeros here, so this group is already clean.
			wan = str.substr(0, str.size() - 4);
		}
		// Low four digits with their leading zeros removed.
		const string low = deleteZero(str.substr(str.size() - 4));

		if (!wan.empty())
		{
			if (hasYi)
			{
				// Zeros between the Yi digit and the Wan group read as "ling".
				ans += (wan.size() < 4) ? " ling " : " ";
			}
			ans += digitProc(wan) + " Wan";
		}
		if (!low.empty())
		{
			// "ling" is needed when zeros were skipped before the low group:
			// either its own leading zeros or an all-zero Wan group.
			if (low.size() < 4 || wan.empty())
				ans += " ling";
			ans += " " + digitProc(low);
		}
	}

	if (negative)
		ans = "Fu " + ans;
	cout << ans << endl;

	return 0;
}



import sys
import os
import re
import shutil
import tempfile
import subprocess
import time

from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QPushButton, QTextEdit, QFileDialog,
    QVBoxLayout, QWidget, QStatusBar, QProgressDialog
)
from PyQt5.QtCore import Qt
from docx import Document
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.text.paragraph import Paragraph


class DocumentReaderApp(QMainWindow):
    """Main window that opens a .docx/.doc/.wps file and shows it as plain text.

    Automatically numbered paragraphs are prefixed with a label rendered from
    the document's own numbering definitions (number format and level-text
    template), so lists such as "一、" or "（一）" keep their original look.
    """

    # w:numFmt values that are rendered with Chinese numerals.
    # NOTE(review): these formats are treated as identical here; the OOXML
    # spec distinguishes them for larger values - confirm if that matters.
    CHINESE_FORMATS = (
        'chineseCounting',
        'chineseCountingThousand',
        'japaneseCounting',
        'taiwaneseCounting',
        'taiwaneseCountingThousand',
    )

    def __init__(self):
        super().__init__()
        self.setWindowTitle("文档阅读器")
        self.setGeometry(100, 100, 800, 600)

        # Main layout
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        layout = QVBoxLayout(main_widget)

        # "Choose file" button
        self.btn_open = QPushButton("选择文件")
        self.btn_open.clicked.connect(self.open_file)
        layout.addWidget(self.btn_open)

        # Read-only text area that shows the document content
        self.text_edit = QTextEdit()
        self.text_edit.setReadOnly(True)
        layout.addWidget(self.text_edit)

        # Status bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        self.status_bar.showMessage("就绪")

        # DOC/WPS conversion needs LibreOffice; warn when it is missing
        self.libreoffice_path = self.find_libreoffice()
        if not self.libreoffice_path:
            self.status_bar.showMessage("警告: 未找到LibreOffice,DOC/WPS文件转换功能不可用")

    def find_libreoffice(self):
        """Return the path of a LibreOffice executable, or None if not found."""
        windows_paths = [
            r"C:\Program Files\LibreOffice\program\soffice.exe",
            r"C:\Program Files (x86)\LibreOffice\program\soffice.exe"
        ]
        linux_paths = [
            "/usr/bin/libreoffice",
            "/usr/bin/soffice",
            "/snap/bin/libreoffice"
        ]

        # Well-known install locations first
        paths = windows_paths if sys.platform == "win32" else linux_paths
        for path in paths:
            if os.path.exists(path):
                return path

        # Then fall back to a PATH lookup
        return shutil.which("soffice") or shutil.which("libreoffice")

    def open_file(self):
        """Show the file dialog, read the chosen document and display it."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择文档", "", "文档文件 (*.docx *.doc *.wps);;所有文件 (*.*)"
        )
        if not file_path:
            return

        self.status_bar.showMessage(f"正在处理: {os.path.basename(file_path)}...")
        QApplication.processEvents()  # let the status message repaint

        try:
            text = self.read_document(file_path)
            self.text_edit.setText(text)
            self.status_bar.showMessage(f"成功读取: {os.path.basename(file_path)}")
        except Exception as e:
            self.text_edit.setText(f"错误: {str(e)}")
            self.status_bar.showMessage(f"读取失败: {os.path.basename(file_path)}")

    def read_document(self, file_path):
        """Read a document, converting .doc/.wps to .docx first.

        Raises:
            RuntimeError: LibreOffice is missing, or conversion/reading failed.
            ValueError: the file extension is not supported.
        """
        ext = os.path.splitext(file_path)[1].lower()

        if ext == '.docx':
            return self.read_docx_with_numbering(file_path)
        elif ext in ('.doc', '.wps'):
            if not self.libreoffice_path:
                raise RuntimeError("未安装LibreOffice,无法转换DOC/WPS文件")

            # Busy indicator while LibreOffice runs
            progress = QProgressDialog("正在转换文件...", "取消", 0, 0, self)
            progress.setWindowTitle("文档转换")
            progress.setWindowModality(Qt.WindowModal)
            progress.setCancelButton(None)  # conversion cannot be cancelled
            progress.show()
            QApplication.processEvents()

            try:
                converted_file = self.convert_to_docx(file_path)
            finally:
                progress.close()

            # Only the directory created by convert_to_docx is removed; the
            # user's own file is never deleted.
            try:
                return self.read_docx_with_numbering(converted_file)
            finally:
                shutil.rmtree(os.path.dirname(converted_file), ignore_errors=True)
        else:
            raise ValueError("不支持的格式")

    def convert_to_docx(self, file_path):
        """Convert a file to DOCX with LibreOffice and return the new path.

        The result is written into a fresh temporary directory, which the
        caller should remove once the file has been read. The directory is
        removed here if the conversion fails.

        Raises:
            RuntimeError: the conversion failed, timed out, or produced no file.
        """
        temp_dir = tempfile.mkdtemp()
        try:
            cmd = [
                self.libreoffice_path,
                "--headless",             # no GUI
                "--convert-to", "docx",   # target format
                "--outdir", temp_dir,     # output directory
                file_path                 # input file
            ]

            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=60  # seconds
            )

            if result.returncode != 0:
                error_msg = result.stderr.decode('utf-8', errors='ignore')
                raise RuntimeError(f"文件转换失败: {error_msg}")

            base_name = os.path.splitext(os.path.basename(file_path))[0]
            converted_path = os.path.join(temp_dir, f"{base_name}.docx")

            if not os.path.exists(converted_path):
                raise RuntimeError("转换后的文件未找到")

            return converted_path
        except subprocess.TimeoutExpired as e:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise RuntimeError("文件转换超时") from e
        except RuntimeError:
            # Already a user-facing message; do not wrap it a second time.
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise
        except Exception as e:
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise RuntimeError(f"转换过程中出错: {str(e)}") from e

    @staticmethod
    def _child_val(parent, tag, default=None):
        """Return the w:val attribute of the direct child `tag`, or `default`."""
        child = parent.find(qn(tag))
        if child is None:
            return default
        return child.get(qn('w:val'), default)

    def _load_numbering_definitions(self, doc):
        """Map each numId to its levels: {ilvl: {'num_fmt', 'lvl_text', 'start'}}.

        Level overrides (w:lvlOverride) on a w:num are not applied.
        """
        try:
            numbering = doc.part.numbering_part.element
        except (KeyError, NotImplementedError):
            # NOTE(review): python-docx appears to raise NotImplementedError
            # when the document has no numbering part - confirm.
            return {}

        abstract_levels = {}
        for abstract_num in numbering.iter(qn('w:abstractNum')):
            levels = {}
            for lvl in abstract_num.iter(qn('w:lvl')):
                ilvl = int(lvl.get(qn('w:ilvl'), '0'))
                levels[ilvl] = {
                    'num_fmt': self._child_val(lvl, 'w:numFmt', 'decimal'),
                    'lvl_text': self._child_val(lvl, 'w:lvlText', ''),
                    'start': int(self._child_val(lvl, 'w:start', '1')),
                }
            abstract_levels[abstract_num.get(qn('w:abstractNumId'))] = levels

        definitions = {}
        for num in numbering.iter(qn('w:num')):
            abstract_id = self._child_val(num, 'w:abstractNumId')
            levels = abstract_levels.get(abstract_id)
            if levels is not None:
                definitions[num.get(qn('w:numId'))] = levels
        return definitions

    def _paragraph_numbering(self, para):
        """Return (num_id, ilvl) for a numbered paragraph, else (None, 0).

        Only numbering set directly on the paragraph is detected; numbering
        inherited from a paragraph style is not.
        """
        p_pr = para._p.find(qn('w:pPr'))
        # Compare with None explicitly: an lxml element without children is falsy.
        num_pr = None if p_pr is None else p_pr.find(qn('w:numPr'))
        if num_pr is None:
            return None, 0
        num_id = self._child_val(num_pr, 'w:numId')
        if num_id in (None, '0'):  # numId 0 means "numbering removed"
            return None, 0
        return num_id, int(self._child_val(num_pr, 'w:ilvl', '0'))

    def render_numbering_prefix(self, levels, ilvl, level_counters):
        """Build the label for a paragraph at level `ilvl`.

        Args:
            levels: level definitions of the list ({ilvl: {...}}), may be empty.
            ilvl: zero-based level of the paragraph.
            level_counters: current counter value per level of this list.
        """
        level = levels.get(ilvl)
        if level is None:
            # No definition available: fall back to a plain "N." label.
            return self.get_number_prefix('decimal', level_counters[ilvl])
        if level['num_fmt'] == 'bullet':
            return "•"
        template = level['lvl_text']
        if not template:
            return self.get_number_prefix(level['num_fmt'], level_counters[ilvl])

        def substitute(match):
            # "%1" refers to level 0, "%2" to level 1, and so on.
            ref = int(match.group(1)) - 1
            ref_level = levels.get(ref, {})
            value = level_counters.get(ref, ref_level.get('start', 1))
            return self.format_number(ref_level.get('num_fmt', 'decimal'), value)

        return re.sub(r"%([1-9])", substitute, template)

    def read_docx_with_numbering(self, file_path):
        """Return the text of a .docx file, one paragraph per line, with labels
        for automatically numbered paragraphs.

        Raises:
            RuntimeError: the file could not be read or parsed.
        """
        try:
            doc = Document(file_path)
            numbering_dict = self._load_numbering_definitions(doc)
            list_counter = {}  # numId -> {ilvl: current value}
            text_list = []

            for para in doc.paragraphs:
                num_id, ilvl = self._paragraph_numbering(para)
                if num_id is None:
                    text_list.append(para.text)
                    continue

                levels = numbering_dict.get(num_id, {})
                level_counters = list_counter.setdefault(num_id, {})
                start = levels.get(ilvl, {}).get('start', 1)
                level_counters[ilvl] = level_counters.get(ilvl, start - 1) + 1
                # A new item restarts the numbering of every deeper level.
                for deeper in [lvl for lvl in level_counters if lvl > ilvl]:
                    del level_counters[deeper]

                prefix = self.render_numbering_prefix(levels, ilvl, level_counters)
                text_list.append(f"{prefix} {para.text}")

            return "\n".join(text_list)
        except Exception as e:
            raise RuntimeError(f"读取DOCX文件失败: {str(e)}") from e

    def format_number(self, num_fmt, counter):
        """Render `counter` in the given w:numFmt, without any punctuation.

        Unknown formats are rendered as decimal digits.
        """
        if num_fmt == 'lowerLetter':
            return self.number_to_letters(counter, False)
        if num_fmt == 'upperLetter':
            return self.number_to_letters(counter, True)
        if num_fmt == 'lowerRoman':
            return self.number_to_roman(counter).lower()
        if num_fmt == 'upperRoman':
            return self.number_to_roman(counter)
        if num_fmt in self.CHINESE_FORMATS:
            return self.number_to_chinese(counter)
        return str(counter)

    def get_number_prefix(self, num_fmt, counter):
        """Return a default label for `counter` when no level text is available."""
        if num_fmt == 'bullet':
            return "•"
        if num_fmt in self.CHINESE_FORMATS:
            return f"{self.number_to_chinese(counter)}、"
        return f"{self.format_number(num_fmt, counter)}."

    def number_to_chinese(self, n):
        """Convert 1..9999 to Chinese numerals (一, 十, 十一, 二十, 一百零一, ...).

        Values outside that range are returned as decimal digits.
        """
        if n <= 0 or n >= 10000:
            return str(n)
        numerals = "零一二三四五六七八九"
        units = ["", "十", "百", "千"]
        digits = str(n)
        parts = []
        pending_zero = False
        for idx, ch in enumerate(digits):
            d = int(ch)
            if d == 0:
                # An inner run of zeros is written as one "零", and only if a
                # non-zero digit follows.
                pending_zero = bool(parts)
                continue
            if pending_zero:
                parts.append("零")
                pending_zero = False
            parts.append(numerals[d] + units[len(digits) - idx - 1])
        result = "".join(parts)
        # 10-19 are written "十", "十一", ... rather than "一十".
        if result.startswith("一十"):
            result = result[1:]
        return result

    def number_to_letters(self, n, uppercase=True):
        """Convert a number to letters (A, B, C, ... AA, AB, ...)."""
        result = ""
        while n > 0:
            n, remainder = divmod(n - 1, 26)
            result = chr(65 + remainder) + result
        return result if uppercase else result.lower()

    def number_to_roman(self, n):
        """Convert a number to upper-case Roman numerals."""
        val = [1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1]
        syb = ["M", "CM", "D", "CD", "C", "XC", "L", "XL", "X", "IX", "V", "IV", "I"]
        roman_num = ''
        i = 0
        while n > 0:
            for _ in range(n // val[i]):
                roman_num += syb[i]
                n -= val[i]
            i += 1
        return roman_num


if __name__ == "__main__":
    app = QApplication(sys.argv)
    window = DocumentReaderApp()
    window.show()
    sys.exit(app.exec_())

# Question posted with this program (translated): this program can read wps,
# doc and docx files and can recognise automatic numbering, but the numbering
# it produces is wrong - for example, labels such as "一、" and "(一)" are
# recognised as "1.", which no longer matches the original text. Please help
# me fix this code.
06-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值