## 项目结构
```
invoice_recognition/
├── main.py                      # 主程序入口
├── config.py                    # 配置文件
├── requirements.txt             # 依赖库
├── data/                        # 数据目录
│   ├── invoices/                # 原始发票图像
│   ├── processed/               # 处理后的图像
│   └── models/                  # 预训练模型
│       └── invoice_detector.pth
├── src/                         # 源代码目录
│   ├── preprocessing.py         # 图像预处理
│   ├── detection.py             # 发票检测与定位
│   ├── ocr.py                   # 文字识别
│   ├── table_recognition.py     # 表格识别
│   ├── validation.py            # 数据验证
│   ├── visualization.py         # 可视化工具
│   ├── utils.py                 # 辅助函数
│   └── exceptions.py            # 自定义异常
└── results/                     # 结果输出目录
    └── reports/                 # 分析报告
```
## 完整代码实现
### 1. config.py - 配置文件
```python
# config.py
import os
# Filesystem layout: every path is derived from the directory holding this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data')
INVOICE_DIR = os.path.join(DATA_DIR, 'invoices')
PROCESSED_DIR = os.path.join(DATA_DIR, 'processed')
MODELS_DIR = os.path.join(DATA_DIR, 'models')
RESULTS_DIR = os.path.join(BASE_DIR, 'results')
REPORTS_DIR = os.path.join(RESULTS_DIR, 'reports')

# Parameters read by the image pre-processing and perspective-correction steps.
PREPROCESS_PARAMS = dict(
    denoise_h=10,
    adaptive_block_size=11,
    adaptive_c=2,
    canny_threshold1=50,
    canny_threshold2=150,
    perspective_padding=20,
)

# Tesseract settings; `table_psm` is the page-segmentation mode used for table cells.
OCR_CONFIG = dict(
    lang='chi_sim+eng',
    oem=3,
    psm=6,
    table_psm=11,
)

# Deep-learning detector: checkpoint location and score / NMS thresholds.
MODEL_CONFIG = dict(
    invoice_detector=os.path.join(MODELS_DIR, 'invoice_detector.pth'),
    confidence_threshold=0.8,
    nms_threshold=0.4,
)

# Limits and mandatory fields consulted by the validation step.
VALIDATION_RULES = dict(
    max_invoice_age_months=3,
    min_amount=1.0,
    max_amount=100000.0,
    required_fields=['发票代码', '发票号码', '开票日期', '金额'],
)

# Make sure the input / output directories exist as soon as the config is imported.
# (Creating REPORTS_DIR also creates RESULTS_DIR; MODELS_DIR is not created here.)
os.makedirs(INVOICE_DIR, exist_ok=True)
os.makedirs(PROCESSED_DIR, exist_ok=True)
os.makedirs(REPORTS_DIR, exist_ok=True)
```
### 2. requirements.txt - 依赖库
```
opencv-python==4.5.5.64
numpy==1.22.3
pytesseract==0.3.9
Pillow==9.1.0
matplotlib==3.5.1
scikit-image==0.19.2
torch==1.11.0
torchvision==0.12.0
pandas==1.4.2
scipy==1.8.0
seaborn==0.11.2
```
### 3. src/exceptions.py - 自定义异常
```python
# src/exceptions.py
class InvoiceProcessingError(Exception):
    """Root of the invoice-pipeline exception hierarchy."""


class InvoiceNotFoundError(InvoiceProcessingError):
    """Raised when no invoice could be detected."""


class OCRFailureError(InvoiceProcessingError):
    """Raised when OCR recognition fails."""


class ValidationError(InvoiceProcessingError):
    """Raised when extracted data fails validation."""


class PerspectiveTransformError(InvoiceProcessingError):
    """Raised when the perspective transform fails."""
```
### 4. src/utils.py - 辅助函数
```python
# src/utils.py
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from config import PROCESSED_DIR, REPORTS_DIR
def save_processed_image(image, filename, suffix=""):
    """Write *image* into PROCESSED_DIR and return the path that was used.

    A non-empty *suffix* is inserted before the extension, giving
    ``<name>_<suffix><ext>``.  The result of ``cv2.imwrite`` is not checked.
    """
    if suffix:
        stem, extension = os.path.splitext(filename)
        target_name = f"{stem}_{suffix}{extension}"
    else:
        target_name = filename
    destination = os.path.join(PROCESSED_DIR, target_name)
    cv2.imwrite(destination, image)
    return destination
def plot_histogram(data, title, xlabel, ylabel, filename):
    """Render a 20-bin histogram of *data*, save it under REPORTS_DIR, return the path."""
    destination = os.path.join(REPORTS_DIR, filename)
    plt.figure(figsize=(10, 6))
    plt.hist(data, bins=20, alpha=0.7, color='skyblue')
    # Labels are supplied by the caller.
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.savefig(destination)
    # Close the figure once it has been written to disk.
    plt.close()
    return destination
def order_points(pts):
    """Return the four points as float32 in the order TL, TR, BR, BL.

    The corner with the smallest x+y is taken as top-left and the largest as
    bottom-right; the smallest y-x is top-right and the largest bottom-left.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = pts[:, 1] - pts[:, 0]  # y - x for each point
    picks = (
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_sum),   # bottom-right
        np.argmax(coord_diff),  # bottom-left
    )
    ordered = np.zeros((4, 2), dtype="float32")
    for slot, source_index in enumerate(picks):
        ordered[slot] = pts[source_index]
    return ordered
def four_point_transform(image, pts, padding=0):
    """Warp the quadrilateral *pts* of *image* onto an upright rectangle.

    The output size is the longer of each pair of opposite sides (truncated
    to int), enlarged by *padding* pixels on every side.
    """
    def _side(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    out_w = max(int(_side(bottom_right, bottom_left)), int(_side(top_right, top_left)))
    out_h = max(int(_side(top_right, bottom_right)), int(_side(top_left, bottom_left)))

    # Destination corners, in the same TL, TR, BR, BL order as `corners`.
    far_x = out_w - 1 + padding
    far_y = out_h - 1 + padding
    target = np.array(
        [
            [padding, padding],
            [far_x, padding],
            [far_x, far_y],
            [padding, far_y],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    canvas_size = (out_w + 2*padding, out_h + 2*padding)
    return cv2.warpPerspective(image, matrix, canvas_size)
```
### 5. src/preprocessing.py - 图像预处理
```python
# src/preprocessing.py
import cv2
import numpy as np
from .exceptions import InvoiceProcessingError
from config import PREPROCESS_PARAMS
from .utils import save_processed_image
def preprocess_image(image_path):
    """Run the pre-processing chain on one image file.

    Steps: read -> grayscale -> non-local-means denoise -> adaptive binarise
    -> Canny edges.  Every intermediate image is also written out through
    ``save_processed_image`` with a suffix naming the step.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.

    Returns:
        Tuple ``(orig, gray, denoised, binary, edges)``.

    Raises:
        InvoiceProcessingError: If ``cv2.imread`` cannot read the file.
    """
    # The module never imported `os`, so the basename call below raised
    # NameError; import it locally to keep this block self-contained.
    import os

    # 1. Read the image.
    orig = cv2.imread(image_path)
    if orig is None:
        raise InvoiceProcessingError(f"无法读取图像: {image_path}")
    filename = os.path.basename(image_path)

    # 2. Grayscale.
    gray = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY)
    save_processed_image(gray, filename, "gray")

    # 3. Non-local-means denoising.
    denoised = cv2.fastNlMeansDenoising(
        gray,
        h=PREPROCESS_PARAMS['denoise_h']
    )
    save_processed_image(denoised, filename, "denoised")

    # 4. Adaptive binarisation (inverted: dark content becomes white).
    binary = cv2.adaptiveThreshold(
        denoised, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        PREPROCESS_PARAMS['adaptive_block_size'],
        PREPROCESS_PARAMS['adaptive_c']
    )
    save_processed_image(binary, filename, "binary")

    # 5. Edge detection on the binary image.
    edges = cv2.Canny(
        binary,
        PREPROCESS_PARAMS['canny_threshold1'],
        PREPROCESS_PARAMS['canny_threshold2']
    )
    save_processed_image(edges, filename, "edges")

    return orig, gray, denoised, binary, edges
```
### 6. src/detection.py - 发票检测与定位
```python
# src/detection.py
import cv2
import numpy as np
import torch
import torchvision
from .exceptions import InvoiceNotFoundError, PerspectiveTransformError
from config import MODEL_CONFIG, PREPROCESS_PARAMS
from .utils import four_point_transform, save_processed_image
def detect_invoice_contour(edges):
    """Locate the invoice outline with classic contour analysis.

    Looks at the five largest external contours of *edges* (by area) and
    returns the first one whose polygonal approximation has four vertices,
    as a ``(4, 2)`` array.  Raises InvoiceNotFoundError when there are no
    contours at all or none of the candidates is a quadrilateral.
    """
    found, _hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not found:
        raise InvoiceNotFoundError("未检测到任何轮廓")

    largest_first = sorted(found, key=cv2.contourArea, reverse=True)
    for candidate in largest_first[:5]:
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            # First quadrilateral wins.
            return polygon.reshape(4, 2)

    raise InvoiceNotFoundError("未找到有效的发票轮廓")
# Loaded detectors keyed by checkpoint path, so batch runs load the model once.
_DETECTOR_CACHE = {}


def _load_detector():
    """Return the detector from MODEL_CONFIG in eval mode, loading it at most once."""
    path = MODEL_CONFIG['invoice_detector']
    model = _DETECTOR_CACHE.get(path)
    if model is None:
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code; only point this at checkpoints you trust.
        # map_location='cpu' because the input tensor below is never moved
        # to a GPU, so a model restored onto CUDA would fail at inference.
        model = torch.load(path, map_location='cpu')
        if not isinstance(model, torch.nn.Module):
            # A state_dict-only checkpoint has no .eval(); fail with a clear message.
            raise TypeError(
                f"{path} does not contain a full torch.nn.Module "
                f"(got {type(model).__name__}); load it into a model class first"
            )
        model.eval()
        _DETECTOR_CACHE[path] = model
    return model


def detect_invoice_dl(image):
    """Locate the invoice with the deep-learning detector.

    Args:
        image: Image array accepted by ``torchvision.transforms.ToTensor``.

    Returns:
        ``(4, 2)`` integer array with the corners of the best box in the
        order top-left, top-right, bottom-right, bottom-left.

    Raises:
        InvoiceNotFoundError: If no detection scores above
            ``MODEL_CONFIG['confidence_threshold']``.
    """
    model = _load_detector()

    # NOTE(review): the mean/std below are the usual ImageNet RGB statistics,
    # while the pipeline passes a cv2 (BGR) image — confirm which channel
    # order the checkpoint was trained with.
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    input_tensor = transform(image).unsqueeze(0)

    with torch.no_grad():
        predictions = model(input_tensor)

    boxes = predictions[0]['boxes']
    scores = predictions[0]['scores']
    keep = torchvision.ops.nms(boxes, scores, MODEL_CONFIG['nms_threshold'])

    # Keep the highest-scoring survivor that beats the confidence threshold.
    best_score = MODEL_CONFIG['confidence_threshold']
    best_box = None
    for idx in keep:
        score = scores[idx].item()
        if score > best_score:
            best_score = score
            best_box = boxes[idx].cpu().numpy().astype(int)

    if best_box is None:
        raise InvoiceNotFoundError("深度学习模型未检测到发票")

    # Expand the axis-aligned box into four corner points.
    x1, y1, x2, y2 = best_box
    return np.array([
        [x1, y1],
        [x2, y1],
        [x2, y2],
        [x1, y2]
    ])
def extract_invoice_region(orig, edges, method='hybrid', filename='invoice.png'):
    """Find the invoice in *orig* and return a perspective-corrected crop.

    Args:
        orig: Source image array (it is copied and warped, never modified).
        edges: Edge map handed to ``detect_invoice_contour``.
        method: ``'traditional'`` (contours only), ``'deep_learning'``
            (detector only); any other value tries contours first and falls
            back to the detector.
        filename: Base name used for the debug images written through
            ``save_processed_image``.  The previous code derived it with
            ``os.path.basename(orig)``, which cannot work on an image array,
            so it is now an optional argument; pass the source file name to
            avoid overwriting the debug images between invoices.

    Returns:
        The warped image.

    Raises:
        InvoiceNotFoundError: If no invoice could be located.
        PerspectiveTransformError: If anything else fails while marking or
            warping the region.
    """
    try:
        if method == 'traditional':
            points = detect_invoice_contour(edges)
        elif method == 'deep_learning':
            points = detect_invoice_dl(orig)
        else:  # hybrid
            try:
                points = detect_invoice_contour(edges)
            except InvoiceNotFoundError:
                points = detect_invoice_dl(orig)

        # Mark the detected corners on a copy for debugging.
        marked = orig.copy()
        for x, y in points:
            # Plain ints: some OpenCV builds reject numpy scalars as a center.
            cv2.circle(marked, (int(x), int(y)), 10, (0, 0, 255), -1)
        save_processed_image(marked, filename, "detected_points")

        warped = four_point_transform(
            orig,
            points,
            padding=PREPROCESS_PARAMS['perspective_padding']
        )
        save_processed_image(warped, filename, "warped")
        return warped
    except InvoiceNotFoundError:
        # "Nothing detected" is not a transform failure; keep its own type
        # (still an InvoiceProcessingError for callers catching the base).
        raise
    except Exception as e:
        raise PerspectiveTransformError(f"透视变换失败: {str(e)}") from e
```
### 7. src/ocr.py - 文字识别
```python
# src/ocr.py
import pytesseract
from PIL import Image
import cv2
import numpy as np
import re
from .exceptions import OCRFailureError
from config import OCR_CONFIG
def enhance_text_region(image):
    """Boost text legibility: CLAHE on the LAB lightness channel, then sharpen.

    *image* is converted with ``COLOR_BGR2LAB``, so it is treated as BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))

    # Contrast-limited equalisation touches only the lightness channel.
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    boosted = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    contrast_bgr = cv2.cvtColor(boosted, cv2.COLOR_LAB2BGR)

    # 3x3 sharpening kernel (coefficients sum to 1).
    sharpen_kernel = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
    return cv2.filter2D(contrast_bgr, -1, sharpen_kernel)
def extract_text(image, config=None):
    """Run Tesseract on *image* and return the stripped text.

    When *config* is omitted the command-line options are built from
    OCR_CONFIG (oem, psm, lang).  Any exception raised by pytesseract is
    re-raised as OCRFailureError.
    """
    tess_options = config
    if tess_options is None:
        tess_options = f"--oem {OCR_CONFIG['oem']} --psm {OCR_CONFIG['psm']} -l {OCR_CONFIG['lang']}"

    # pytesseract is fed a PIL image, so convert BGR -> RGB first.
    rgb_pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb_pixels)

    try:
        recognised = pytesseract.image_to_string(pil_img, config=tess_options)
        return recognised.strip()
    except Exception as e:
        raise OCRFailureError(f"OCR识别失败: {str(e)}")
def extract_invoice_info(text):
    """Pull structured invoice fields out of raw OCR text.

    Compared with the previous patterns this version also accepts full-width
    punctuation (``:``, ``¥``, ``(``/``)``), a closing parenthesis right
    after ``小写`` (as in ``(小写)¥100.00``), thousands separators in
    amounts, and OCR-inserted spaces inside the date.

    Args:
        text: OCR output.

    Returns:
        Dict with the keys 发票代码, 发票号码, 开票日期, 购买方, 销售方, 金额,
        价税合计 and 校验码.  Values are strings, or ``None`` when a field
        was not found.  Amounts have thousands separators removed and dates
        have inner whitespace removed so later parsing can consume them.

    Raises:
        OCRFailureError: If *text* is empty or only whitespace.
    """
    # Local import keeps the block self-contained.
    import re

    if not text or not text.strip():
        raise OCRFailureError("OCR未返回任何文本")

    colon = r'[::]'                 # ASCII or full-width colon
    yen = r'[¥¥]'                   # half- or full-width yen sign
    amount = r'(\d[\d,]*\.\d{2})'    # allows 1,234.56

    patterns = {
        '发票代码': rf'发票代码\s*{colon}?\s*(\d+)',
        '发票号码': rf'发票号码\s*{colon}?\s*(\d+)',
        '开票日期': rf'开票日期\s*{colon}?\s*(\d{{4}}\s*年\s*\d{{1,2}}\s*月\s*\d{{1,2}}\s*日)',
        '购买方': rf'购买方{colon}\s*名\s*称\s*{colon}\s*([^\n]+)',
        '销售方': rf'销售方{colon}\s*名\s*称\s*{colon}\s*([^\n]+)',
        '金额': rf'小写\s*[))]?\s*{colon}?\s*{yen}?\s*{amount}',
        '价税合计': rf'价税合计\s*[((].*[))]\s*{colon}?\s*{yen}?\s*{amount}',
        '校验码': rf'校验码\s*{colon}?\s*([0-9a-zA-Z]{{20}})',
    }

    results = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text)
        results[key] = match.group(1) if match else None

    # Fall back to the first yen-prefixed amount when "小写" was not found.
    if results['金额'] is None:
        amount_match = re.search(rf'{yen}\s*{amount}', text)
        results['金额'] = amount_match.group(1) if amount_match else None

    # Normalise so float() and date parsing downstream accept the values.
    for key in ('金额', '价税合计'):
        if results[key] is not None:
            results[key] = results[key].replace(',', '')
    if results['开票日期'] is not None:
        results['开票日期'] = re.sub(r'\s+', '', results['开票日期'])

    return results
```
### 8. src/table_recognition.py - 表格识别
```python
# src/table_recognition.py
import cv2
import numpy as np
import pytesseract
from .exceptions import OCRFailureError
from config import OCR_CONFIG
def detect_table_lines(image):
    """Return ``(table_lines, horizontal, vertical)`` line masks for *image*.

    The image is converted to grayscale and adaptively thresholded
    (inverted); long horizontal and vertical runs are then isolated with
    morphological opening and added together.
    """
    binarised = cv2.adaptiveThreshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 25, 16
    )

    def _open_with(kernel_size):
        # Opening with a 1-pixel-thick kernel keeps only strokes along that axis.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        return cv2.morphologyEx(binarised, cv2.MORPH_OPEN, kernel, iterations=2)

    horizontal = _open_with((40, 1))
    vertical = _open_with((1, 40))
    return cv2.add(horizontal, vertical), horizontal, vertical
def extract_table_cells(image):
    """Return the bounding boxes of table cells found in *image*.

    The previous code used ``cv2.RETR_EXTERNAL`` on the line mask, which
    only reports the outermost outline of each connected line network — a
    closed grid therefore came back as one big "cell".  Cells are the holes
    enclosed by the grid lines, so ``RETR_CCOMP`` is used and only contours
    that have a parent (hole boundaries) are kept.  If no hole qualifies,
    the outer contours are used instead, as before.

    Args:
        image: Image passed to ``detect_table_lines``.

    Returns:
        List of ``(x, y, w, h)`` tuples, both sides larger than 20 pixels,
        sorted by ``(y, x)``.
    """
    table_lines, _horizontal, _vertical = detect_table_lines(image)

    contours, hierarchy = cv2.findContours(
        table_lines, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE
    )
    if hierarchy is None:  # no contours at all
        return []

    def _boxes(want_holes):
        boxes = []
        # hierarchy[0][i] is [next, previous, first_child, parent].
        for cnt, (_next, _prev, _child, parent) in zip(contours, hierarchy[0]):
            if (parent != -1) != want_holes:
                continue
            x, y, w, h = cv2.boundingRect(cnt)
            # Drop regions too small to be a cell.
            if w > 20 and h > 20:
                boxes.append((x, y, w, h))
        return boxes

    cells = _boxes(want_holes=True) or _boxes(want_holes=False)

    # Top-to-bottom, then left-to-right.
    cells.sort(key=lambda c: (c[1], c[0]))
    return cells
def recognize_table(image):
    """OCR every detected table cell of *image*.

    Returns one dict per cell (keys ``cell_id``, ``x``, ``y``, ``width``,
    ``height``, ``text``) in the order produced by ``extract_table_cells``.
    """
    tess_options = f"--oem {OCR_CONFIG['oem']} --psm {OCR_CONFIG['table_psm']} -l {OCR_CONFIG['lang']}"

    def _describe(cell_id, box):
        x, y, w, h = box
        crop = image[y:y+h, x:x+w]
        recognised = pytesseract.image_to_string(crop, config=tess_options).strip()
        return {
            'cell_id': cell_id,
            'x': x,
            'y': y,
            'width': w,
            'height': h,
            'text': recognised
        }

    return [_describe(i, box) for i, box in enumerate(extract_table_cells(image))]
```
### 9. src/validation.py - 数据验证
```python
# src/validation.py
import datetime
import re
from .exceptions import ValidationError
from config import VALIDATION_RULES
def validate_invoice_info(info):
    """Check extracted invoice fields against VALIDATION_RULES.

    All problems are collected first; if there are any, a single
    ValidationError is raised with the messages joined by "; ".
    Returns True when nothing is wrong.
    """
    # Required fields must be present and non-empty.
    errors = [
        f"缺少必填字段: {field}"
        for field in VALIDATION_RULES['required_fields']
        if not info.get(field)
    ]

    # Invoice code: 10-12 digits.
    code = info.get('发票代码')
    if code and not re.match(r'^\d{10,12}$', info['发票代码']):
        errors.append("发票代码格式错误")

    # Invoice number: 8 digits.
    number = info.get('发票号码')
    if number and not re.match(r'^\d{8}$', info['发票号码']):
        errors.append("发票号码格式错误")

    # Issue date: must parse, must not be in the future, must not be too old.
    if info.get('开票日期'):
        try:
            iso_like = info['开票日期'].replace('年', '-').replace('月', '-').replace('日', '')
            issued = datetime.datetime.strptime(iso_like, '%Y-%m-%d')
        except ValueError:
            errors.append("开票日期格式解析错误")
        else:
            now = datetime.datetime.now()
            # A month is approximated as 30 days.
            oldest_allowed = datetime.timedelta(days=VALIDATION_RULES['max_invoice_age_months']*30)
            if issued > now:
                errors.append("开票日期不能是未来日期")
            elif now - issued > oldest_allowed:
                errors.append("开票日期超过有效期限")

    # Amount: numeric and inside the configured range.
    if info.get('金额'):
        try:
            amount = float(info['金额'])
        except ValueError:
            errors.append("金额格式错误")
        else:
            if amount < VALIDATION_RULES['min_amount']:
                errors.append(f"金额过小: {amount}")
            elif amount > VALIDATION_RULES['max_amount']:
                errors.append(f"金额过大: {amount}")

    # Check code: 20 alphanumeric characters.
    check_code = info.get('校验码')
    if check_code and not re.match(r'^[0-9a-zA-Z]{20}$', info['校验码']):
        errors.append("校验码格式错误")

    if not errors:
        return True
    raise ValidationError("; ".join(errors))
```
### 10. src/visualization.py - 可视化工具
```python
# src/visualization.py
import cv2
import matplotlib.pyplot as plt
import numpy as np
from .utils import save_processed_image
def visualize_processing_steps(image_path, steps):
    """Save one figure showing the original image followed by each step.

    Args:
        image_path: Path of the source image; re-read here for the first panel.
        steps: Mapping ``title -> image``.  2-D arrays are shown as grayscale,
            anything else is converted BGR -> RGB.

    Returns:
        Path of the saved figure, ``processing_steps_<basename>`` inside
        PROCESSED_DIR.
    """
    # The module imported neither `os` nor PROCESSED_DIR, so the save step
    # raised NameError; import them locally to keep this block self-contained.
    import os
    from config import PROCESSED_DIR

    # Three columns; at least the original two rows, more if there are more
    # than five steps (the fixed 2x3 grid failed on a sixth step).
    columns = 3
    panels = len(steps) + 1
    rows = max(2, (panels + columns - 1) // columns)

    # NOTE(review): the Chinese titles need a CJK-capable matplotlib font;
    # with the default font they will probably render as empty boxes — confirm.
    plt.figure(figsize=(15, 8))

    # Panel 1: the original image.
    orig = cv2.imread(image_path)
    orig_rgb = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
    plt.subplot(rows, columns, 1)
    plt.imshow(orig_rgb)
    plt.title("原始图像")
    plt.axis('off')

    # Remaining panels: one per processing step.
    for i, (title, image) in enumerate(steps.items(), 2):
        plt.subplot(rows, columns, i)
        if len(image.shape) == 2:  # grayscale
            plt.imshow(image, cmap='gray')
        else:
            plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.title(title)
        plt.axis('off')

    plt.tight_layout()

    filename = os.path.basename(image_path)
    output_path = os.path.join(PROCESSED_DIR, f"processing_steps_{filename}")
    plt.savefig(output_path)
    plt.close()
    return output_path
def visualize_table(image, table_data, filename="table.png"):
    """Draw the recognised cells on a copy of *image* and save the result.

    Args:
        image: Image the cells were detected on; it is not modified.
        table_data: Iterable of dicts with the keys ``x``, ``y``, ``width``,
            ``height`` and ``text``.
        filename: Base name for the output file.  The previous code read an
            undefined ``image_path`` here (NameError), so the name is now an
            optional argument.

    Returns:
        Tuple ``(output_path, table_vis)``; the file is written to
        ``table_detection_<filename>`` inside PROCESSED_DIR.
    """
    # Neither `os` nor PROCESSED_DIR was imported by the module.
    import os
    from config import PROCESSED_DIR

    table_vis = image.copy()

    for cell in table_data:
        x, y, w, h = cell['x'], cell['y'], cell['width'], cell['height']
        # Cell outline.
        cv2.rectangle(table_vis, (x, y), (x+w, y+h), (0, 255, 0), 2)
        # Recognised text.
        # NOTE(review): cv2.putText's Hershey fonts appear to be ASCII-only,
        # so Chinese text will likely be drawn as '?' — confirm, and render
        # with PIL if readable labels are needed.
        cv2.putText(table_vis, cell['text'], (x+5, y+20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    output_path = os.path.join(PROCESSED_DIR, f"table_detection_{os.path.basename(filename)}")
    cv2.imwrite(output_path, table_vis)
    return output_path, table_vis
```
### 11. main.py - 主程序入口
```python
# main.py
import os
import argparse
import json
import pandas as pd
from datetime import datetime
from config import INVOICE_DIR, RESULTS_DIR, REPORTS_DIR
from src.preprocessing import preprocess_image
from src.detection import extract_invoice_region
from src.ocr import enhance_text_region, extract_text, extract_invoice_info
from src.table_recognition import recognize_table
from src.validation import validate_invoice_info
from src.visualization import visualize_processing_steps, visualize_table
from src.exceptions import InvoiceProcessingError, InvoiceNotFoundError, OCRFailureError, ValidationError
from src.utils import plot_histogram
def process_invoice(image_path, output_report=False):
    """Run the whole pipeline on one invoice image and return a result dict.

    On success the dict has ``status == 'success'`` plus the OCR text, the
    extracted fields, table data and visualisation paths.  Any
    InvoiceProcessingError (validation failures included) gives
    ``status == 'failed'`` with the error message instead.  Table
    recognition is best-effort: its failure is only printed.
    *output_report* is accepted but not used.
    """
    divider = "-" * 50
    base_name = os.path.basename(image_path)

    try:
        print(f"\n处理发票: {base_name}")
        print(divider)

        # 1-2. Pre-process, then locate and rectify the invoice.
        orig, gray, denoised, binary, edges = preprocess_image(image_path)
        warped = extract_invoice_region(orig, edges, method='hybrid')

        # 3-4. Enhance and OCR.
        text = extract_text(enhance_text_region(warped))
        print("OCR识别结果摘要:")
        preview = text[:500] + "..." if len(text) > 500 else text
        print(preview)
        print(divider)

        # 5. Structured fields.
        info = extract_invoice_info(text)
        print("提取的发票信息:")
        for field_name, field_value in info.items():
            print(f"{field_name}: {field_value}")

        # 6. Table recognition (optional).
        try:
            table_data = recognize_table(warped)
        except Exception as e:
            table_data = []
            print(f"表格识别失败: {str(e)}")
        else:
            print(f"检测到 {len(table_data)} 个表格单元格")

        # 7. Validation raises on failure and ends in the handler below.
        validate_invoice_info(info)
        print("数据验证通过")

        # 8. Overview figure of the processing steps.
        steps_path = visualize_processing_steps(image_path, {
            "灰度化": gray,
            "去噪": denoised,
            "二值化": binary,
            "边缘检测": edges,
            "校正后": warped
        })

        # 9. Table overlay, only when cells were found.
        if table_data:
            table_vis_path, _ = visualize_table(warped, table_data)
        else:
            table_vis_path = None

        result = {
            'filename': base_name,
            'processing_steps': steps_path,
            'table_visualization': table_vis_path,
            'ocr_text': text,
            'extracted_info': info,
            'table_data': table_data,
            'timestamp': datetime.now().isoformat(),
            'status': 'success'
        }
        print(f"\n处理完成: {image_path}")
        return result

    except InvoiceProcessingError as e:
        print(f"\n处理失败: {str(e)}")
        return {
            'filename': base_name,
            'error': str(e),
            'timestamp': datetime.now().isoformat(),
            'status': 'failed'
        }
def batch_process_invoices(input_dir, output_dir):
    """Process every image in *input_dir* and write summary files.

    Writes ``invoice_results.json`` and ``invoice_summary.csv`` into
    *output_dir* (created if missing) and, when at least one amount was
    extracted, an amount histogram through ``plot_histogram``.

    Args:
        input_dir: Directory scanned (non-recursively) for .png/.jpg/.jpeg files.
        output_dir: Directory receiving the JSON and CSV files.

    Returns:
        List of per-invoice result dicts, or ``None`` when *input_dir*
        contains no image files.
    """
    results = []
    amounts = []

    # Sorted so runs are reproducible; os.listdir order is arbitrary.
    image_files = sorted(
        f for f in os.listdir(input_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if not image_files:
        print(f"在目录 {input_dir} 中未找到图像文件")
        return

    # A custom --output directory may not exist yet.
    os.makedirs(output_dir, exist_ok=True)

    print(f"开始批量处理 {len(image_files)} 张发票...")

    for i, filename in enumerate(image_files):
        image_path = os.path.join(input_dir, filename)
        print(f"\n[{i+1}/{len(image_files)}] 处理 {filename}")
        result = process_invoice(image_path)
        results.append(result)

        # Collect amounts for the histogram.  The key is always present in
        # extracted_info (possibly None), so test the value, not membership.
        if result['status'] == 'success':
            raw_amount = result['extracted_info'].get('金额')
            if raw_amount is not None:
                try:
                    amounts.append(float(raw_amount))
                except (TypeError, ValueError):
                    # Was a bare `except: pass`; report instead of hiding it.
                    print(f"跳过无法解析的金额: {raw_amount!r}")

    json_path = os.path.join(output_dir, 'invoice_results.json')
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n处理完成,结果已保存至: {json_path}")

    if amounts:
        hist_path = plot_histogram(
            amounts,
            "发票金额分布",
            "金额 (元)",
            "发票数量",
            "amount_distribution.png"
        )
        print(f"金额分布图已保存至: {hist_path}")

    # One CSV row per invoice; failed ones carry only the error message.
    summary_data = []
    for result in results:
        if result['status'] == 'success':
            info = result['extracted_info']
            summary_data.append({
                '文件名': result['filename'],
                '发票代码': info.get('发票代码', ''),
                '发票号码': info.get('发票号码', ''),
                '开票日期': info.get('开票日期', ''),
                '金额': info.get('金额', ''),
                '购买方': info.get('购买方', ''),
                '销售方': info.get('销售方', '')
            })
        else:
            summary_data.append({
                '文件名': result['filename'],
                '错误': result['error']
            })

    df = pd.DataFrame(summary_data)
    csv_path = os.path.join(output_dir, 'invoice_summary.csv')
    df.to_csv(csv_path, index=False, encoding='utf-8-sig')
    print(f"摘要报告已保存至: {csv_path}")

    return results
if __name__ == "__main__":
    # Command-line entry point: process one file (--single) or a whole directory.
    parser = argparse.ArgumentParser(description='电子发票识别系统')
    parser.add_argument('--input', type=str, default=INVOICE_DIR,
                        help='输入发票目录路径')
    parser.add_argument('--output', type=str, default=RESULTS_DIR,
                        help='结果输出目录路径')
    parser.add_argument('--single', type=str,
                        help='处理单个发票文件路径')
    args = parser.parse_args()

    if args.single:
        # Single invoice: print the result dict as JSON.
        result = process_invoice(args.single, output_report=True)
        print("\n处理结果:")
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        # Batch mode over the input directory.
        batch_process_invoices(args.input, args.output)
    # NOTE(review): in the original paste a question ("can this code run
    # correctly and implement the feature?") was fused onto the end of the
    # batch call above, which made the line a SyntaxError; it was prose, not
    # code, and is preserved here only as this comment.