import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from collections import Counter
from PIL import Image
import pandas as pd
from scipy.ndimage import gaussian_filter1d
from sklearn.cluster import AgglomerativeClustering
import time
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
# 1. 读取图像并进行预处理
data_dir = './附件4'
path = data_dir + '/*.bmp'
coll = io.ImageCollection(path) # 读入灰度图像
img_num = len(coll)
# 转矩阵并二值化
img = np.asarray(coll)
for i in range(len(coll)):
img[i] = cv2.adaptiveThreshold(
src=img[i],
maxValue=1,
adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
thresholdType=cv2.THRESH_BINARY,
blockSize=13,
C=2
)
print("图像数据形状:", img.shape)
# 2. 计算每张图片的左右边距
left = []
right = []
for i in range(img.shape[0]):
# 计算左边距
count = 0
for y in range(img.shape[2]): # 列
if np.any(img[i, :, y] == 0): # 该列有文字
break
count += 1
left.append(count)
# 计算右边距
count = 0
for y in range(img.shape[2] - 1, -1, -1): # 从右向左
if np.any(img[i, :, y] == 0): # 该列有文字
break
count += 1
right.append(count)
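# 与上述循环等价的向量化写法(仅作示意和自检,不影响主流程),假设 img 形状为 (碎片数, 高, 宽) 且 0 表示文字像素
_has_text = (img == 0).any(axis=1)  # (碎片数, 宽): 每一列是否含文字
_left_vec = np.where(_has_text.any(axis=1), _has_text.argmax(axis=1), img.shape[2])
_right_vec = np.where(_has_text.any(axis=1), _has_text[:, ::-1].argmax(axis=1), img.shape[2])
if not (np.array_equal(_left_vec, left) and np.array_equal(_right_vec, right)):
    print("提示: 向量化边距与循环结果不一致,请检查二值化取值约定")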
plt.figure(figsize=(10, 6))
plt.scatter(range(len(left)), left, label='左边距')
plt.scatter(range(len(right)), right, label='右边距')
plt.title('碎片左右边距分布')
plt.xlabel('碎片索引')
plt.ylabel('边距大小(像素)')
plt.legend()
plt.grid(True)
plt.savefig('margin_distribution.png')
plt.close()
print("左边距统计:", Counter(left))
print("右边距统计:", Counter(right))
# 3. 确定行首和行尾碎片
fenge = 15 # 边距阈值,超过此值认为是行首或行尾
col = 19 # 列数
row = 11 # 行数
# 找出行尾碎片
end_index = [i for i, r in enumerate(right) if r >= fenge]
print(f"行尾碎片数量: {len(end_index)}")
# 找出行首碎片
first_index = [i for i, l in enumerate(left) if l >= fenge]
print(f"行首碎片数量: {len(first_index)}")
# 4. 修改的特征提取函数:考虑行高度类型
def extract_english_features(image):
"""
提取英文文本的四线三行特征,考虑行高度类型(整行/半行)
image: 预处理后的二值图像 (0=文字, 1=空白)
"""
h, w = image.shape
features = []
# 1. 计算水平投影
horizontal_proj = np.sum(1 - image, axis=1) # 反转:文字区域值高
# 2. 平滑投影曲线
smoothed_proj = gaussian_filter1d(horizontal_proj, sigma=1.5)
# 3. 检测文本行区域
line_regions = []
in_text = False
start = 0
threshold = 0.1 * np.max(smoothed_proj)
for i, val in enumerate(smoothed_proj):
if val > threshold and not in_text:
in_text = True
start = i
elif val <= threshold and in_text:
in_text = False
line_regions.append((start, i))
# 处理最后一行
if in_text:
line_regions.append((start, h - 1))
# 4. 分析每行的四线三行特征,添加行高度类型检测
for start, end in line_regions:
line_height = end - start
if line_height < 5: # 忽略太小的区域
continue
line_img = image[start:end, :]
# 计算垂直投影
vertical_proj = np.sum(1 - line_img, axis=0)
# 检测基线(字母底部) - 使用下边缘检测
bottom_edge = np.zeros(w, dtype=int)
for col in range(w):
col_data = line_img[:, col]
if np.any(col_data == 0): # 该列有文字
# 找到最底部的文字像素(即最大的行索引)
row_indices = np.where(col_data == 0)[0]
bottom_edge[col] = row_indices[-1] # 最后出现的文字像素
# 只考虑有文字的区域
text_cols = np.where(vertical_proj > 0)[0]
if len(text_cols) == 0:
continue
# 基线位置(最常见的底部位置)
baseline_pos = np.median(bottom_edge[text_cols])
# 检测顶线(大写字母高度) - 使用上边缘检测
top_edge = np.zeros(w, dtype=int)
for col in range(w):
col_data = line_img[:, col]
if np.any(col_data == 0): # 该列有文字
# 找到最顶部的文字像素(即最小的行索引)
row_indices = np.where(col_data == 0)[0]
top_edge[col] = row_indices[0] # 第一个出现的文字像素
# 顶线位置(最常见的顶部位置)
topline_pos = np.median(top_edge[text_cols])
# 检测下伸区域(g, j, p, q, y等)
descender_pos = baseline_pos
for col in text_cols:
if bottom_edge[col] > baseline_pos + 3: # 下伸部分至少比基线低3像素
if bottom_edge[col] > descender_pos:
descender_pos = bottom_edge[col]
# 检测上伸区域(b, d, f, h, k, l等)
ascender_pos = topline_pos
for col in text_cols:
if top_edge[col] < topline_pos - 3: # 上伸部分至少比顶线高3像素
if top_edge[col] < ascender_pos:
ascender_pos = top_edge[col]
# 计算中线位置(小写字母高度)
midline_pos = (topline_pos + baseline_pos) / 2
# 如果检测到上伸部分,调整顶线位置
if ascender_pos < topline_pos - 3:
topline_pos = ascender_pos
# 如果检测到下伸部分,调整底线位置
if descender_pos > baseline_pos + 3:
bottomline_pos = descender_pos
else:
bottomline_pos = baseline_pos # 没有下伸字母,底线与基线重合
# 添加行高度类型检测 - 基于已知的字母高度
# 小写字母a高度:28像素,大写字母/特殊字母高度:40像素
text_height = bottomline_pos - topline_pos
# 判断行类型
if 35 <= text_height <= 45: # 整行(包含大写字母或特殊字母)
line_type = 1.0
elif 25 <= text_height <= 34: # 整行(仅小写字母)
line_type = 1.0
elif 15 <= text_height <= 24: # 半行
line_type = 0.5
else:
line_type = 1.0 # 默认视为整行
# 添加行高度类型特征
features.append({
'start': start,
'end': end,
'baseline': baseline_pos,
'midline': midline_pos,
'topline': topline_pos,
'bottomline': bottomline_pos,
'height': line_height,
'text_height': text_height, # 实际文本高度
'line_type': line_type, # 行高度类型 (1.0=整行, 0.5=半行)
'has_ascender': ascender_pos < topline_pos - 3, # 是否包含上伸字母
'has_descender': descender_pos > baseline_pos + 3, # 是否包含下伸字母
'ascender_height': topline_pos - ascender_pos if ascender_pos < topline_pos - 3 else 0,
'descender_height': descender_pos - baseline_pos if descender_pos > baseline_pos + 3 else 0
})
return features
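# 用法示例(示意): 先对单块碎片调用一次,直观查看检测到的行区域和特征线位置
_demo_feats = extract_english_features(img[0])
for _f in _demo_feats:
    print(f"碎片0 行区域 [{_f['start']}, {_f['end']}]: 基线={_f['baseline']:.1f}, "
          f"顶线={_f['topline']:.1f}, 行类型={_f['line_type']}")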
# 提取所有碎片的特征
print("开始提取特征...")
start_time = time.time()
fragment_features = []
line_type_counts = [] # 记录每个碎片的行高度类型
for i in range(img.shape[0]):
features = extract_english_features(img[i])
fragment_features.append(features)
# 记录行高度类型
if features:
line_types = [feat['line_type'] for feat in features]
line_type_counts.append(sum(line_types))
else:
line_type_counts.append(0)
if (i + 1) % 10 == 0:
print(f"已处理 {i + 1}/{img.shape[0]} 个碎片")
end_time = time.time()
print(f"特征提取完成,耗时 {end_time - start_time:.2f} 秒")
# 可视化行高度类型分布
plt.figure(figsize=(10, 6))
plt.hist(line_type_counts, bins=np.arange(0, 3.5, 0.5), edgecolor='black')
plt.title('碎片行高度类型分布')
plt.xlabel('行高度类型(1.0=整行, 0.5=半行)')
plt.ylabel('碎片数量')
plt.xticks([0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
plt.grid(True, axis='y')
plt.savefig('line_type_distribution.png')
plt.close()
print("行高度类型统计:")
print(Counter(line_type_counts))
# 5. 修改的距离计算函数:考虑行高度类型
def feature_distance(feat1, feat2):
"""计算两个特征之间的距离(匹配度),考虑行高度类型"""
if not feat1 or not feat2:
return float('inf') # 没有特征的碎片距离无穷大
# 使用第一个检测到的行特征(通常一个碎片只包含一行)
f1 = feat1[0]
f2 = feat2[0]
# 计算关键特征线的差异
baseline_diff = abs(f1['baseline'] - f2['baseline'])
midline_diff = abs(f1['midline'] - f2['midline'])
topline_diff = abs(f1['topline'] - f2['topline'])
bottomline_diff = abs(f1['bottomline'] - f2['bottomline'])
# 考虑特殊字母的影响
ascender_diff = abs(f1.get('ascender_height', 0) - f2.get('ascender_height', 0))
descender_diff = abs(f1.get('descender_height', 0) - f2.get('descender_height', 0))
# 行高度类型差异 - 新增
line_type_diff = abs(f1['line_type'] - f2['line_type'])
# 加权计算总距离
distance = (baseline_diff * 0.3 + midline_diff * 0.2 + topline_diff * 0.2 +
bottomline_diff * 0.1 + ascender_diff * 0.05 + descender_diff * 0.05 +
line_type_diff * 0.1) # 行高度类型占10%权重
return distance
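# 示例(示意): 任取两块碎片,看一下特征距离的量级,便于理解后面距离矩阵与聚类的尺度
if len(fragment_features) >= 2:
    print("碎片0 与 碎片1 的特征距离:", feature_distance(fragment_features[0], fragment_features[1]))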
# 创建距离矩阵
n = img.shape[0]
print("开始计算距离矩阵...")
dist_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i + 1, n): # 利用对称性,只计算上三角
dist = feature_distance(fragment_features[i], fragment_features[j])
dist_matrix[i, j] = dist
dist_matrix[j, i] = dist
if (i + 1) % 10 == 0:
print(f"已计算 {i + 1}/{n} 个碎片的距离")
print("距离矩阵计算完成")
# 使用层次聚类(基于特征线匹配)
print("开始层次聚类...")
clustering = AgglomerativeClustering(
n_clusters=row,
metric='precomputed',
linkage='average',
distance_threshold=None
)
cluster_labels = clustering.fit_predict(dist_matrix)
print("聚类完成")
# 将碎片分配到行
row_assignments = [[] for _ in range(row)]
for frag_id, label in enumerate(cluster_labels):
row_assignments[label].append(frag_id)
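# 快速查看各簇大小(示意): 理想情况下每个簇应接近 col=19 块,偏差大的簇往往混入了其他行的碎片
print("各聚类碎片数:", Counter(cluster_labels))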
# 添加聚类可视化功能
def visualize_clusters(cluster_labels, row_assignments):
"""可视化聚类结果,显示每个聚类的碎片并标注序号"""
# 创建输出目录
os.makedirs('cluster_visualization', exist_ok=True)
for cluster_idx, fragments in enumerate(row_assignments):
if not fragments:
print(f"聚类 {cluster_idx} 没有碎片")
continue
print(f"聚类 {cluster_idx} 有 {len(fragments)} 个碎片")
# 确保所有碎片高度相同(题目要求都是180×72)
heights = set(img[frag_id].shape[0] for frag_id in fragments)
widths = set(img[frag_id].shape[1] for frag_id in fragments)
if len(heights) != 1 or len(widths) != 1:
print(f"警告: 聚类 {cluster_idx} 中碎片尺寸不一致")
# 使用最大尺寸作为标准
max_height = max(img[frag_id].shape[0] for frag_id in fragments)
max_width = max(img[frag_id].shape[1] for frag_id in fragments)
else:
max_height = next(iter(heights))
max_width = next(iter(widths))
# 计算拼接图像的大小 - 包括碎片之间的间距
spacing = 10 # 碎片之间的间距
label_space = 40 # 序号标注的空间
# 计算总宽度:每个碎片宽度 + 碎片间距
total_width = sum(img[frag_id].shape[1] for frag_id in fragments) + (len(fragments) - 1) * spacing
# 创建空白图像(额外空间用于序号标注)
cluster_img = np.ones((max_height + label_space, total_width), dtype=np.uint8) * 255
x_offset = 0
# 添加每个碎片和序号标注
for frag_id in fragments:
frag_img = img[frag_id]
h, w = frag_img.shape
# 粘贴碎片图像
y_start = 10
# 确保目标区域宽度与碎片宽度一致
cluster_img[y_start:y_start + h, x_offset:x_offset + w] = frag_img * 255
# 添加序号标注(在碎片下方)
cv2.putText(
img=cluster_img,
text=f"ID:{frag_id}",
org=(x_offset + w // 2 - 20, y_start + h + 30),
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.7,
color=0,
thickness=2
)
x_offset += w + spacing # 移动到下一个位置,加上间距
# 保存聚类图像
cv2.imwrite(f'cluster_visualization/cluster_{cluster_idx}.png', cluster_img)
# 显示聚类图像(可选)
plt.figure(figsize=(15, 5))
plt.imshow(cluster_img, cmap='gray')
plt.title(f'聚类 {cluster_idx} (碎片数量: {len(fragments)})')
plt.axis('off')
plt.savefig(f'cluster_visualization/cluster_{cluster_idx}_plot.png')
plt.close()
# 可视化聚类结果
print("可视化聚类结果...")
visualize_clusters(cluster_labels, row_assignments)
# 6. 处理聚类结果,考虑行高度类型
reserve_list = [] # 备选列表
line_type_rows = [0] * row # 记录每行的主要行高度类型
# 首先确定每行的主要行高度类型
for row_idx, fragments in enumerate(row_assignments):
if not fragments:
continue
# 统计该行碎片的主要行高度类型
row_line_types = []
for frag_id in fragments:
if fragment_features[frag_id]:
# 取主要行的高度类型
row_line_types.append(fragment_features[frag_id][0]['line_type'])
if row_line_types:
# 取出现频率最高的行高度类型
line_type_rows[row_idx] = max(set(row_line_types), key=row_line_types.count)
print("各行的主要行高度类型:")
for i, t in enumerate(line_type_rows):
print(f"行 {i}: {'整行(1.0)' if t == 1.0 else '半行(0.5)' if t == 0.5 else '未知'}")
# 调整行分配,确保行高度类型一致
for row_idx, fragments in enumerate(row_assignments):
if len(fragments) <= col: # col=19
continue # 不需要处理
# 计算每个碎片到行中心的距离(同时考虑行高度类型匹配)
row_distances = []
for frag_id in fragments:
# 计算碎片到行内所有其他碎片的平均距离
other_ids = [f for f in fragments if f != frag_id]
if not other_ids:
avg_distance = 0
else:
distances = []
for other in other_ids:
# 考虑行高度类型是否匹配
type_match = 1.0
if fragment_features[frag_id] and fragment_features[other]:
t1 = fragment_features[frag_id][0]['line_type']
t2 = fragment_features[other][0]['line_type']
if abs(t1 - t2) > 0.5: # 行高度类型不匹配
type_match = 2.0 # 增加惩罚因子
dist = dist_matrix[frag_id, other] * type_match
distances.append(dist)
avg_distance = np.mean(distances)
row_distances.append((frag_id, avg_distance))
# 按距离排序(从大到小)
row_distances.sort(key=lambda x: x[1], reverse=True)
# 移除匹配度最低的碎片(放入备选列表)
while len(fragments) > col:
frag_id, _ = row_distances.pop(0)
fragments.remove(frag_id)
reserve_list.append(frag_id)
# 7. 处理备选列表和未分配的碎片
# 首先,收集所有未分配的碎片(包括特征提取失败的)
unassigned = reserve_list[:] # 复制备选列表
# 添加特征提取失败的碎片
for frag_id in range(n):
if not fragment_features[frag_id]:
if frag_id not in unassigned:
unassigned.append(frag_id)
# 尝试根据边距特征和行高度类型分配未分配的碎片
for frag_id in unassigned[:]: # 遍历副本,以便从列表中移除
if not fragment_features[frag_id]:
continue # 跳过没有特征的碎片
frag_type = fragment_features[frag_id][0]['line_type']
# 左边距大,可能是行首
if left[frag_id] >= fenge:
# 尝试分配到第一行
if len(row_assignments[0]) < col and abs(frag_type - line_type_rows[0]) < 0.6:
row_assignments[0].append(frag_id)
if frag_id in unassigned:
unassigned.remove(frag_id)
continue
    # 右边距大,可能是行尾
    if right[frag_id] >= fenge:
        # 尝试分配到最后一行
        if len(row_assignments[-1]) < col and abs(frag_type - line_type_rows[-1]) < 0.6:
            row_assignments[-1].append(frag_id)
            if frag_id in unassigned:
                unassigned.remove(frag_id)
            continue  # 已成功分配,避免再被下面的循环重复分配
# 尝试分配到行高度类型匹配的行
for row_idx in range(row):
if len(row_assignments[row_idx]) < col and abs(frag_type - line_type_rows[row_idx]) < 0.1:
row_assignments[row_idx].append(frag_id)
if frag_id in unassigned:
unassigned.remove(frag_id)
break
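# 自检(示意): 确认没有碎片被同时分到两行
_all_assigned = [f for r in row_assignments for f in r]
if len(_all_assigned) != len(set(_all_assigned)):
    print("提示: 存在被重复分配的碎片:", [f for f in set(_all_assigned) if _all_assigned.count(f) > 1])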
# 输出行分配情况
print("行分配结果:")
for i, fragments in enumerate(row_assignments):
print(f"行 {i}: {len(fragments)} 个碎片 - {fragments}")
if fragments:
# 统计该行的行高度类型
types = []
for frag_id in fragments:
if fragment_features[frag_id]:
types.append(fragment_features[frag_id][0]['line_type'])
if types:
print(f" 行高度类型: 平均={np.mean(types):.2f}, 主要={Counter(types).most_common(1)[0][0]}")
print(f"备选列表中的碎片数量: {len(reserve_list)}")
print(f"未分配的碎片: {unassigned}")
def visualize_row_assignments(row_assignments, img, output_dir='row_assignment_results'):
"""
可视化行分配结果,为每行生成拼接图像并标注碎片序号
参数:
row_assignments (list): 每行的碎片ID列表
img (np.ndarray): 碎片图像数组
output_dir (str): 输出目录
"""
# 创建输出目录
os.makedirs(output_dir, exist_ok=True)
# 创建所有行的综合预览图
fig, axes = plt.subplots(len(row_assignments), 1, figsize=(15, 5 * len(row_assignments)))
if len(row_assignments) == 1:
axes = [axes] # 确保单行时axes是可迭代的
fig.suptitle('行分配结果', fontsize=20)
# 处理每行
for row_idx, row_fragments in enumerate(row_assignments):
if not row_fragments:
print(f"行 {row_idx} 没有碎片")
continue
print(f"行 {row_idx} 有 {len(row_fragments)} 个碎片")
# 确保所有碎片高度相同
heights = set(img[frag_id].shape[0] for frag_id in row_fragments)
widths = set(img[frag_id].shape[1] for frag_id in row_fragments)
if len(heights) != 1 or len(widths) != 1:
print(f"警告: 行 {row_idx} 中碎片尺寸不一致")
# 使用最大尺寸作为标准
max_height = max(img[frag_id].shape[0] for frag_id in row_fragments)
max_width = max(img[frag_id].shape[1] for frag_id in row_fragments)
else:
max_height = next(iter(heights))
max_width = next(iter(widths))
# 计算拼接图像的大小 - 包括碎片之间的间距
spacing = 10 # 碎片之间的间距
label_space = 150 # 序号标注的空间
# 计算总宽度:每个碎片宽度 + 碎片间距
total_width = sum(img[frag_id].shape[1] for frag_id in row_fragments) + (len(row_fragments) - 1) * spacing
# 创建空白图像(额外空间用于序号标注)
row_img = np.ones((max_height + label_space, total_width), dtype=np.uint8) * 255
x_offset = 0
# 添加每个碎片和序号标注
for frag_id in row_fragments:
frag_img = img[frag_id]
h, w = frag_img.shape
            # 粘贴碎片图像(碎片上方预留空间写序号,避免标注被裁掉)
            y_start = 30
            row_img[y_start:y_start + h, x_offset:x_offset + w] = frag_img * 255
            # 添加序号标注(在碎片上方)
            cv2.putText(
                img=row_img,
                text=f"ID:{frag_id}",
                org=(x_offset + w // 2 - 20, y_start - 8),  # 碎片上方
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.7,
color=0,
thickness=2
)
# 在碎片下方添加行索引(可选)
cv2.putText(
img=row_img,
text=f"Row:{row_idx}",
org=(x_offset + w // 2 - 30, y_start + h + 30),
fontFace=cv2.FONT_HERSHEY_SIMPLEX,
fontScale=0.5,
color=0,
thickness=1
)
x_offset += w + spacing # 移动到下一个位置,加上间距
# 保存单行图像
cv2.imwrite(f'{output_dir}/row_{row_idx}.png', row_img)
# 在综合预览图中添加该行
ax = axes[row_idx]
ax.imshow(row_img, cmap='gray')
ax.set_title(f'行 {row_idx} (碎片数量: {len(row_fragments)})')
ax.axis('off')
# 保存综合预览图
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(f'{output_dir}/all_rows.png')
plt.close()
print(f"行分配结果已保存到 {output_dir} 目录")
print("\n生成行分配结果可视化...")
visualize_row_assignments(row_assignments, img, output_dir='row_assignment_results')
# 8. 可视化行分组结果
def visualize_row_grouping(row_assignments):
for row_id, fragments in enumerate(row_assignments):
if not fragments:
print(f"行 {row_id} 没有碎片")
continue
print(f"行 {row_id} 有 {len(fragments)} 个碎片")
# 创建行预览
row_img = img[fragments[0]]
for frag_id in fragments[1:]:
row_img = np.hstack((row_img, img[frag_id]))
plt.figure(figsize=(15, 3))
plt.imshow(row_img, cmap='gray')
plt.title(f'行 {row_id} 分组预览 (行高度类型: {line_type_rows[row_id]})')
plt.axis('off')
plt.savefig(f'row_{row_id}_preview.png')
plt.close()
print("行分组结果预览...")
visualize_row_grouping(row_assignments)
# 9. 人工干预:调整行分组
print("当前行分配:")
for row_id, fragments in enumerate(row_assignments):
print(f"行 {row_id}: {fragments}")
# 模拟人工干预
adjustments = input("输入需要调整的碎片ID和目标行(格式: 碎片ID:目标行, 多个用分号分隔): ")
if adjustments:
for adj in adjustments.split(';'):
if ':' in adj:
frag_id, target_row = map(int, adj.split(':'))
            # 从原行中移除(注意不要用 row 作循环变量,否则会覆盖全局的行数 row=11)
            for row_frags in row_assignments:
                if frag_id in row_frags:
                    row_frags.remove(frag_id)
                    break
# 添加到目标行
if 0 <= target_row < len(row_assignments):
row_assignments[target_row].append(frag_id)
# 重新计算行高度类型
for row_idx, fragments in enumerate(row_assignments):
if not fragments:
line_type_rows[row_idx] = 0
continue
row_line_types = []
for frag_id in fragments:
if fragment_features[frag_id]:
row_line_types.append(fragment_features[frag_id][0]['line_type'])
if row_line_types:
line_type_rows[row_idx] = max(set(row_line_types), key=row_line_types.count)
# 10. 行内排序(考虑行高度类型)
def sort_fragments_in_row(fragments):
if len(fragments) < 2:
return fragments
# 找到最左侧的碎片(左侧空白最大)
left_margins = [left[i] for i in fragments]
start_idx = fragments[np.argmax(left_margins)]
sorted_frags = [start_idx]
remaining = set(fragments)
remaining.remove(start_idx)
while remaining:
current = sorted_frags[-1]
best_match = None
best_score = -1
for candidate in remaining:
# 计算匹配分数
score = 0
# 比较当前碎片的右边缘和候选碎片的左边缘
current_right = img[current][:, -1] # 当前碎片的最后一列
candidate_left = img[candidate][:, 0] # 候选碎片的第一列
# 计算像素匹配度
match_count = np.sum(current_right == candidate_left)
# 增强文字区域的匹配权重
text_match = np.sum((current_right == 0) & (candidate_left == 0))
score = match_count + 2 * text_match
# 考虑行高度类型匹配(同一行应该相同)
if fragment_features[current] and fragment_features[candidate]:
type_diff = abs(fragment_features[current][0]['line_type'] -
fragment_features[candidate][0]['line_type'])
score *= (1.0 - type_diff) # 类型差异越大,分数越低
if score > best_score:
best_score = score
best_match = candidate
if best_match is not None:
sorted_frags.append(best_match)
remaining.remove(best_match)
else:
# 如果没有找到匹配,随机选择一个
sorted_frags.append(remaining.pop())
return sorted_frags
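# 示例(示意): 查看两块碎片相邻边缘的匹配计数,帮助理解上面打分函数的量级;
# 这里随便取碎片0的右边缘和碎片1的左边缘,仅作演示,二者未必真的相邻
_edge_r, _edge_l = img[0][:, -1], img[1][:, 0]
print("碎片0右边缘 vs 碎片1左边缘: 相同像素数 =", int(np.sum(_edge_r == _edge_l)),
      ", 文字-文字重合数 =", int(np.sum((_edge_r == 0) & (_edge_l == 0))))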
# 对每行进行排序
sorted_rows = []
for row_id, fragments in enumerate(row_assignments):
if not fragments:
sorted_rows.append([])
continue
sorted_frags = sort_fragments_in_row(fragments)
sorted_rows.append(sorted_frags)
# 可视化行排序结果
row_img = img[sorted_frags[0]]
for frag_id in sorted_frags[1:]:
row_img = np.hstack((row_img, img[frag_id]))
plt.figure(figsize=(15, 3))
plt.imshow(row_img, cmap='gray')
plt.title(f'行 {row_id} 排序结果 (行高度类型: {line_type_rows[row_id]})')
plt.axis('off')
plt.savefig(f'row_{row_id}_sorted.png')
plt.close()
# 人工干预:调整行内顺序
manual_adjust = input(f"行 {row_id} 排序是否正确?(y/n): ")
if manual_adjust.lower() == 'n':
print("当前顺序:", sorted_frags)
new_order = list(map(int, input("输入正确顺序(用空格分隔): ").split()))
sorted_rows[row_id] = new_order
# 11. 行间排序(考虑行高度类型)
def sort_rows(rows):
if len(rows) < 2:
return rows
# 找到最顶部的行(顶部空白最大)
top_margins = []
for row in rows:
if not row:
top_margins.append(0)
continue
row_img = img[row[0]]
for y in range(row_img.shape[0]):
if np.any(row_img[y] == 0): # 找到第一个文字像素
top_margins.append(y)
break
else:
top_margins.append(row_img.shape[0])
start_idx = np.argmax(top_margins)
sorted_rows = [rows[start_idx]]
remaining = set(range(len(rows)))
remaining.remove(start_idx)
    while remaining:
        current_row = sorted_rows[-1]
        if not current_row:  # 空行
            break
        current_bottom = img[current_row[0]][-1]  # 当前行第一个碎片的最下面一行像素
        best_match = None
        best_score = -1
        for candidate_idx in remaining:
            candidate_row = rows[candidate_idx]
            if not candidate_row:
                continue
            candidate_top = img[candidate_row[0]][0]  # 候选行第一个碎片的第一行像素
            # 比较当前行的底部和候选行的顶部,计算匹配分数
            match_count = np.sum(current_bottom == candidate_top)
            # 增强文字区域的匹配权重
            text_match = np.sum((current_bottom == 0) & (candidate_top == 0))
            score = match_count + 2 * text_match
            # 行高度类型不参与行间排序打分:相邻行通常类型相同,
            # 但半行后接整行时可能不同,因此这里不做惩罚
if score > best_score:
best_score = score
best_match = candidate_idx
if best_match is not None:
sorted_rows.append(rows[best_match])
remaining.remove(best_match)
else:
# 如果没有找到匹配,随机选择一个
sorted_rows.append(rows[remaining.pop()])
return sorted_rows
# 行间排序
final_row_order = sort_rows(sorted_rows)
# 12. 可视化行间排序结果
print("行间排序结果预览...")
for i, row_frags in enumerate(final_row_order):
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    # 行间排序后顺序已改变,行高度类型直接从该行碎片的特征中重新统计
    row_types = [fragment_features[f][0]['line_type'] for f in row_frags if fragment_features[f]]
    row_type = max(set(row_types), key=row_types.count) if row_types else '未知'
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'最终排序 - 行 {i} (行高度类型: {row_type})')
plt.axis('off')
plt.savefig(f'final_row_{i}.png')
plt.close()
# 人工干预:调整行顺序
manual_adjust = input("行间排序是否正确?(y/n): ")
if manual_adjust.lower() == 'n':
print("当前行顺序:", [i for i in range(len(final_row_order))])
new_order = list(map(int, input("输入正确行顺序(用空格分隔): ").split()))
final_row_order = [final_row_order[i] for i in new_order]
# 13. 最终拼接与结果输出
# 拼接最终图像:各行碎片数可能不同,先统一行宽(右侧补空白),否则 np.vstack 会因宽度不一致报错
row_images = []
for row_frags in final_row_order:
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    row_images.append(row_img)
full_image = None
if row_images:
    max_width = max(r.shape[1] for r in row_images)
    separator = np.ones((10, max_width), dtype=row_images[0].dtype)  # 行间 10 像素空白
    padded_rows = []
    for r in row_images:
        if r.shape[1] < max_width:
            pad = np.ones((r.shape[0], max_width - r.shape[1]), dtype=r.dtype)  # 右侧补空白(值为1)
            r = np.hstack((r, pad))
        if padded_rows:
            padded_rows.append(separator)
        padded_rows.append(r)
    full_image = np.vstack(padded_rows)
# 保存结果
if full_image is not None:
    # 转换为0-255范围:二值图中 0=文字、1=空白,直接乘 255 即得到黑字白底
    full_image = (full_image * 255).astype(np.uint8)
final_img = Image.fromarray(full_image)
final_img.save('result4.png')
print("最终拼接结果已保存为 'result4.png'")
else:
print("错误: 无法拼接图像")
# 14. 输出碎片顺序表格
def create_result_table(final_row_order, n_rows=row, n_cols=col):
    """把行序结果整理成 n_rows×n_cols 的矩形表格,空位用 -1 占位"""
    table = []
    for row_frags in final_row_order:
        row_frags = list(row_frags) if row_frags else []
        # 截断/补齐到 n_cols 列,保证能转成规则的二维数组
        table.append(row_frags[:n_cols] + [-1] * max(0, n_cols - len(row_frags)))
    # 确保表格共有 n_rows 行
    while len(table) < n_rows:
        table.append([-1] * n_cols)
    return np.array(table)
result_table = create_result_table(final_row_order)
print("碎片顺序表格:")
print(result_table)
# 保存表格到CSV
pd.DataFrame(result_table).to_csv('result4.csv', index=False, header=False)
print("碎片顺序表格已保存为 'result4.csv'")
# 15. 保存行高度类型信息
with open('line_types.txt', 'w') as f:
f.write("行索引\t行高度类型\n")
for i, row in enumerate(final_row_order):
if row:
types = []
for frag_id in row:
if fragment_features[frag_id]:
types.append(fragment_features[frag_id][0]['line_type'])
avg_type = np.mean(types) if types else 0
f.write(f"{i}\t{avg_type:.1f}\n")
else:
f.write(f"{i}\t0.0\n")
print("行高度类型信息已保存为 'line_types.txt'")
# 16. 保存未分配碎片
if unassigned:
print(f"未分配的碎片: {unassigned}")
with open('unassigned_fragments.txt', 'w') as f:
f.write("未分配的碎片:\n")
f.write(", ".join(map(str, unassigned)))
# 添加行高度类型信息
f.write("\n\n行高度类型信息:\n")
for frag_id in unassigned:
if fragment_features[frag_id]:
t = fragment_features[frag_id][0]['line_type']
f.write(f"碎片 {frag_id}: {'整行' if t == 1.0 else '半行' if t == 0.5 else '未知'}\n")
else:
f.write(f"碎片 {frag_id}: 无特征\n")
else:
print("所有碎片都已成功分配")修改人工调试部分,使能够完成多次输入,且在行内排序后添加可视化