import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from collections import Counter
from PIL import Image
import pandas as pd
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from sklearn.cluster import KMeans # 导入聚类模型
plt.rcParams['font.sans-serif'] = ['SimHei']  # render CJK labels in matplotlib figures
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with CJK fonts
data_dir = './附件4'  # directory holding the fragment scans (Attachment 4)
path = data_dir + '/*.bmp'
coll = io.ImageCollection(path)  # load the grayscale fragment images
img_num = len(coll)
# ********* Convert to an array stack and binarize *********
img = np.asarray(coll)
for i in range(0, len(coll)):
    # Adaptive local-mean thresholding; maxValue=1 yields pixels in {0 (ink), 1 (blank)}.
    img[i] = cv2.adaptiveThreshold(src=img[i],  # image to process
                                   maxValue=1,  # value assigned to pixels above the threshold
                                   adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
                                   # threshold = mean of the local block (GAUSSIAN_C would use a weighted sum)
                                   thresholdType=cv2.THRESH_BINARY,  # same meaning as in global thresholding
                                   blockSize=7,  # neighborhood (block) size
                                   C=2)  # constant subtracted from the block mean; may be negative
print(img.shape)
# ****** Per-fragment left/right blank-margin widths ******
# A column is "blank" when it contains no ink pixel (no 0); the margin is the
# run of blank columns extending inward from the edge.
left = []   # left margin width per fragment
right = []  # right margin width per fragment
for i in range(0, img.shape[0]):
    # Left margin: scan columns left to right.
    count = 0
    for y in range(0, img.shape[2]):  # columns
        panduan = 1  # 1 = column blank so far
        for x in range(0, img.shape[1]):  # rows
            if (img[i][x][y] == 0):
                panduan = 0  # ink found: column is not blank
                break
        if (panduan == 1):
            count = count + 1
        else:
            break  # first non-blank column ends the margin
    left.append(count)
    # Right margin: same scan, right to left.
    count = 0
    for y in range(img.shape[2] - 1, -1, -1):  # columns
        panduan = 1
        for x in range(0, img.shape[1]):  # rows
            if (img[i][x][y] == 0):
                panduan = 0
                break
        if (panduan == 1):
            count = count + 1
        else:
            break
    right.append(count)
plt.scatter(range(0, len(left)), left)
plt.scatter(range(0, len(right)), right)
print(Counter(left))
print(Counter(right))
# ****** Determine the row/column layout ******
# The scatter plots show 11 fragments hugging the left edge and 11 the right
# edge; with 209 fragments total that gives 11 rows x 19 columns (209 / 11 = 19).
fenge = 14  # margin threshold separating edge fragments (read off the plot, or use a mean)
col = 19    # number of columns
row = 11    # number of rows (= count of margins exceeding fenge)
# ****** Fragments forming the LAST column (large right margin) ******
end_index = [i for i, margin in enumerate(right) if margin >= fenge]
len(end_index)  # notebook residue: inspect the count
# ****** Fragments forming the FIRST column (large left margin) ******
first_index = [i for i, margin in enumerate(left) if margin >= fenge]
len(first_index)  # notebook residue: inspect the count
kong_width = []  # per fragment: widths of consecutive blank-row runs
zi_width = []    # per fragment: widths of consecutive text-row runs
# ****** Run-length encode each fragment's rows into blank/text runs ******
# NOTE(review): the initial `qian` uses an INVERTED encoding (0 when row 0 is
# blank) relative to `xian` below (0 = text, 1 = blank). This fires a spurious
# zero-width "transition" at x = 0, so each fragment's first run comes out one
# row short — confirm before relying on exact widths.
for i in range(0, img.shape[0]):
    width = 0
    zj_kong = []  # blank-run widths for this fragment
    zj_zi = []    # text-run widths for this fragment
    if (sum(img[i][0]) == img.shape[2]):  # a row summing to the image width is all-blank
        qian = 0
    else:
        qian = 1
    for x in range(0, img.shape[1]):
        if (sum(img[i][x]) != img.shape[2]):  # current row contains text
            xian = 0
        else:
            xian = 1
        if (qian != xian):  # state change: close the current run
            if (qian == 0):
                if (width):
                    zj_zi.append(width)
            else:
                if (width):
                    zj_kong.append(width)
            width = 0
        else:
            width = width + 1
        qian = xian
    if (qian == 0):  # close the trailing run
        zj_zi.append(width)
    else:
        zj_kong.append(width)
    kong_width.append(zj_kong)
    zi_width.append(zj_zi)
print(kong_width[0])
print(zi_width[0])
# Statistics: text-band height comes out ~40/39/38 px, blank gaps ~27/26/28 px
ans = []
for i in kong_width:
    for j in i:
        ans.append(j)
plt.scatter(range(0, len(ans)), ans)
print("空白行宽度统计:" + str(Counter(ans)))
ans = []
for i in zi_width:
    for j in i:
        ans.append(j)
plt.scatter(range(0, len(ans)), ans)
print("字宽统计:" + str(Counter(ans)))
img1 = img  # NOTE: alias, not a copy — the masking below mutates img as well
# Mask preprocessing: find fragments whose paragraph start/end leaves an
# oversized blank gap, as a preparation step for clustering.
chuli_index_1 = []  # gap is the FIRST run -> pad relative to the lower text edge
chuli_index_2 = []  # gap appears later -> pad relative to the upper text edge
for frag_no, gaps in enumerate(kong_width):
    for gap_pos, gap in enumerate(gaps):
        if gap > 32:  # a normal inter-line gap is ~27 px; >32 marks a paragraph break
            target = chuli_index_1 if gap_pos == 0 else chuli_index_2
            target.append(frag_no)
            break
print("进行掩码处理的图片数量:" + str(len(chuli_index_1) + len(chuli_index_2)))
print("第一类需掩码处理的图片数量" + str(len(chuli_index_1)))
print("第二类需掩码处理的图片数量" + str(len(chuli_index_2)))
# Masking pass 1: the gap is at the very top — paint a fake text band ABOVE
# the first real text row so clustering sees a consistent line pattern.
for index in chuli_index_1:
    # After this loop, x is the first non-blank (text) row.
    first_index_ = 0
    for x in range(0, img.shape[1]):  # rows
        if (sum(img[index][x]) != img.shape[2]):
            break
        first_index_ = x
    # Mask rows [x-65, x-30): one nominal line height (35 px) ending 30 px above the text.
    if (x - 30 - 35 < 0):
        first = 0
    else:
        first = int(x - 30 - 35)
    for x in range(first, x - 30):  # note: reuses x; the bound x-30 is evaluated once
        for y in range(0, img.shape[2]):
            img1[index][x][y] = 0  # NOTE: img1 aliases img, so img is mutated too
# Masking pass 2: the big gap follows a text band — paint a fake text band
# BELOW the upper text edge bordering the fragment's widest blank run.
for index in chuli_index_2:
    # Re-run the blank/text run-length encoding, additionally recording in
    # `hang` the row index at which each blank run closed.
    width = 0
    zj_kong = []  # blank-run widths
    hang = []     # closing row index of each blank run (parallel to zj_kong)
    zj_zi = []    # text-run widths
    if (sum(img[index][0]) == img.shape[2]):  # blank first row
        qian = 0
    else:
        qian = 1
    for x in range(0, img.shape[1]):
        if (sum(img[index][x]) != img.shape[2]):  # text row
            xian = 0
        else:
            xian = 1
        if (qian != xian):
            if (qian == 0):
                if (width):
                    zj_zi.append(width)
            else:
                if (width):
                    zj_kong.append(width)
                    hang.append(x)
            width = 0
        else:
            width = width + 1
        qian = xian
    if (qian == 0):  # trailing run
        zj_zi.append(width)
    else:
        zj_kong.append(width)
        hang.append(x)
    # Locate the widest blank run; first_index_ is the row where it starts.
    Max = 0
    for i in range(0, len(zj_kong)):
        if (zj_kong[i] > Max):
            Max = zj_kong[i]
            first_index_ = hang[i] - zj_kong[i]
    # Mask one nominal line (35 px) starting 30 px below the gap start, clamped to the image.
    if (first_index_ + 30 + 35 >= img.shape[1]):
        end = img.shape[1]
    else:
        end = first_index_ + 30 + 35
    for x in range(first_index_ + 30, end):
        for y in range(0, img.shape[2]):
            img1[index][x][y] = 0  # NOTE: img1 aliases img
# *********** Clustering ***********
# Feature extraction focused solely on the four typographic guide lines.
def extract_four_lines_features(image):
    """
    Extract four-line (typographic) position features from an English text fragment.

    image: preprocessed binary image (0 = ink, 1 = blank)
    Returns [number of text bands, top-line row, mid-line row, base-line row,
    bottom-line row]; all zeros when no text band is detected.
    """
    h, w = image.shape
    feats = [0, 0, 0, 0, 0]
    # 1. Horizontal ink projection: ink pixels per row.
    row_ink = np.sum(1 - image, axis=1)
    # 2. Detect text bands as maximal runs of rows above the ink threshold.
    ink_threshold = 0.1 * w
    bands = []
    band_start = None  # row where the currently-open band began, or None
    for row, ink in enumerate(row_ink):
        if ink > ink_threshold:
            if band_start is None:
                band_start = row
        elif band_start is not None:
            if row - band_start > 8:  # discard bands at or below 8 px tall
                bands.append((band_start, row))
            band_start = None
    # A band still open at the bottom edge is closed at the last row
    # (looser 5 px minimum, matching the original behavior).
    if band_start is not None:
        last_row = len(row_ink) - 1
        if last_row - band_start > 5:
            bands.append((band_start, last_row))
    # No text detected: return the all-zero feature vector.
    if not bands:
        return feats
    # Only the first band is analyzed (each fragment is assumed single-line).
    start, end = bands[0]
    # 3. Vertical ink projection within the band.
    col_ink = np.sum(1 - image[start:end, :], axis=0)
    # 4. Derive the four lines.
    top_line = start                              # band top
    base_line = start + np.argmax(col_ink)        # densest column ~ letter bottoms
    mid_line = start + int(0.5 * (base_line - start))  # halfway to the baseline
    bottom_line = end                             # band bottom
    feats[0] = len(bands)
    feats[1] = top_line
    feats[2] = mid_line
    feats[3] = base_line
    feats[4] = bottom_line
    return feats
# Extract the four-line feature vector for every fragment.
tezhe = []  # one [n_bands, top, mid, base, bottom] vector per fragment
for i in range(img1.shape[0]):
    features = extract_four_lines_features(img1[i])
    tezhe.append(features)
# Standardize features (zero mean, unit variance) before K-Means.
from sklearn.preprocessing import StandardScaler  # NOTE(review): mid-file import, kept for notebook flow
scaler = StandardScaler()
tezhe_scaled = scaler.fit_transform(tezhe)
# Wrap as a DataFrame for the clusterer.
x_train = pd.DataFrame(tezhe_scaled)
# 11 clusters = 11 text rows on the page.
kmeansmodel = KMeans(n_clusters=11, init='k-means++', random_state=150)
y_kmeans = kmeansmodel.fit_predict(x_train)
print("聚类结果统计:" + str(Counter(y_kmeans)))
# Manual intervention point 1: cluster-result validation (with four-line overlay)
os.makedirs('./cluster_visualization', exist_ok=True)
def visualize_with_four_lines(image, lines, idx):
    """
    Visualize a fragment and overlay its four typographic lines.

    image: fragment image (2-D binary 0/1 array, or an already 3-channel array)
    lines: four line positions (top, mid, base, bottom), row coordinates
    idx:   fragment index, stamped onto the image
    Returns a 3-channel uint8 BGR image with the lines and labels drawn.
    """
    # Promote the binary image to a 3-channel uint8 canvas for colored drawing.
    if len(image.shape) == 2:
        vis_img = np.stack([image * 255] * 3, axis=-1).astype(np.uint8)
    else:
        vis_img = (image * 255).astype(np.uint8)
    # BUG FIX (reported "script exits, no images generated"): the original kept
    # matplotlib color NAMES ('red', ...) and called
    # tuple((np.array('red') * 255).astype(int), out=None) — numpy cannot
    # multiply a string array by an int and tuple() accepts no keyword
    # arguments, so the very first cv2.line call raised and the script died
    # before any cluster image was written. Use numeric BGR tuples directly.
    colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255)]  # red/green/blue/purple in BGR
    # ASCII labels: cv2.putText's Hershey fonts cannot render CJK text.
    labels = ['top', 'mid', 'base', 'bottom']
    for i, line in enumerate(lines):
        line = int(line)  # OpenCV requires integer pixel coordinates
        if line > 0:  # only draw valid line positions
            cv2.line(vis_img, (0, line), (image.shape[1] - 1, line), colors[i], 1)
            cv2.putText(vis_img, labels[i], (10, line - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, colors[i], 1)
    # Stamp the fragment ID in the top-left corner.
    cv2.putText(vis_img, f'ID:{idx}', (5, 15),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    return vis_img
print("正在生成聚类可视化结果(带四线)...")
# Build and save one montage image per cluster, each fragment annotated with
# its four-line overlay.
for cluster_id in range(11):
    cluster_samples = [i for i, label in enumerate(y_kmeans) if label == cluster_id]
    if not cluster_samples:
        continue
    # Montage geometry derived from the first fragment's size.
    sample_img = img1[cluster_samples[0]]
    h, w = sample_img.shape[:2]
    num_fragments = len(cluster_samples)
    cols = min(5, num_fragments)  # at most 5 fragments per montage row
    rows = (num_fragments + cols - 1) // cols
    # White canvas; +20 px per row reserved for the caption strip.
    big_img = np.zeros((rows * (h + 20), cols * w, 3), dtype=np.uint8) + 255
    for i, idx in enumerate(cluster_samples):
        row_idx = i // cols
        col_idx = i % cols
        # Four-line positions (top, mid, base, bottom) from the feature vector.
        lines = tezhe[idx][1:5]
        # NOTE(review): if visualize_with_four_lines raises here (its original
        # color handling is invalid — see that function), this loop dies before
        # cv2.imwrite below: the reported "no images generated" symptom.
        frag_img = visualize_with_four_lines(img1[idx], lines, idx)
        # Place the fragment into the montage grid.
        y_start = row_idx * (h + 20)
        x_start = col_idx * w
        big_img[y_start:y_start + h, x_start:x_start + w] = frag_img
        # Caption under each fragment.
        cv2.putText(big_img, f'Cluster:{cluster_id}',
                    (x_start + 5, y_start + h + 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    # Save, then display the montage.
    save_path = f'./cluster_visualization/cluster_{cluster_id}_four_lines.png'
    cv2.imwrite(save_path, big_img)
    plt.figure(figsize=(15, 8))
    plt.imshow(cv2.cvtColor(big_img, cv2.COLOR_BGR2RGB))
    plt.title(f'Cluster {cluster_id} - {num_fragments} fragments')
    plt.axis('off')
    plt.show()
# Optional interactive reassignment of cluster labels.
manual_correction = input("是否需要人工调整聚类结果?(y/n): ")
if manual_correction.lower() == 'y':
    print("当前聚类分配:")
    for i in range(len(coll)):
        print(f"碎片{i} -> 类{y_kmeans[i]}")
    corrections = input("输入需调整的碎片ID和目标类(格式: 碎片ID:目标类, 多个用分号分隔): ")
    for corr in corrections.split(';'):
        fid, new_cls = map(int, corr.split(':'))
        y_kmeans[fid] = new_cls
    print("人工调整后的聚类统计:", Counter(y_kmeans))
# Classification result: map each cluster label to its fragment indices.
ans = {}
for frag_no, label in enumerate(y_kmeans):
    ans.setdefault(label, []).append(frag_no)
ans_lei = ans  # cluster label -> list of fragment indices
# ****** In-row ordering: re-binarize with parameters tuned for edge matching ******
img1 = img  # keep the masked binary stack around
img = np.asarray(coll)  # reload the original grayscale stack
for i in range(0, len(coll)):
    img[i] = cv2.adaptiveThreshold(src=img[i],  # image to process
                                   maxValue=1,  # value assigned above threshold
                                   adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
                                   # threshold = mean of the local block
                                   thresholdType=cv2.THRESH_BINARY,
                                   blockSize=11,  # larger block than pass 1 (7) for edge comparison
                                   C=1)  # constant subtracted from the block mean
hang_index = []  # final left-to-right fragment ordering, one list per text row
# Greedy in-row ordering: seed each row with its leftmost fragment, then
# repeatedly append the candidate whose left edge best matches the current
# rightmost fragment's right edge; the end-margin fragment is appended last.
for i in range(0, len(ans_lei)):
    ans_index = []
    ans_index.append(first_index[i])  # seed: the row's leftmost fragment
    yc = None  # BUG FIX: initialize per row; the original could reuse a stale
               # yc from a previous row (or NameError) when no end piece exists
    count1 = 0
    while (count1 < len(ans_lei[y_kmeans[first_index[i]]]) - 2):
        count1 = count1 + 1
        Max = -1
        index = 0
        zj = ans_index[len(ans_index) - 1]  # current rightmost fragment
        for j in ans_lei[y_kmeans[first_index[i]]]:
            if (ans_index.count(j) == 1 or end_index.count(j) == 1):
                if (end_index.count(j) == 1):
                    yc = j  # remember the end piece for the final slot
                continue
            # Edge similarity: compare j's left column with zj's right column.
            count = 0
            for x in range(0, img.shape[1]):
                if (img[j][x][0] == img[zj][x][img.shape[2] - 1]):
                    if (img[j][x][0] == 0):
                        count += 0.6  # matching ink pixels weigh extra
                    count = count + 1
            # NOTE(review): count2 is computed but never used in `loss`, and it
            # reads img[zj][img.shape[1]-1] (last ROW, not last column) —
            # probably meant to enter the score. Preserved to keep results.
            count2 = abs(sum(img[j][0]) - sum(img[zj][img.shape[1] - 1]))
            loss = count * 0.5 - count1 * 0.8
            if (loss > Max):
                Max = loss
                index = j
        ans_index.append(index)
    if (yc is not None):  # append the end-margin fragment if one was found
        ans_index.append(yc)
    print(ans_index)
    # Manual intervention point 2: verify the in-row ordering visually.
    row_img = coll[ans_index[0]]
    for j in range(1, len(ans_index)):
        row_img = np.hstack((row_img, coll[ans_index[j]]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'第{i}行自动排序结果')
    plt.axis('off')
    plt.show()
    manual_adjust = input(f"第{i}行排序是否正确?(y/n): ")
    if manual_adjust.lower() == 'n':
        print("当前顺序:", ans_index)
        new_order = list(map(int, input("输入正确顺序(用空格分隔): ").split()))
        ans_index = new_order
    # BUG FIX: append AFTER the manual-correction prompt. The original appended
    # first and then rebound ans_index to new_order, so human corrections never
    # reached hang_index (and the final reassembly).
    hang_index.append(ans_index)
# ****** Stitch each row horizontally and inspect ****** (ordering works well)
ans_hang_img = []  # one stitched grayscale image per text row
for i in range(0, len(hang_index)):
    ans_img = coll[hang_index[i][0]]
    for j in range(0, len(hang_index[i])):
        if (j == 0):
            continue
        ans_img = np.hstack((ans_img, coll[hang_index[i][j]]))  # horizontal concat
    ans_hang_img.append(ans_img)
im = Image.fromarray(ans_hang_img[5])  # notebook residue: preview of row 5
img_ = np.array(ans_hang_img)
img_.shape  # stack of the 11 row images
# Binarize the row images to speed up the vertical edge comparisons.
for i in range(0, len(img_)):
    img_[i] = cv2.adaptiveThreshold(src=img_[i],  # image to process
                                    maxValue=1,  # value assigned above threshold
                                    adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
                                    # threshold = mean of the local block
                                    thresholdType=cv2.THRESH_BINARY,
                                    blockSize=11,  # neighborhood size
                                    C=1)  # constant subtracted from the block mean
# ****** Vertical assembly of the stitched row images ******
# *** Top row: the row image whose first ink row is farthest from its top edge
# (largest top margin = page's first line).
Max = 0
first_hang_index = 0
for i in range(0, img_.shape[0]):
    for x in range(0, img_.shape[1]):  # scan rows downward
        if (sum(img_[i][x]) != img_.shape[2]):  # first non-blank row
            if (x > Max):
                Max = x
                first_hang_index = i
            break
# *** Bottom row: largest blank margin BELOW the text.
Max = 0
end_hang_index = 0
for i in range(0, img_.shape[0]):
    for x in range(img_.shape[1] - 1, -1, -1):  # scan rows upward
        if (sum(img_[i][x]) != img_.shape[2]):
            # BUG FIX: the original hard-coded 179 here, silently assuming
            # 180-pixel-tall row images; derive the margin from the actual
            # height so other fragment sizes work too.
            if (img_.shape[1] - 1 - x > Max):
                Max = img_.shape[1] - 1 - x
                end_hang_index = i
            break
# Column (vertical) ordering of the row images.
lie_index = []
lie_index.append(first_hang_index)  # start from the detected top row
while (1):
    # Greedily pick the row whose top edge best matches the current bottom edge.
    Max = -1
    index = 0
    zj = lie_index[len(lie_index) - 1]  # current bottom-most placed row
    for j in range(0, img_.shape[0]):
        if (lie_index.count(j) == 1 or j == end_hang_index):
            continue  # already placed, or reserved as the final row
        count = 0
        for y in range(0, img_.shape[2]):  # compare j's top row with zj's bottom row
            if (img_[j][0][y] == img_[zj][img_.shape[1] - 1][y]):
                if (img_[j][0][y] == 0):
                    count += 0.3  # matching ink pixels weigh extra
                count = count + 1
        # Penalize differing ink totals on the touching edges.
        count1 = abs(sum(img_[j][0]) - sum(img_[zj][img_.shape[1] - 1]))
        loss = count * 0.5 - count1 * 0.3
        if (loss > Max):
            Max = loss
            index = j
    lie_index.append(index)
    if (len(lie_index) >= img_.shape[0] - 1):
        break
lie_index.append(end_hang_index)  # the reserved bottom row goes last
print("列排序:" + str(lie_index))
# ****** Vertical concatenation: produce the final reassembled page ******
ans_img = ans_hang_img[lie_index[0]]
for i in range(1, len(lie_index)):  # start at 1 instead of continue-on-0
    ans_img = np.vstack((ans_img, ans_hang_img[lie_index[i]]))
# Manual intervention point 3: verify the vertical stitching visually.
plt.figure(figsize=(10, 15))
plt.imshow(ans_img, cmap='gray')
plt.title('垂直拼接结果预览')
plt.axis('off')
plt.show()
vertical_correction = input("垂直拼接是否正确?(y/n): ")
if vertical_correction.lower() == 'n':
    print("当前行顺序:", lie_index)
    new_vertical_order = list(map(int, input("输入正确行顺序(用空格分隔): ").split()))
    # Re-stitch with the human-supplied row order.
    ans_img = ans_hang_img[new_vertical_order[0]]
    for i in range(1, len(new_vertical_order)):
        ans_img = np.vstack((ans_img, ans_hang_img[new_vertical_order[i]]))
# BUG FIX: build the PIL image AFTER any manual reordering. The original
# created `im` from the pre-correction array, so the saved result silently
# ignored the human correction.
im = Image.fromarray(ans_img)
im.save('result4.png')
# NOTE(review): reported symptom — "while generating the cluster visualizations
# the script simply exits and no images are produced". Cause: in
# visualize_with_four_lines, tuple((np.array('red') * 255).astype(int), out=None)
# is invalid — numpy cannot multiply a string array by an integer, and tuple()
# accepts no keyword arguments — so the first cv2.line call raises and the
# script aborts before cv2.imwrite ever runs. Fix: use numeric BGR color tuples
# (and integer coordinates) for the OpenCV drawing calls.