缘 in English

有缘千里来相会--用英语说“缘”

缘 reason; cause; sake, relationship, edge; fringe
  
缘分 predestined relationship

血缘 blood relationship

姻缘 predestined marriage

化缘 beg for alms

天缘巧合 a lucky coincidence

有人缘 enjoy great popularity

投缘的街坊们 congenial neighbors

天赐良缘 a god sent marriage; a good marriage arranged in Heaven

有缘结识某人 be lucky to get acquainted with sb.

无缘结识某人 have no opportunity to get acquainted with sb.

婚姻是缘分。 A couple's conjugal fate is prearranged.

有缘终相逢。 Fate brings together people who are far apart.

无缘不相逢。 There is no meeting without predestination.

我与烟酒无缘。 Smoking and drinking don't appeal to me.

好事似乎与他无缘。 Good luck seemed to be wholly denied to him.

他们俩有情无缘。 They are attracted to each other but are not fated to be conjugally tied.

机缘凑巧,我找到一份工作。 As luck would have it, I found a job.

千里姻缘一线牵。 Two beings destined to marry each other, though thousands of miles apart, are tied together with an invisible red thread by an old man under the moonlight.

有缘千里来相会,无缘对面不相逢。 As decreed by providence you have met him; otherwise you might have failed although you traveled a long way.

import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from collections import Counter
from PIL import Image
import pandas as pd
from scipy.ndimage import gaussian_filter1d
from sklearn.cluster import KMeans
from scipy.signal import find_peaks
from sklearn.preprocessing import StandardScaler

plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display the minus sign correctly

# 1. Load the fragment images and binarize them.
data_dir = './附件4'
path = data_dir + '/*.bmp'
coll = io.ImageCollection(path)  # grayscale fragment images
img_num = len(coll)

# Binarize each fragment (0 = ink, 1 = blank) into a fresh uint8 array.
# BUGFIX: the original wrote the threshold result back into np.asarray(coll),
# which may be a read-only view; build a new list and stack it instead.
_binarized = []
for i in range(img_num):
    _binarized.append(cv2.adaptiveThreshold(
        src=np.asarray(coll[i], dtype=np.uint8),
        maxValue=1,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=13,
        C=2
    ))
img = np.stack(_binarized)
print("图像数据形状:", img.shape)

# 2. Left/right blank margin of every fragment (vectorized instead of
# scanning column by column in Python).
_has_text = np.any(img == 0, axis=1)  # (n_frags, width): column contains ink
left = []
right = []
for i in range(img.shape[0]):
    text_cols = np.flatnonzero(_has_text[i])
    if text_cols.size:
        left.append(int(text_cols[0]))
        right.append(int(img.shape[2] - 1 - text_cols[-1]))
    else:
        # completely blank fragment: the whole width counts as margin
        left.append(img.shape[2])
        right.append(img.shape[2])

plt.figure(figsize=(10, 6))
plt.scatter(range(len(left)), left, label='左边距')
plt.scatter(range(len(right)), right, label='右边距')
plt.title('碎片左右边距分布')
plt.xlabel('碎片索引')
plt.ylabel('边距大小(像素)')
plt.legend()
plt.grid(True)
plt.show()

print("左边距统计:", Counter(left))
print("右边距统计:", Counter(right))

# 3. Identify line-start / line-end fragments from their margins.
fenge = 10  # margin threshold: larger margins mark a row start/end
col = 19    # fragments per row
row = 16    # number of rows

end_index = [i for i, r in enumerate(right) if r >= fenge]
print(f"行尾碎片数量: {len(end_index)}")

first_index = [i for i, l in enumerate(left) if l >= fenge]
print(f"行首碎片数量: {len(first_index)}")


# 4. Extract "four-line three-zone" features of English text.
def extract_english_features(image):
    """Extract four-line (top/mid/base/bottom) features of English text.

    Parameters
    ----------
    image : 2-D binary array, 0 = ink, 1 = blank.

    Returns
    -------
    list of dict, one per detected text line, holding the estimated
    baseline, midline, topline and bottomline positions plus extent.
    """
    h, w = image.shape

    # Horizontal projection: ink count per row (inverted so text scores high).
    horizontal_proj = np.sum(1 - image, axis=1)

    # Smooth the projection curve before segmenting text lines.
    smoothed_proj = gaussian_filter1d(horizontal_proj, sigma=1.5)

    # Detect contiguous text-line regions above a relative threshold.
    line_regions = []
    in_text = False
    start = 0
    threshold = 0.1 * np.max(smoothed_proj)
    for i, val in enumerate(smoothed_proj):
        if val > threshold and not in_text:
            in_text = True
            start = i
        elif val <= threshold and in_text:
            in_text = False
            line_regions.append((start, i))
    if in_text:  # region running off the bottom edge
        line_regions.append((start, h - 1))

    # Estimate the four reference lines of every detected region.
    line_features = []
    for start, end in line_regions:
        line_height = end - start
        if line_height < 5:  # too small to be a real text line
            continue
        line_img = image[start:end, :]
        vertical_proj = np.sum(1 - line_img, axis=0)

        # Baseline: column with the densest ink.
        baseline_pos = np.argmax(vertical_proj)

        # Midline (x-height): mean of strongly-inked columns.
        mid_threshold = 0.5 * np.max(vertical_proj)
        mid_region = np.where(vertical_proj > mid_threshold)[0]
        if len(mid_region) > 0:
            midline_pos = np.mean(mid_region)
        else:
            midline_pos = baseline_pos - 0.3 * line_height

        # Topline (cap height).
        top_threshold = 0.7 * np.max(vertical_proj)
        top_region = np.where(vertical_proj > top_threshold)[0]
        if len(top_region) > 0:
            topline_pos = np.mean(top_region)
        else:
            topline_pos = midline_pos - 0.2 * line_height

        # Bottomline (descender depth).
        bottom_threshold = 0.6 * np.max(vertical_proj)
        bottom_region = np.where(vertical_proj > bottom_threshold)[0]
        if len(bottom_region) > 0:
            bottomline_pos = np.mean(bottom_region)
        else:
            bottomline_pos = baseline_pos + 0.2 * line_height

        line_features.append({
            'start': start,
            'end': end,
            'baseline': baseline_pos,
            'midline': midline_pos,
            'topline': topline_pos,
            'bottomline': bottomline_pos,
            'height': line_height
        })

    return line_features


# Extract the features of every fragment.
fragment_features = []
for i in range(img.shape[0]):
    features = extract_english_features(img[i])
    fragment_features.append(features)
# 5.
# Determine the row structure of the whole document.
all_baselines = []
all_heights = []
for features in fragment_features:
    for line in features:
        all_baselines.append(line['baseline'])
        all_heights.append(line['height'])

# Cluster the baseline positions to locate the document's row baselines.
if len(all_baselines) > 0:
    kmeans = KMeans(n_clusters=row, random_state=42)
    baseline_clusters = kmeans.fit_predict(np.array(all_baselines).reshape(-1, 1))
    # Sorted cluster centers = baseline position of each document row.
    document_baselines = sorted([center[0] for center in kmeans.cluster_centers_])
    print("文档行基线位置:", document_baselines)
else:
    # No baseline detected anywhere: fall back to evenly spaced defaults.
    document_baselines = np.linspace(20, 150, row).tolist()
    print("警告: 未检测到基线,使用默认基线位置:", document_baselines)

# 6. Assign every fragment to a row.
row_assignments = [[] for _ in range(row)]
for frag_id, features in enumerate(fragment_features):
    if not features:  # no text line detected: fall back to margin hints
        if left[frag_id] >= fenge:
            row_assignments[0].append(frag_id)    # probably the first row
        elif right[frag_id] >= fenge:
            row_assignments[-1].append(frag_id)   # probably the last row
        # otherwise leave the fragment unassigned for now
        continue

    # Score the fragment against every document baseline.
    match_scores = []
    for doc_baseline in document_baselines:
        min_dist = float('inf')
        for line in features:
            dist = abs(line['baseline'] - doc_baseline)
            if dist < min_dist:
                min_dist = dist
        match_scores.append(min_dist)

    best_row = np.argmin(match_scores)
    row_assignments[best_row].append(frag_id)

# Collect the fragments that remained unassigned.
# BUGFIX: the original iterated `for row in row_assignments`, clobbering the
# module-level row count `row` with a list and breaking later table code.
_assigned = {f for frags in row_assignments for f in frags}
unassigned = [frag_id for frag_id in range(len(img)) if frag_id not in _assigned]
print(f"未分配的碎片数量: {len(unassigned)}")
# 7.
# Preview the row grouping.
def visualize_row_grouping(row_assignments):
    """Show a horizontal concatenation of every row's fragments."""
    for row_id, fragments in enumerate(row_assignments):
        if not fragments:
            print(f"行 {row_id} 没有碎片")
            continue
        print(f"行 {row_id} 有 {len(fragments)} 个碎片")
        row_img = img[fragments[0]]
        for frag_id in fragments[1:]:
            row_img = np.hstack((row_img, img[frag_id]))
        plt.figure(figsize=(15, 3))
        plt.imshow(row_img, cmap='gray')
        plt.title(f'行 {row_id} 分组预览')
        plt.axis('off')
        plt.show()


print("行分组结果预览...")
visualize_row_grouping(row_assignments)

# 8. Manual intervention: move fragments between rows.
print("当前行分配:")
for row_id, fragments in enumerate(row_assignments):
    print(f"行 {row_id}: {fragments}")

adjustments = input("输入需要调整的碎片ID和目标行(格式: 碎片ID:目标行, 多个用分号分隔): ")
if adjustments:
    for adj in adjustments.split(';'):
        if ':' not in adj:
            continue
        try:
            frag_id, target_row = map(int, adj.split(':'))
        except ValueError:
            # BUGFIX: skip malformed entries instead of crashing the script.
            print(f"无法解析: {adj}")
            continue
        # BUGFIX: do not name the loop variable `row` — that clobbered the
        # module-level row count used by the result table later on.
        for frags in row_assignments:
            if frag_id in frags:
                frags.remove(frag_id)
                break
        if 0 <= target_row < len(row_assignments):
            row_assignments[target_row].append(frag_id)


# 9. Order the fragments inside each row.
def sort_fragments_in_row(fragments):
    """Greedily order a row's fragments by matching adjacent edge columns."""
    if len(fragments) < 2:
        return fragments

    # Start from the fragment with the widest left margin (row start).
    left_margins = [left[i] for i in fragments]
    start_idx = fragments[np.argmax(left_margins)]
    sorted_frags = [start_idx]
    remaining = set(fragments)
    remaining.remove(start_idx)

    while remaining:
        current = sorted_frags[-1]
        best_match = None
        best_score = -1
        for candidate in remaining:
            current_right = img[current][:, -1]    # last column of current
            candidate_left = img[candidate][:, 0]  # first column of candidate
            match_count = np.sum(current_right == candidate_left)
            # Extra weight where both edges carry ink.
            text_match = np.sum((current_right == 0) & (candidate_left == 0))
            score = match_count + 2 * text_match
            if score > best_score:
                best_score = score
                best_match = candidate
        if best_match is not None:
            sorted_frags.append(best_match)
            remaining.remove(best_match)
        else:
            # No candidate scored: fall back to an arbitrary one.
            sorted_frags.append(remaining.pop())
    return sorted_frags


sorted_rows = []
for row_id, fragments in enumerate(row_assignments):
    if not fragments:
        sorted_rows.append([])
        continue
    sorted_frags = sort_fragments_in_row(fragments)
    sorted_rows.append(sorted_frags)

    # Preview this row's ordering result.
    row_img = img[sorted_frags[0]]
    for frag_id in sorted_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'行 {row_id} 排序结果')
    plt.axis('off')
    plt.show()

    # Manual correction of the in-row order.
    manual_adjust = input(f"行 {row_id} 排序是否正确?(y/n): ")
    if manual_adjust.lower() == 'n':
        print("当前顺序:", sorted_frags)
        new_order = list(map(int, input("输入正确顺序(用空格分隔): ").split()))
        sorted_rows[row_id] = new_order


# 10. Order the rows top-to-bottom.
def sort_rows(rows):
    """Greedily order rows by matching bottom/top edges of fragments."""
    if len(rows) < 2:
        return rows

    # Start from the row whose first fragment has the most top whitespace.
    top_margins = []
    for frags in rows:
        if not frags:
            top_margins.append(0)
            continue
        frag_img = img[frags[0]]
        for y in range(frag_img.shape[0]):
            if np.any(frag_img[y] == 0):  # first ink row
                top_margins.append(y)
                break
        else:
            top_margins.append(frag_img.shape[0])

    start_idx = np.argmax(top_margins)
    ordered = [rows[start_idx]]
    remaining = set(range(len(rows)))
    remaining.remove(start_idx)

    while remaining:
        current_row = ordered[-1]
        if not current_row:  # empty row — cannot match further
            break
        # NOTE: only the first fragment's edge is compared — a heuristic.
        current_bottom = img[current_row[0]][-1]
        best_match = None
        best_score = -1
        for candidate_idx in remaining:
            candidate_row = rows[candidate_idx]
            if not candidate_row:
                continue
            candidate_top = img[candidate_row[0]][0]
            match_count = np.sum(current_bottom == candidate_top)
            text_match = np.sum((current_bottom == 0) & (candidate_top == 0))
            score = match_count + 2 * text_match
            if score > best_score:
                best_score = score
                best_match = candidate_idx
        if best_match is not None:
            ordered.append(rows[best_match])
            remaining.remove(best_match)
        else:
            ordered.append(rows[remaining.pop()])
    return ordered


final_row_order = sort_rows(sorted_rows)
# 11.
# Preview the final row order.
print("行间排序结果预览...")
for i, row_frags in enumerate(final_row_order):
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'最终排序 - 行 {i}')
    plt.axis('off')
    plt.show()

# Manual correction of the row order.
manual_adjust = input("行间排序是否正确?(y/n): ")
if manual_adjust.lower() == 'n':
    print("当前行顺序:", [i for i in range(len(final_row_order))])
    new_order = list(map(int, input("输入正确行顺序(用空格分隔): ").split()))
    final_row_order = [final_row_order[i] for i in new_order]

# 12. Stitch the final image.
full_image = None
for row_frags in final_row_order:
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    if full_image is None:
        full_image = row_img
    else:
        # optional 10-px blank separator between rows
        separator = np.ones((10, row_img.shape[1]), dtype=row_img.dtype)
        full_image = np.vstack((full_image, separator))
        full_image = np.vstack((full_image, row_img))

if full_image is not None:
    # Invert back to 0-255: ink -> black, blank -> white.
    full_image = (1 - full_image) * 255
    full_image = full_image.astype(np.uint8)
    final_img = Image.fromarray(full_image)
    final_img.save('result4.png')
    print("最终拼接结果已保存为 'result4.png'")
else:
    print("错误: 无法拼接图像")


# 13. Fragment-order table.
def create_result_table(final_row_order):
    """Build a rectangular fragment-id table; -1 marks empty slots.

    BUGFIX: rows may contain differing fragment counts; pad every row to a
    common width so np.array() does not produce a ragged object array.
    """
    table = [list(r) if r else [] for r in final_row_order]
    # Expected number of rows; guard against `row` having been shadowed
    # into a list by earlier module-level loops.
    n_rows = row if isinstance(row, int) else len(table)
    while len(table) < n_rows:
        table.append([])
    width = max([col] + [len(r) for r in table])
    table = [r + [-1] * (width - len(r)) for r in table]
    return np.array(table)


result_table = create_result_table(final_row_order)
print("碎片顺序表格:")
print(result_table)

pd.DataFrame(result_table).to_csv('result4.csv', index=False, header=False)
print("碎片顺序表格已保存为 'result4.csv'")

# 14. Save the unassigned fragments.
if unassigned:
    print(f"未分配的碎片: {unassigned}")
    with open('unassigned_fragments.txt', 'w') as f:
        f.write("未分配的碎片:\n")
        f.write(", ".join(map(str, unassigned)))
else:
    print("所有碎片都已成功分配")
优化特征提取函数,要求仅以小写字母a的底部,顶部为基准线,来进行特征提取,不同图片之间以顶线和底线的匹配度来判断相似性,从而进行聚类
08-12
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from collections import Counter
from PIL import Image
import pandas as pd
from scipy.ndimage import gaussian_filter1d
from sklearn.cluster import AgglomerativeClustering
import time

plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display the minus sign correctly

# 1. Load the fragment images and binarize them.
data_dir = './附件4'
path = data_dir + '/*.bmp'
coll = io.ImageCollection(path)  # grayscale fragment images
img_num = len(coll)

# Binarize each fragment (0 = ink, 1 = blank) into a fresh uint8 array.
# BUGFIX: the original wrote the threshold result back into np.asarray(coll),
# which may be a read-only view; build a new list and stack it instead.
_binarized = []
for i in range(img_num):
    _binarized.append(cv2.adaptiveThreshold(
        src=np.asarray(coll[i], dtype=np.uint8),
        maxValue=1,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=13,
        C=2
    ))
img = np.stack(_binarized)
print("图像数据形状:", img.shape)

# 2. Left/right blank margin of every fragment (vectorized).
_has_text = np.any(img == 0, axis=1)  # (n_frags, width): column contains ink
left = []
right = []
for i in range(img.shape[0]):
    text_cols = np.flatnonzero(_has_text[i])
    if text_cols.size:
        left.append(int(text_cols[0]))
        right.append(int(img.shape[2] - 1 - text_cols[-1]))
    else:
        # completely blank fragment: the whole width counts as margin
        left.append(img.shape[2])
        right.append(img.shape[2])

plt.figure(figsize=(10, 6))
plt.scatter(range(len(left)), left, label='左边距')
plt.scatter(range(len(right)), right, label='右边距')
plt.title('碎片左右边距分布')
plt.xlabel('碎片索引')
plt.ylabel('边距大小(像素)')
plt.legend()
plt.grid(True)
plt.savefig('margin_distribution.png')
plt.close()

print("左边距统计:", Counter(left))
print("右边距统计:", Counter(right))

# 3. Identify line-start / line-end fragments from their margins.
fenge = 15  # margin threshold: larger margins mark a row start/end
col = 19    # fragments per row
row = 11    # number of rows

end_index = [i for i, r in enumerate(right) if r >= fenge]
print(f"行尾碎片数量: {len(end_index)}")

first_index = [i for i, l in enumerate(left) if l >= fenge]
print(f"行首碎片数量: {len(first_index)}")


# 4. Feature extraction keyed to the lowercase letter body: topline/baseline
# come from per-column ink edges, and a full/half line-height type is
# classified from the measured text height.
def extract_english_features(image):
    """Extract four-line features plus a full/half line-height type.

    Parameters
    ----------
    image : 2-D binary array, 0 = ink, 1 = blank.

    Returns
    -------
    list of dict per detected text line with reference-line positions,
    ascender/descender info and 'line_type' of 1.0 (full) or 0.5 (half).
    """
    h, w = image.shape
    features = []

    # Horizontal projection: ink count per row (inverted so text scores high).
    horizontal_proj = np.sum(1 - image, axis=1)

    # Smooth the projection curve before segmenting text lines.
    smoothed_proj = gaussian_filter1d(horizontal_proj, sigma=1.5)

    # Detect contiguous text-line regions above a relative threshold.
    line_regions = []
    in_text = False
    start = 0
    threshold = 0.1 * np.max(smoothed_proj)
    for i, val in enumerate(smoothed_proj):
        if val > threshold and not in_text:
            in_text = True
            start = i
        elif val <= threshold and in_text:
            in_text = False
            line_regions.append((start, i))
    if in_text:  # region running off the bottom edge
        line_regions.append((start, h - 1))

    # Analyse every region, including its line-height type.
    for start, end in line_regions:
        line_height = end - start
        if line_height < 5:  # too small to be a real text line
            continue
        line_img = image[start:end, :]
        vertical_proj = np.sum(1 - line_img, axis=0)

        # Only columns that actually contain ink are considered.
        text_cols = np.where(vertical_proj > 0)[0]
        if len(text_cols) == 0:
            continue

        # Per-column first/last ink row (vectorized; the original scanned
        # each column in a Python loop).
        ink = (line_img == 0)
        lh = line_img.shape[0]
        col_has_ink = ink.any(axis=0)
        top_edge = np.where(col_has_ink, ink.argmax(axis=0), 0)
        bottom_edge = np.where(col_has_ink, lh - 1 - ink[::-1, :].argmax(axis=0), 0)

        # Baseline: typical bottom ink row; topline: typical top ink row.
        baseline_pos = np.median(bottom_edge[text_cols])
        topline_pos = np.median(top_edge[text_cols])

        # Descenders (g, j, p, q, y): bottoms clearly below the baseline.
        deep = bottom_edge[text_cols]
        deeper = deep[deep > baseline_pos + 3]
        descender_pos = deeper.max() if deeper.size else baseline_pos

        # Ascenders (b, d, f, h, k, l): tops clearly above the topline.
        high = top_edge[text_cols]
        higher = high[high < topline_pos - 3]
        ascender_pos = higher.min() if higher.size else topline_pos

        # Midline (x-height) between topline and baseline.
        midline_pos = (topline_pos + baseline_pos) / 2

        # BUGFIX: decide the ascender/descender flags BEFORE moving
        # topline_pos — the original reassigned topline_pos first, so
        # 'has_ascender' was always False and 'ascender_height' always 0.
        raw_topline = topline_pos
        has_asc = ascender_pos < raw_topline - 3
        has_desc = descender_pos > baseline_pos + 3
        if has_asc:
            topline_pos = ascender_pos
        if has_desc:
            bottomline_pos = descender_pos
        else:
            bottomline_pos = baseline_pos  # no descender: bottomline == baseline

        # Classify the line height using known letter sizes:
        # lowercase 'a' is ~28 px, capitals/extenders ~40 px.
        text_height = bottomline_pos - topline_pos
        if 35 <= text_height <= 45:
            line_type = 1.0  # full line (capitals or extenders present)
        elif 25 <= text_height <= 34:
            line_type = 1.0  # full line, lowercase only
        elif 15 <= text_height <= 24:
            line_type = 0.5  # half line
        else:
            line_type = 1.0  # default: treat as full line

        features.append({
            'start': start,
            'end': end,
            'baseline': baseline_pos,
            'midline': midline_pos,
            'topline': topline_pos,
            'bottomline': bottomline_pos,
            'height': line_height,
            'text_height': text_height,  # measured ink height
            'line_type': line_type,      # 1.0 = full line, 0.5 = half line
            'has_ascender': has_asc,
            'has_descender': has_desc,
            'ascender_height': raw_topline - ascender_pos if has_asc else 0,
            'descender_height': descender_pos - baseline_pos if has_desc else 0
        })

    return features


# Extract features of all fragments (timed, with progress output).
print("开始提取特征...")
start_time = time.time()
fragment_features = []
line_type_counts = []  # summed line-type value per fragment
for i in range(img.shape[0]):
    features = extract_english_features(img[i])
    fragment_features.append(features)
    if features:
        line_types = [feat['line_type'] for feat in features]
        line_type_counts.append(sum(line_types))
    else:
        line_type_counts.append(0)
    if (i + 1) % 10 == 0:
        print(f"已处理 {i + 1}/{img.shape[0]} 个碎片")
end_time = time.time()
print(f"特征提取完成,耗时 {end_time - start_time:.2f} 秒")

# Distribution of the line-height types across fragments.
plt.figure(figsize=(10, 6))
plt.hist(line_type_counts, bins=np.arange(0, 3.5, 0.5), edgecolor='black')
plt.title('碎片行高度类型分布')
plt.xlabel('行高度类型(1.0=整行, 0.5=半行)')
plt.ylabel('碎片数量')
plt.xticks([0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
plt.grid(True, axis='y')
plt.savefig('line_type_distribution.png')
plt.close()

print("行高度类型统计:")
print(Counter(line_type_counts))
# 5.
# Distance between two fragments' line features (lower = more similar).
def feature_distance(feat1, feat2):
    """Weighted absolute difference of two fragments' reference lines."""
    if not feat1 or not feat2:
        return float('inf')  # fragments without features never match

    # A fragment normally contains a single text line — use the first.
    f1 = feat1[0]
    f2 = feat2[0]

    baseline_diff = abs(f1['baseline'] - f2['baseline'])
    midline_diff = abs(f1['midline'] - f2['midline'])
    topline_diff = abs(f1['topline'] - f2['topline'])
    bottomline_diff = abs(f1['bottomline'] - f2['bottomline'])
    ascender_diff = abs(f1.get('ascender_height', 0) - f2.get('ascender_height', 0))
    descender_diff = abs(f1.get('descender_height', 0) - f2.get('descender_height', 0))
    line_type_diff = abs(f1['line_type'] - f2['line_type'])

    distance = (baseline_diff * 0.3 +
                midline_diff * 0.2 +
                topline_diff * 0.2 +
                bottomline_diff * 0.1 +
                ascender_diff * 0.05 +
                descender_diff * 0.05 +
                line_type_diff * 0.1)  # line-height type carries 10% weight
    return distance


# Pairwise distance matrix over all fragments.
n = img.shape[0]
print("开始计算距离矩阵...")
dist_matrix = np.zeros((n, n))
for i in range(n):
    for j in range(i + 1, n):  # symmetric: compute the upper triangle only
        dist = feature_distance(fragment_features[i], fragment_features[j])
        dist_matrix[i, j] = dist
        dist_matrix[j, i] = dist
    if (i + 1) % 10 == 0:
        print(f"已计算 {i + 1}/{n} 个碎片的距离")
print("距离矩阵计算完成")

# BUGFIX: replace infinite distances (feature-less fragments) with a large
# finite value — average-linkage clustering cannot average inf distances.
dist_matrix[~np.isfinite(dist_matrix)] = 1e6

# Hierarchical clustering on the precomputed feature-line distances.
print("开始层次聚类...")
clustering = AgglomerativeClustering(
    n_clusters=row,
    metric='precomputed',
    linkage='average',
    distance_threshold=None
)
cluster_labels = clustering.fit_predict(dist_matrix)
print("聚类完成")

# Turn the labels into per-row fragment lists.
row_assignments = [[] for _ in range(row)]
for frag_id, label in enumerate(cluster_labels):
    row_assignments[label].append(frag_id)


def visualize_clusters(cluster_labels, row_assignments):
    """Save an annotated preview image for every cluster.

    `cluster_labels` is accepted for interface compatibility but unused —
    `row_assignments` already encodes the grouping.
    """
    os.makedirs('cluster_visualization', exist_ok=True)

    for cluster_idx, fragments in enumerate(row_assignments):
        if not fragments:
            print(f"聚类 {cluster_idx} 没有碎片")
            continue
        print(f"聚类 {cluster_idx} 有 {len(fragments)} 个碎片")

        # All fragments should share one size (180x72 per the task statement).
        heights = set(img[frag_id].shape[0] for frag_id in fragments)
        widths = set(img[frag_id].shape[1] for frag_id in fragments)
        if len(heights) != 1 or len(widths) != 1:
            print(f"警告: 聚类 {cluster_idx} 中碎片尺寸不一致")
            max_height = max(img[frag_id].shape[0] for frag_id in fragments)
            max_width = max(img[frag_id].shape[1] for frag_id in fragments)
        else:
            max_height = next(iter(heights))
            max_width = next(iter(widths))

        spacing = 10      # gap between fragments
        label_space = 40  # room for the ID labels
        total_width = sum(img[frag_id].shape[1] for frag_id in fragments) + (len(fragments) - 1) * spacing

        # White canvas with extra space below for the labels.
        cluster_img = np.ones((max_height + label_space, total_width), dtype=np.uint8) * 255

        x_offset = 0
        for frag_id in fragments:
            frag_img = img[frag_id]
            h, w = frag_img.shape
            y_start = 10
            cluster_img[y_start:y_start + h, x_offset:x_offset + w] = frag_img * 255
            # Fragment ID below the fragment.
            cv2.putText(
                img=cluster_img,
                text=f"ID:{frag_id}",
                org=(x_offset + w // 2 - 20, y_start + h + 30),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.7,
                color=0,
                thickness=2
            )
            x_offset += w + spacing  # advance past the fragment plus gap

        cv2.imwrite(f'cluster_visualization/cluster_{cluster_idx}.png', cluster_img)

        plt.figure(figsize=(15, 5))
        plt.imshow(cluster_img, cmap='gray')
        plt.title(f'聚类 {cluster_idx} (碎片数量: {len(fragments)})')
        plt.axis('off')
        plt.savefig(f'cluster_visualization/cluster_{cluster_idx}_plot.png')
        plt.close()


print("可视化聚类结果...")
visualize_clusters(cluster_labels, row_assignments)
# 6.
# Post-process the clusters using the line-height type.
reserve_list = []            # overflow fragments removed from full rows
line_type_rows = [0] * row   # dominant line-height type per row

# Dominant line-height type of every row.
for row_idx, fragments in enumerate(row_assignments):
    if not fragments:
        continue
    row_line_types = []
    for frag_id in fragments:
        if fragment_features[frag_id]:
            row_line_types.append(fragment_features[frag_id][0]['line_type'])
    if row_line_types:
        line_type_rows[row_idx] = max(set(row_line_types), key=row_line_types.count)

print("各行的主要行高度类型:")
for i, t in enumerate(line_type_rows):
    print(f"行 {i}: {'整行(1.0)' if t == 1.0 else '半行(0.5)' if t == 0.5 else '未知'}")

# Trim over-full rows: drop the worst-matching fragments into the reserve.
for row_idx, fragments in enumerate(row_assignments):
    if len(fragments) <= col:
        continue  # nothing to trim
    row_distances = []
    for frag_id in fragments:
        other_ids = [f for f in fragments if f != frag_id]
        if not other_ids:
            avg_distance = 0
        else:
            distances = []
            for other in other_ids:
                type_match = 1.0
                if fragment_features[frag_id] and fragment_features[other]:
                    t1 = fragment_features[frag_id][0]['line_type']
                    t2 = fragment_features[other][0]['line_type']
                    if abs(t1 - t2) > 0.5:  # mismatched line-height type
                        type_match = 2.0    # penalty factor
                dist = dist_matrix[frag_id, other] * type_match
                distances.append(dist)
            avg_distance = np.mean(distances)
        row_distances.append((frag_id, avg_distance))
    # Worst-matching fragments first.
    row_distances.sort(key=lambda x: x[1], reverse=True)
    while len(fragments) > col:
        frag_id, _ = row_distances.pop(0)
        fragments.remove(frag_id)
        reserve_list.append(frag_id)

# 7. Re-assign reserved and feature-less fragments.
unassigned = reserve_list[:]
for frag_id in range(n):
    if not fragment_features[frag_id]:
        if frag_id not in unassigned:
            unassigned.append(frag_id)

for frag_id in unassigned[:]:  # iterate a copy so we can remove in place
    if not fragment_features[frag_id]:
        continue  # fragments without features cannot be type-matched
    frag_type = fragment_features[frag_id][0]['line_type']

    # Wide left margin: likely a row start — try the first row.
    if left[frag_id] >= fenge:
        if len(row_assignments[0]) < col and abs(frag_type - line_type_rows[0]) < 0.6:
            row_assignments[0].append(frag_id)
            if frag_id in unassigned:
                unassigned.remove(frag_id)
            continue

    # Wide right margin: likely a row end — try the last row.
    if right[frag_id] >= fenge:
        if len(row_assignments[-1]) < col and abs(frag_type - line_type_rows[-1]) < 0.6:
            row_assignments[-1].append(frag_id)
            if frag_id in unassigned:
                unassigned.remove(frag_id)
            # BUGFIX: the original fell through here, so a fragment just
            # placed in the last row could also be appended to a second row.
            continue

    # Otherwise: first row with matching line-height type and free space.
    for row_idx in range(row):
        if len(row_assignments[row_idx]) < col and abs(frag_type - line_type_rows[row_idx]) < 0.1:
            row_assignments[row_idx].append(frag_id)
            if frag_id in unassigned:
                unassigned.remove(frag_id)
            break

print("行分配结果:")
for i, fragments in enumerate(row_assignments):
    print(f"行 {i}: {len(fragments)} 个碎片 - {fragments}")
    if fragments:
        types = []
        for frag_id in fragments:
            if fragment_features[frag_id]:
                types.append(fragment_features[frag_id][0]['line_type'])
        if types:
            print(f"  行高度类型: 平均={np.mean(types):.2f}, 主要={Counter(types).most_common(1)[0][0]}")

print(f"备选列表中的碎片数量: {len(reserve_list)}")
print(f"未分配的碎片: {unassigned}")


def visualize_row_assignments(row_assignments, img, output_dir='row_assignment_results'):
    """Save per-row preview images with fragment IDs annotated.

    Parameters
    ----------
    row_assignments : list of fragment-ID lists, one per row.
    img : array of binarized fragments.
    output_dir : directory receiving the preview PNGs.
    """
    os.makedirs(output_dir, exist_ok=True)

    # One combined preview figure with a subplot per row.
    fig, axes = plt.subplots(len(row_assignments), 1, figsize=(15, 5 * len(row_assignments)))
    if len(row_assignments) == 1:
        axes = [axes]  # keep axes iterable for a single row
    fig.suptitle('行分配结果', fontsize=20)

    for row_idx, row_fragments in enumerate(row_assignments):
        if not row_fragments:
            print(f"行 {row_idx} 没有碎片")
            continue
        print(f"行 {row_idx} 有 {len(row_fragments)} 个碎片")

        # Fragments should all have the same size.
        heights = set(img[frag_id].shape[0] for frag_id in row_fragments)
        widths = set(img[frag_id].shape[1] for frag_id in row_fragments)
        if len(heights) != 1 or len(widths) != 1:
            print(f"警告: 行 {row_idx} 中碎片尺寸不一致")
            max_height = max(img[frag_id].shape[0] for frag_id in row_fragments)
            max_width = max(img[frag_id].shape[1] for frag_id in row_fragments)
        else:
            max_height = next(iter(heights))
            max_width = next(iter(widths))

        spacing = 10       # gap between fragments
        label_space = 150  # room for the labels
        total_width = sum(img[frag_id].shape[1] for frag_id in row_fragments) + (len(row_fragments) - 1) * spacing
        row_img = np.ones((max_height + label_space, total_width), dtype=np.uint8) * 255

        x_offset = 0
        for frag_id in row_fragments:
            frag_img = img[frag_id]
            h, w = frag_img.shape
            y_start = 10
            row_img[y_start:y_start + h, x_offset:x_offset + w] = frag_img * 255
            # Fragment ID above the fragment.
            cv2.putText(
                img=row_img,
                text=f"ID:{frag_id}",
                org=(x_offset + w // 2 - 20, y_start - 5),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.7,
                color=0,
                thickness=2
            )
            # Row index below the fragment.
            cv2.putText(
                img=row_img,
                text=f"Row:{row_idx}",
                org=(x_offset + w // 2 - 30, y_start + h + 30),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.5,
                color=0,
                thickness=1
            )
            x_offset += w + spacing

        cv2.imwrite(f'{output_dir}/row_{row_idx}.png', row_img)

        ax = axes[row_idx]
        ax.imshow(row_img, cmap='gray')
        ax.set_title(f'行 {row_idx} (碎片数量: {len(row_fragments)})')
        ax.axis('off')

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f'{output_dir}/all_rows.png')
    plt.close()
    print(f"行分配结果已保存到 {output_dir} 目录")


print("\n生成行分配结果可视化...")
visualize_row_assignments(row_assignments, img, output_dir='row_assignment_results')
# 8.
# Preview the row grouping.
def visualize_row_grouping(row_assignments):
    """Save a concatenated preview image for every row."""
    for row_id, fragments in enumerate(row_assignments):
        if not fragments:
            print(f"行 {row_id} 没有碎片")
            continue
        print(f"行 {row_id} 有 {len(fragments)} 个碎片")
        row_img = img[fragments[0]]
        for frag_id in fragments[1:]:
            row_img = np.hstack((row_img, img[frag_id]))
        plt.figure(figsize=(15, 3))
        plt.imshow(row_img, cmap='gray')
        plt.title(f'行 {row_id} 分组预览 (行高度类型: {line_type_rows[row_id]})')
        plt.axis('off')
        plt.savefig(f'row_{row_id}_preview.png')
        plt.close()


print("行分组结果预览...")
visualize_row_grouping(row_assignments)

# 9. Manual intervention: move fragments between rows.
# IMPROVED (per request): loop so several rounds of adjustments can be
# entered; press Enter on an empty prompt to finish.
while True:
    print("当前行分配:")
    for row_id, fragments in enumerate(row_assignments):
        print(f"行 {row_id}: {fragments}")

    adjustments = input("输入需要调整的碎片ID和目标行(格式: 碎片ID:目标行, 多个用分号分隔): ")
    if not adjustments:
        break
    for adj in adjustments.split(';'):
        if ':' not in adj:
            continue
        try:
            frag_id, target_row = map(int, adj.split(':'))
        except ValueError:
            # Skip malformed entries instead of crashing the script.
            print(f"无法解析: {adj}")
            continue
        # BUGFIX: do not name the loop variable `row` — that clobbered the
        # module-level row count and broke the result table later on.
        for frags in row_assignments:
            if frag_id in frags:
                frags.remove(frag_id)
                break
        if 0 <= target_row < len(row_assignments):
            row_assignments[target_row].append(frag_id)

    # Recompute each row's dominant line-height type after the changes.
    for row_idx, fragments in enumerate(row_assignments):
        if not fragments:
            line_type_rows[row_idx] = 0
            continue
        row_line_types = []
        for frag_id in fragments:
            if fragment_features[frag_id]:
                row_line_types.append(fragment_features[frag_id][0]['line_type'])
        if row_line_types:
            line_type_rows[row_idx] = max(set(row_line_types), key=row_line_types.count)


# 10. In-row ordering (line-height-type aware).
def sort_fragments_in_row(fragments):
    """Greedy edge matching; penalizes mismatched line-height types."""
    if len(fragments) < 2:
        return fragments

    # Start from the fragment with the widest left margin (row start).
    left_margins = [left[i] for i in fragments]
    start_idx = fragments[np.argmax(left_margins)]
    sorted_frags = [start_idx]
    remaining = set(fragments)
    remaining.remove(start_idx)

    while remaining:
        current = sorted_frags[-1]
        best_match = None
        best_score = -1
        for candidate in remaining:
            current_right = img[current][:, -1]    # last column of current
            candidate_left = img[candidate][:, 0]  # first column of candidate
            match_count = np.sum(current_right == candidate_left)
            # Extra weight where both edges carry ink.
            text_match = np.sum((current_right == 0) & (candidate_left == 0))
            score = match_count + 2 * text_match
            # Fragments of the same row should share a line-height type.
            if fragment_features[current] and fragment_features[candidate]:
                type_diff = abs(fragment_features[current][0]['line_type'] -
                                fragment_features[candidate][0]['line_type'])
                score *= (1.0 - type_diff)  # larger mismatch, lower score
            if score > best_score:
                best_score = score
                best_match = candidate
        if best_match is not None:
            sorted_frags.append(best_match)
            remaining.remove(best_match)
        else:
            sorted_frags.append(remaining.pop())
    return sorted_frags


sorted_rows = []
for row_id, fragments in enumerate(row_assignments):
    if not fragments:
        sorted_rows.append([])
        continue
    sorted_frags = sort_fragments_in_row(fragments)
    sorted_rows.append(sorted_frags)

    # Visualize the result immediately after sorting this row (per request).
    row_img = img[sorted_frags[0]]
    for frag_id in sorted_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'行 {row_id} 排序结果 (行高度类型: {line_type_rows[row_id]})')
    plt.axis('off')
    plt.savefig(f'row_{row_id}_sorted.png')
    plt.close()

    # Manual correction of the in-row order, repeatable until confirmed.
    while True:
        manual_adjust = input(f"行 {row_id} 排序是否正确?(y/n): ")
        if manual_adjust.lower() != 'n':
            break
        print("当前顺序:", sorted_frags)
        try:
            new_order = list(map(int, input("输入正确顺序(用空格分隔): ").split()))
        except ValueError:
            print("无法解析,请重新输入")
            continue
        sorted_rows[row_id] = new_order
        sorted_frags = new_order
# 11.
# Inter-row ordering (line-height-type aware).
def sort_rows(rows):
    """Greedily order the rows by matching bottom/top fragment edges."""
    if len(rows) < 2:
        return rows

    # Start from the row whose first fragment has the most top whitespace.
    top_margins = []
    for frags in rows:
        if not frags:
            top_margins.append(0)
            continue
        frag_img = img[frags[0]]
        for y in range(frag_img.shape[0]):
            if np.any(frag_img[y] == 0):  # first ink row
                top_margins.append(y)
                break
        else:
            top_margins.append(frag_img.shape[0])

    start_idx = np.argmax(top_margins)
    ordered = [rows[start_idx]]
    remaining = set(range(len(rows)))
    remaining.remove(start_idx)

    while remaining:
        current_row = ordered[-1]
        if not current_row:  # empty row — cannot match further
            break
        current_bottom = img[current_row[0]][-1]  # bottom edge of first fragment
        best_match = None
        best_score = -1
        for candidate_idx in remaining:
            candidate_row = rows[candidate_idx]
            if not candidate_row:
                continue
            candidate_top = img[candidate_row[0]][0]  # top edge of first fragment
            match_count = np.sum(current_bottom == candidate_top)
            text_match = np.sum((current_bottom == 0) & (candidate_top == 0))
            score = match_count + 2 * text_match
            # BUGFIX: the original evaluated line_type_rows[current_row_idx]
            # with the current row *list* as index (TypeError at runtime).
            # Its body was `pass` anyway, so the broken check is removed:
            # adjacent-row type differences are intentionally not penalized.
            if score > best_score:
                best_score = score
                best_match = candidate_idx
        if best_match is not None:
            ordered.append(rows[best_match])
            remaining.remove(best_match)
        else:
            ordered.append(rows[remaining.pop()])
    return ordered


final_row_order = sort_rows(sorted_rows)

# 12. Preview the final row order.
print("行间排序结果预览...")
for i, row_frags in enumerate(final_row_order):
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    # NOTE(review): line_type_rows is indexed by the ORIGINAL row id, not
    # the reordered position i — the label may be wrong after reordering.
    plt.title(f'最终排序 - 行 {i} (行高度类型: {line_type_rows[i]})')
    plt.axis('off')
    plt.savefig(f'final_row_{i}.png')
    plt.close()

# Manual correction of the row order.
manual_adjust = input("行间排序是否正确?(y/n): ")
if manual_adjust.lower() == 'n':
    print("当前行顺序:", [i for i in range(len(final_row_order))])
    new_order = list(map(int, input("输入正确行顺序(用空格分隔): ").split()))
    final_row_order = [final_row_order[i] for i in new_order]

# 13. Stitch the final image.
full_image = None
for row_frags in final_row_order:
    if not row_frags:
        continue
    row_img = img[row_frags[0]]
    for frag_id in row_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    if full_image is None:
        full_image = row_img
    else:
        # Optional 10-px blank separator between rows.
        separator = np.ones((10, row_img.shape[1]), dtype=row_img.dtype)
        full_image = np.vstack((full_image, separator))
        full_image = np.vstack((full_image, row_img))

if full_image is not None:
    # Invert back to 0-255: ink -> black, blank -> white.
    full_image = (1 - full_image) * 255
    full_image = full_image.astype(np.uint8)
    final_img = Image.fromarray(full_image)
    final_img.save('result4.png')
    print("最终拼接结果已保存为 'result4.png'")
else:
    print("错误: 无法拼接图像")


# 14. Fragment-order table.
def create_result_table(final_row_order):
    """Build a rectangular fragment-id table; -1 marks empty slots.

    BUGFIX: rows can have different lengths — pad every row to a common
    width so np.array() does not produce a ragged object array.
    """
    table = [list(r) if r else [] for r in final_row_order]
    # Expected number of rows; guard against `row` having been shadowed
    # into a list by earlier module-level loops.
    n_rows = row if isinstance(row, int) else len(table)
    while len(table) < n_rows:
        table.append([])
    width = max([col] + [len(r) for r in table])
    table = [r + [-1] * (width - len(r)) for r in table]
    return np.array(table)


result_table = create_result_table(final_row_order)
print("碎片顺序表格:")
print(result_table)

pd.DataFrame(result_table).to_csv('result4.csv', index=False, header=False)
print("碎片顺序表格已保存为 'result4.csv'")
# 15.
# Save the per-row line-height type information.
# BUGFIX: write with an explicit UTF-8 encoding — the content is Chinese and
# the platform default codec may raise UnicodeEncodeError.
with open('line_types.txt', 'w', encoding='utf-8') as f:
    f.write("行索引\t行高度类型\n")
    for i, row_frags in enumerate(final_row_order):
        if row_frags:
            types = []
            for frag_id in row_frags:
                if fragment_features[frag_id]:
                    types.append(fragment_features[frag_id][0]['line_type'])
            avg_type = np.mean(types) if types else 0
            f.write(f"{i}\t{avg_type:.1f}\n")
        else:
            f.write(f"{i}\t0.0\n")
print("行高度类型信息已保存为 'line_types.txt'")

# 16. Save any unassigned fragments.
if unassigned:
    print(f"未分配的碎片: {unassigned}")
    with open('unassigned_fragments.txt', 'w', encoding='utf-8') as f:
        f.write("未分配的碎片:\n")
        f.write(", ".join(map(str, unassigned)))
        # Append the line-height type of every unassigned fragment.
        f.write("\n\n行高度类型信息:\n")
        for frag_id in unassigned:
            if fragment_features[frag_id]:
                t = fragment_features[frag_id][0]['line_type']
                f.write(f"碎片 {frag_id}: {'整行' if t == 1.0 else '半行' if t == 0.5 else '未知'}\n")
            else:
                f.write(f"碎片 {frag_id}: 无特征\n")
else:
    print("所有碎片都已成功分配")
修改人工调试部分,使能够完成多次输入,且在行内排序后添加可视化
08-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值