缘 in English

有缘千里来相会 (Fate brings together people who are far apart) -- saying 缘 in English

缘 reason; cause; sake; relationship; edge; fringe
  
缘分 predestined relationship

血缘 blood relationship

姻缘 predestined marriage

化缘 beg for alms

天缘巧合 a lucky coincidence

有人缘 enjoy great popularity

投缘的街坊们 congenial neighbors

天赐良缘 a god-sent marriage; a good marriage arranged in Heaven

有缘结识某人 be lucky to get acquainted with sb.

无缘结识某人 have no opportunity to get acquainted with sb.

婚姻是缘分。 A couple's conjugal fate is prearranged.

有缘终相逢。 Fate brings together people who are far apart.

无缘不相逢。 There is no meeting without predestination.

我与烟酒无缘。 Smoking and drinking don't appeal to me.

好事似乎与他无缘。 Good luck seemed to be wholly denied to him.

他们俩有情无缘。 They are attracted to each other but are not fated to be conjugally tied.

机缘凑巧,我找到一份工作。 As luck would have it, I found a job.

千里姻缘一线牵。 Two beings destined to marry each other, though thousands of miles apart, are tied together with an invisible red thread by an old man under the moonlight.

有缘千里来相会,无缘对面不相逢。 As decreed by providence you have met him; otherwise you might have failed although you traveled a long way.

Reposted from: https://www.cnblogs.com/skylaugh/archive/2006/08/15/477767.html

import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from collections import Counter
from PIL import Image
import pandas as pd
from scipy.ndimage import gaussian_filter1d
from sklearn.cluster import KMeans
from scipy.signal import find_peaks
from sklearn.preprocessing import StandardScaler

plt.rcParams['font.sans-serif'] = ['SimHei']   # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False     # display minus signs correctly

# 1. Read the images and preprocess them
data_dir = './附件4'
path = data_dir + '/*.bmp'
coll = io.ImageCollection(path)   # read the grayscale fragments
img_num = len(coll)

# Convert to an array and binarize
img = np.asarray(coll)
for i in range(len(coll)):
    img[i] = cv2.adaptiveThreshold(
        src=img[i],
        maxValue=1,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=13,
        C=2
    )
print("图像数据形状:", img.shape)

# 2. Compute the left and right margins of every fragment
left = []
right = []
for i in range(img.shape[0]):
    # left margin
    count = 0
    for y in range(img.shape[2]):                # columns, left to right
        if np.any(img[i, :, y] == 0):            # this column contains text
            break
        count += 1
    left.append(count)
    # right margin
    count = 0
    for y in range(img.shape[2] - 1, -1, -1):    # columns, right to left
        if np.any(img[i, :, y] == 0):            # this column contains text
            break
        count += 1
    right.append(count)

plt.figure(figsize=(10, 6))
plt.scatter(range(len(left)), left, label='左边距')
plt.scatter(range(len(right)), right, label='右边距')
plt.title('碎片左右边距分布')
plt.xlabel('碎片索引')
plt.ylabel('边距大小(像素)')
plt.legend()
plt.grid(True)
plt.show()

print("左边距统计:", Counter(left))
print("右边距统计:", Counter(right))

# 3. Identify line-start and line-end fragments
fenge = 10   # margin threshold; above it a fragment is treated as a line start/end
col = 19     # number of columns
row = 16     # number of rows

# line-end fragments
end_index = [i for i, r in enumerate(right) if r >= fenge]
print(f"行尾碎片数量: {len(end_index)}")

# line-start fragments
first_index = [i for i, l in enumerate(left) if l >= fenge]
print(f"行首碎片数量: {len(first_index)}")

# 4. Extract the "four lines, three bands" features of English text
def extract_english_features(image):
    """Extract the four-line/three-band features of English text.

    image: preprocessed binary image (0 = text, 1 = background)
    """
    h, w = image.shape
    features = []

    # 1) horizontal projection (inverted so text rows score high)
    horizontal_proj = np.sum(1 - image, axis=1)

    # 2) smooth the projection curve
    smoothed_proj = gaussian_filter1d(horizontal_proj, sigma=1.5)

    # 3) detect the text-line regions
    line_regions = []
    in_text = False
    start = 0
    threshold = 0.1 * np.max(smoothed_proj)
    for i, val in enumerate(smoothed_proj):
        if val > threshold and not in_text:
            in_text = True
            start = i
        elif val <= threshold and in_text:
            in_text = False
            line_regions.append((start, i))
    # handle a line that runs to the bottom edge
    if in_text:
        line_regions.append((start, h - 1))

    # 4) analyse the four-line/three-band features of each line
    line_features = []
    for start, end in line_regions:
        line_height = end - start
        if line_height < 5:          # ignore regions that are too small
            continue
        line_img = image[start:end, :]
        # vertical projection
        vertical_proj = np.sum(1 - line_img, axis=0)
        # baseline (bottom of the letters)
        baseline_pos = np.argmax(vertical_proj)
        # midline (x-height of lowercase letters)
        mid_threshold = 0.5 * np.max(vertical_proj)
        mid_region = np.where(vertical_proj > mid_threshold)[0]
        if len(mid_region) > 0:
            midline_pos = np.mean(mid_region)
        else:
            midline_pos = baseline_pos - 0.3 * line_height
        # top line (height of capital letters)
        top_threshold = 0.7 * np.max(vertical_proj)
        top_region = np.where(vertical_proj > top_threshold)[0]
        if len(top_region) > 0:
            topline_pos = np.mean(top_region)
        else:
            topline_pos = midline_pos - 0.2 * line_height
        # bottom line (bottom of descenders)
        bottom_threshold = 0.6 * np.max(vertical_proj)
        bottom_region = np.where(vertical_proj > bottom_threshold)[0]
        if len(bottom_region) > 0:
            bottomline_pos = np.mean(bottom_region)
        else:
            bottomline_pos = baseline_pos + 0.2 * line_height
        line_features.append({
            'start': start,
            'end': end,
            'baseline': baseline_pos,
            'midline': midline_pos,
            'topline': topline_pos,
            'bottomline': bottomline_pos,
            'height': line_height
        })
    return line_features

# extract the four-line/three-band features of every fragment
fragment_features = []
for i in range(img.shape[0]):
    features = extract_english_features(img[i])
    fragment_features.append(features)

# 5. Determine the line structure of the whole document
all_baselines = []
all_heights = []
for features in fragment_features:
    for line in features:
        all_baselines.append(line['baseline'])
        all_heights.append(line['height'])

# cluster the baselines to locate the document's text lines
if len(all_baselines) > 0:
    kmeans = KMeans(n_clusters=row, random_state=42)
    baseline_clusters = kmeans.fit_predict(np.array(all_baselines).reshape(-1, 1))
    # the cluster centres are the baseline positions of the document's lines
    document_baselines = sorted([center[0] for center in kmeans.cluster_centers_])
    print("文档行基线位置:", document_baselines)
else:
    # no baselines detected: fall back to default positions
    document_baselines = np.linspace(20, 150, row).tolist()
    print("警告: 未检测到基线,使用默认基线位置:", document_baselines)

# 6. Assign the fragments to lines
row_assignments = [[] for _ in range(row)]   # one bucket per document line
for frag_id, features in enumerate(fragment_features):
    if not features:                 # no text line detected in this fragment
        # try to assign it from its blank margins
        if left[frag_id] >= fenge:
            row_assignments[0].append(frag_id)    # probably the first line
        elif right[frag_id] >= fenge:
            row_assignments[-1].append(frag_id)   # probably the last line
        else:
            pass                      # cannot decide; leave unassigned for now
        continue
    # match score between this fragment and every document line
    match_scores = []
    for doc_baseline in document_baselines:
        # distance from the closest baseline inside the fragment
        min_dist = float('inf')
        for line in features:
            dist = abs(line['baseline'] - doc_baseline)
            if dist < min_dist:
                min_dist = dist
        match_scores.append(min_dist)
    # pick the best-matching line (smallest distance)
    best_row = np.argmin(match_scores)
    row_assignments[best_row].append(frag_id)

# collect the fragments that were not assigned
unassigned = []
for frag_id in range(len(img)):
    assigned = False
    for assigned_row in row_assignments:     # do not shadow the global `row`
        if frag_id in assigned_row:
            assigned = True
            break
    if not assigned:
        unassigned.append(frag_id)
print(f"未分配的碎片数量: {len(unassigned)}")

# 7. Visualize the grouping into lines
def visualize_row_grouping(row_assignments):
    for row_id, fragments in enumerate(row_assignments):
        if not fragments:
            print(f"行 {row_id} 没有碎片")
            continue
        print(f"行 {row_id} 有 {len(fragments)} 个碎片")
        # build a preview of the line
        row_img = img[fragments[0]]
        for frag_id in fragments[1:]:
            row_img = np.hstack((row_img, img[frag_id]))
        plt.figure(figsize=(15, 3))
        plt.imshow(row_img, cmap='gray')
        plt.title(f'行 {row_id} 分组预览')
        plt.axis('off')
        plt.show()

print("行分组结果预览...")
visualize_row_grouping(row_assignments)

# 8. Manual intervention: adjust the line grouping
print("当前行分配:")
for row_id, fragments in enumerate(row_assignments):
    print(f"行 {row_id}: {fragments}")

adjustments = input("输入需要调整的碎片ID和目标行(格式: 碎片ID:目标行, 多个用分号分隔): ")
if adjustments:
    for adj in adjustments.split(';'):
        if ':' in adj:
            frag_id, target_row = map(int, adj.split(':'))
            # remove the fragment from its current line
            for assigned_row in row_assignments:
                if frag_id in assigned_row:
                    assigned_row.remove(frag_id)
                    break
            # add it to the target line
            if 0 <= target_row < len(row_assignments):
                row_assignments[target_row].append(frag_id)

# 9. Order the fragments inside each line
def sort_fragments_in_row(fragments):
    if len(fragments) < 2:
        return fragments
    # start from the leftmost fragment (largest left margin)
    left_margins = [left[i] for i in fragments]
    start_idx = fragments[np.argmax(left_margins)]
    sorted_frags = [start_idx]
    remaining = set(fragments)
    remaining.remove(start_idx)
    while remaining:
        current = sorted_frags[-1]
        best_match = None
        best_score = -1
        for candidate in remaining:
            # compare the right edge of the current fragment with the left edge of the candidate
            current_right = img[current][:, -1]     # last column of the current fragment
            candidate_left = img[candidate][:, 0]   # first column of the candidate
            # pixel agreement
            match_count = np.sum(current_right == candidate_left)
            # give extra weight to matching text pixels
            text_match = np.sum((current_right == 0) & (candidate_left == 0))
            score = match_count + 2 * text_match
            if score > best_score:
                best_score = score
                best_match = candidate
        if best_match is not None:
            sorted_frags.append(best_match)
            remaining.remove(best_match)
        else:
            # no match found: take an arbitrary remaining fragment
            sorted_frags.append(remaining.pop())
    return sorted_frags

# order every line
sorted_rows = []
for row_id, fragments in enumerate(row_assignments):
    if not fragments:
        sorted_rows.append([])
        continue
    sorted_frags = sort_fragments_in_row(fragments)
    sorted_rows.append(sorted_frags)
    # visualize the ordering of this line
    row_img = img[sorted_frags[0]]
    for frag_id in sorted_frags[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'行 {row_id} 排序结果')
    plt.axis('off')
    plt.show()
    # manual intervention: correct the in-line order
    manual_adjust = input(f"行 {row_id} 排序是否正确?(y/n): ")
    if manual_adjust.lower() == 'n':
        print("当前顺序:", sorted_frags)
        new_order = list(map(int, input("输入正确顺序(用空格分隔): ").split()))
        sorted_rows[row_id] = new_order

# 10. Order the lines
def sort_rows(rows):
    if len(rows) < 2:
        return rows
    # start from the topmost line (largest top margin)
    top_margins = []
    for line in rows:
        if not line:
            top_margins.append(0)
            continue
        row_img = img[line[0]]
        for y in range(row_img.shape[0]):
            if np.any(row_img[y] == 0):      # first pixel row containing text
                top_margins.append(y)
                break
        else:
            top_margins.append(row_img.shape[0])
    start_idx = np.argmax(top_margins)
    ordered = [rows[start_idx]]
    remaining = set(range(len(rows)))
    remaining.remove(start_idx)
    while remaining:
        current_line = ordered[-1]
        if not current_line:                  # empty line
            break
        current_bottom = img[current_line[0]][-1]      # last pixel row of the line's first fragment
        best_match = None
        best_score = -1
        for candidate_idx in remaining:
            candidate_line = rows[candidate_idx]
            if not candidate_line:
                continue
            candidate_top = img[candidate_line[0]][0]  # first pixel row of the candidate's first fragment
            # compare the bottom of the current line with the top of the candidate
            match_count = np.sum(current_bottom == candidate_top)
            # give extra weight to matching text pixels
            text_match = np.sum((current_bottom == 0) & (candidate_top == 0))
            score = match_count + 2 * text_match
            if score > best_score:
                best_score = score
                best_match = candidate_idx
        if best_match is not None:
            ordered.append(rows[best_match])
            remaining.remove(best_match)
        else:
            # no match found: take an arbitrary remaining line
            ordered.append(rows[remaining.pop()])
    return ordered

final_row_order = sort_rows(sorted_rows)

# 11. Visualize the line ordering
print("行间排序结果预览...")
for i, line in enumerate(final_row_order):
    if not line:
        continue
    row_img = img[line[0]]
    for frag_id in line[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    plt.figure(figsize=(15, 3))
    plt.imshow(row_img, cmap='gray')
    plt.title(f'最终排序 - 行 {i}')
    plt.axis('off')
    plt.show()

# manual intervention: correct the line order
manual_adjust = input("行间排序是否正确?(y/n): ")
if manual_adjust.lower() == 'n':
    print("当前行顺序:", [i for i in range(len(final_row_order))])
    new_order = list(map(int, input("输入正确行顺序(用空格分隔): ").split()))
    final_row_order = [final_row_order[i] for i in new_order]

# 12. Stitch the final image and save it
full_image = None
for line in final_row_order:
    if not line:
        continue
    row_img = img[line[0]]
    for frag_id in line[1:]:
        row_img = np.hstack((row_img, img[frag_id]))
    if full_image is None:
        full_image = row_img
    else:
        # optional blank separator between lines
        separator = np.ones((10, row_img.shape[1]), dtype=row_img.dtype)   # 10-pixel blank strip
        full_image = np.vstack((full_image, separator))
        full_image = np.vstack((full_image, row_img))

# save the result
if full_image is not None:
    # convert back to the 0-255 range (invert: 0 becomes 255/white, 1 becomes 0/black)
    full_image = (1 - full_image) * 255
    full_image = full_image.astype(np.uint8)
    final_img = Image.fromarray(full_image)
    final_img.save('result4.png')
    print("最终拼接结果已保存为 'result4.png'")
else:
    print("错误: 无法拼接图像")

# 13. Output the fragment-order table
def create_result_table(final_row_order):
    table = []
    for line in final_row_order:
        if line:
            table.append(line)
        else:
            table.append([-1] * col)    # placeholder for an empty line
    # pad the table to the expected number of rows
    while len(table) < row:
        table.append([-1] * col)
    return np.array(table)

result_table = create_result_table(final_row_order)
print("碎片顺序表格:")
print(result_table)

# save the table as CSV
pd.DataFrame(result_table).to_csv('result4.csv', index=False, header=False)
print("碎片顺序表格已保存为 'result4.csv'")

# 14. Save the unassigned fragments
if unassigned:
    print(f"未分配的碎片: {unassigned}")
    with open('unassigned_fragments.txt', 'w') as f:
        f.write("未分配的碎片:\n")
        f.write(", ".join(map(str, unassigned)))
else:
    print("所有碎片都已成功分配")

Optimize the feature extraction function so that only the bottom and the top of the lowercase letter a are used as reference lines for feature extraction; judge the similarity between different fragments by how well their top and bottom lines match, and cluster them on that basis.
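A minimal sketch of how that request might be approached, reusing the binary-image convention above (0 = text, 1 = background) and the gaussian_filter1d and KMeans tools already imported. The names extract_a_band_features and cluster_fragments_by_band are hypothetical, and the 0.5 threshold that picks out the x-height band (the strip a lowercase "a" occupies) is an assumption that would need tuning on the real fragments.

def extract_a_band_features(image):
    """Return (top, bottom) of the x-height band for each text line in a fragment.

    image: binary fragment (0 = ink, 1 = background), same convention as above.
    """
    h, _ = image.shape
    proj = gaussian_filter1d(np.sum(1 - image, axis=1).astype(float), sigma=1.5)
    if proj.max() == 0:
        return []                           # blank fragment
    # The x-height band is where the projection is strongest: letters like 'a'
    # put ink across this whole band, while ascenders and descenders add only
    # a little above and below it.
    band_threshold = 0.5 * proj.max()
    in_band = proj >= band_threshold
    bands = []
    start = None
    for y, flag in enumerate(in_band):
        if flag and start is None:
            start = y
        elif not flag and start is not None:
            if y - start >= 5:              # ignore tiny blobs
                bands.append((start, y))    # (top of 'a', bottom of 'a')
            start = None
    if start is not None and h - start >= 5:
        bands.append((start, h))
    return bands

def cluster_fragments_by_band(images, n_rows):
    """Group fragments into rows by clustering the (top, bottom) of their first band."""
    feats, ids = [], []
    for i, im in enumerate(images):
        bands = extract_a_band_features(im)
        if bands:
            top, bottom = bands[0]
            feats.append([top, bottom])
            ids.append(i)
    labels = KMeans(n_clusters=n_rows, random_state=42).fit_predict(np.array(feats))
    groups = [[] for _ in range(n_rows)]
    for frag_id, lab in zip(ids, labels):
        groups[lab].append(frag_id)
    return groups

# Example use, assuming img and row are defined as in the script above:
# row_assignments = cluster_fragments_by_band(img, row)

Fragments whose top and bottom band positions nearly coincide would then land in the same cluster, which is one way to read "judge similarity by the match of the top and bottom lines"; blank fragments still need the margin-based fallback from step 6.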