% Main entry point of the fragment classification pipeline
function [front_table, back_table] = classify_text_fragments(folder_path)
% CLASSIFY_TEXT_FRAGMENTS  Split scanned fragments into two sides and tabulate them.
%
%   [front_table, back_table] = classify_text_fragments()
%   [front_table, back_table] = classify_text_fragments(folder_path)
%
%   Input
%     folder_path  (optional) folder holding the *.bmp fragments. When it is
%                  omitted or empty the original hard-coded folder is used.
%
%   Outputs
%     front_table, back_table  11x19 cell arrays of file names without
%                  extension; row r of a table holds the fragments that
%                  process_side assigned to row r, ordered left to right.
%                  Unused slots contain ''.
%
%   Side effects: opens several figures, writes fragment_tables.mat into the
%   current folder and two .xlsx files into folder_path.
%
%   NOTE(review): the front/back split is an unsupervised 2-means clustering
%   of two projection statistics. Nothing in this file shows that these
%   statistics really separate the two sides; if the file names already
%   encode which scans belong together, using that information is likely to
%   be more reliable. Confirm against the data set.

if nargin < 1 || isempty(folder_path)
    folder_path = 'D:\BaiduNetdiskDownload\MATLAB R2024a\bin\project\附件5';
end

% Collect all .bmp files; fail early instead of crashing inside kmeans.
file_list = dir(fullfile(folder_path, '*.bmp'));
num_files = numel(file_list);
if num_files < 2
    error('classify_text_fragments:notEnoughFiles', ...
        '在 "%s" 中找到的 .bmp 文件不足 2 个,无法进行聚类。', folder_path);
end
filenames = {file_list.name}';

% Overview window (3x3 grid of panels)
main_fig = figure('Name', '碎片分类可视化', 'Position', [100, 100, 1200, 800]);

% Step 0: preview of the fragments
subplot(3, 3, 1);
show_fragments_preview(folder_path, filenames, 16);
title('碎片预览 (随机16个)');

% Read every image and convert it to a binary ink mask
images_bin = cell(num_files, 1);
for i = 1:num_files
    img = imread(fullfile(folder_path, filenames{i}));
    if size(img, 3) == 3
        img = rgb2gray(img);
    end
    if islogical(img)
        % already 1-bit; imbinarize does not list logical as a supported input
        ink_mask = img;
    else
        ink_mask = imbinarize(img); % Otsu threshold: true = bright pixels
    end
    % imbinarize marks the bright class as true. Text is the minority class
    % of a fragment, so flip the mask whenever "true" is the majority; after
    % this step true means ink, which is what all projections below assume.
    if nnz(ink_mask) > numel(ink_mask) / 2
        ink_mask = ~ink_mask;
    end
    images_bin{i} = ink_mask;
end

% Step 1: front/back split, clustering on horizontal-projection statistics
% Features: mean and variance of the number of ink pixels per image row
features = zeros(num_files, 2);
for i = 1:num_files
    row_sum = sum(images_bin{i}, 2); % ink pixels in each row
    features(i, :) = [mean(row_sum), var(row_sum)];
end

% Feature distribution
subplot(3, 3, 2);
scatter(features(:,1), features(:,2), 30, 'filled', 'MarkerFaceColor', [0.5 0.5 0.9]);
xlabel('水平投影均值');
ylabel('水平投影方差');
title('正反面特征分布');
grid on;

% 2-means clustering. The two features live on very different scales
% (a variance versus a mean), so they are standardised first; otherwise the
% Euclidean distance is dominated by the variance column. The parallel
% option of the original call is dropped: the data set is tiny, and a
% parallel run is not made reproducible by rng alone.
rng(1); % fixed seed for repeatable clustering
feat_mu = mean(features, 1);
feat_sigma = std(features, 0, 1);
feat_sigma(feat_sigma == 0) = 1; % constant feature: avoid division by zero
features_std = (features - feat_mu) ./ feat_sigma;
[idx_side, centroids_std] = kmeans(features_std, 2, 'Replicates', 5);
centroids = centroids_std .* feat_sigma + feat_mu; % back to plot units

% Label convention: the cluster with the larger mean projection (denser
% text) is called "front" (1), the other one "back" (2).
if mean(features(idx_side == 1, 1)) > mean(features(idx_side == 2, 1))
    side_labels = idx_side;
else
    side_labels = 3 - idx_side; % swap 1 <-> 2
end

% Clustering result
subplot(3, 3, 3);
scatter(features(side_labels == 1, 1), features(side_labels == 1, 2), 50, 'r', 'filled');
hold on;
scatter(features(side_labels == 2, 1), features(side_labels == 2, 2), 50, 'b', 'filled');
scatter(centroids(:,1), centroids(:,2), 200, 'kx', 'LineWidth', 2);
legend('正面', '反面', '聚类中心');
xlabel('水平投影均值');
ylabel('水平投影方差');
title('正反面聚类结果');
grid on;
hold off;

% Indices of the two sides
front_indices = find(side_labels == 1);
back_indices = find(side_labels == 2);
fprintf('正面碎片数量: %d\n', numel(front_indices));
fprintf('反面碎片数量: %d\n', numel(back_indices));

% Example fragments of each side
subplot(3, 3, 4);
show_side_examples(folder_path, filenames, front_indices, '正面示例');
subplot(3, 3, 5);
show_side_examples(folder_path, filenames, back_indices, '反面示例');

% Step 2: per side, group into rows and sort each row left to right
num_rows = 11;
num_cols = 19;
side_capacity = num_rows * num_cols;
if numel(front_indices) > side_capacity || numel(back_indices) > side_capacity
    warning('classify_text_fragments:unbalancedSides', ...
        '正反面聚类结果不均衡(正面 %d,反面 %d),单面表格容量为 %d,超出的碎片可能无法放入表格。', ...
        numel(front_indices), numel(back_indices), side_capacity);
end

% process_side draws into the current figure and also opens new figures,
% so the overview window is made current again before every call.
figure(main_fig);
[front_sorted, ~] = process_side(images_bin, front_indices, filenames, num_rows, num_cols, 6, '正面行分类');
figure(main_fig);
[back_sorted, ~] = process_side(images_bin, back_indices, filenames, num_rows, num_cols, 7, '反面行分类');

% process_side returns a flat list in which the num_cols entries of row 1
% come first, then row 2, and so on. reshape fills column by column, so the
% list is reshaped to num_cols x num_rows and then transposed.
front_table = reshape(front_sorted, num_cols, num_rows).';
back_table = reshape(back_sorted, num_cols, num_rows).';
% Replace unused slots by '' so that the table contains text only.
front_table(cellfun(@isempty, front_table)) = {''};
back_table(cellfun(@isempty, back_table)) = {''};

% Table previews in the overview window
figure(main_fig);
subplot(3, 3, 8);
show_table_preview(front_table, '正面表格预览');
subplot(3, 3, 9);
show_table_preview(back_table, '反面表格预览');

% Save the result to a MAT file
save('fragment_tables.mat', 'front_table', 'back_table');

% Full tables in their own windows
figure('Name', '正面碎片表格', 'Position', [100, 100, 1200, 600]);
uitable('Data', front_table, 'Position', [20, 20, 1160, 560], ...
    'ColumnName', compose('列%d', 1:num_cols), ...
    'RowName', compose('行%d', 1:num_rows));
figure('Name', '反面碎片表格', 'Position', [200, 100, 1200, 600]);
uitable('Data', back_table, 'Position', [20, 20, 1160, 560], ...
    'ColumnName', compose('列%d', 1:num_cols), ...
    'RowName', compose('行%d', 1:num_rows));

% Save the result to Excel
writecell(front_table, fullfile(folder_path, '正面碎片表.xlsx'));
writecell(back_table, fullfile(folder_path, '反面碎片表.xlsx'));
end
% Helper: group the fragments of one side into rows and sort every row
function [sorted_filenames, row_positions] = process_side(images_bin, indices, filenames, num_rows, num_cols, plot_position, plot_title)
% PROCESS_SIDE  Row clustering and left-to-right ordering for one side.
%
%   Inputs
%     images_bin     cell array of binary images, one per file
%     indices        positions (into images_bin / filenames) of this side
%     filenames      cell array of file names
%     num_rows       number of rows of the result table
%     num_cols       number of slots per row
%     plot_position  panel number inside the caller's 3x3 subplot grid
%     plot_title     title text, also used in figure names
%
%   Outputs
%     sorted_filenames  (num_rows*num_cols)x1 cell, ROW-MAJOR: entries
%                       (r-1)*num_cols+1 .. r*num_cols belong to row r.
%                       Names have no extension; unused slots stay [].
%     row_positions     per-fragment row feature (mean peak location of the
%                       smoothed horizontal projection, in pixels)
%
%   Rows are numbered by increasing row feature. A row that receives more
%   than num_cols fragments hands its worst-fitting fragments to the nearest
%   row that still has room, so nothing is dropped while the side holds at
%   most num_rows*num_cols fragments.
%
%   NOTE(review): the projections are sums of true pixels, so the comments
%   about "text lines" hold only if true means ink in images_bin - confirm
%   against the caller. The mean peak location and the first above-average
%   column are coarse features; treat the result as a first-pass grouping
%   that needs manual review.

indices = indices(:);
num_fragments = numel(indices);
sorted_filenames = cell(num_rows * num_cols, 1);
row_positions = zeros(num_fragments, 1);
if num_fragments == 0
    % kmeans cannot run on an empty set; return an empty table instead
    warning('process_side:noFragments', '%s: 没有可处理的碎片。', plot_title);
    return;
end

% Remember the figure that is current on entry: it hosts the caller's 3x3
% grid. It is made current again on exit because this function opens
% additional figures.
host_fig = gcf;

% Row feature of every fragment
for i = 1:num_fragments
    img_bin = images_bin{indices(i)};
    % horizontal projection, smoothed by a 5-sample moving average
    % (movmean is core MATLAB; near the ends its window shrinks)
    row_profile = movmean(sum(img_bin, 2), 5);
    if numel(row_profile) >= 3
        [~, locs] = findpeaks(row_profile, 'MinPeakProminence', max(row_profile) / 5);
    else
        locs = []; % findpeaks needs at least three samples
    end
    if isempty(locs)
        row_positions(i) = size(img_bin, 1) / 2; % no peak: use image centre
    else
        row_positions(i) = mean(locs);
    end
end

% Histogram of the row feature inside the caller's overview grid
set(groot, 'CurrentFigure', host_fig);
subplot(3, 3, plot_position);
histogram(row_positions, 30);
xlabel('行位置');
ylabel('碎片数量');
title(plot_title);
grid on;

% Cluster the row feature. k is limited to the number of distinct values
% because kmeans fails when it is asked for more clusters than points.
num_clusters = min(num_rows, numel(unique(row_positions)));
[idx_row, centroids] = kmeans(row_positions, num_clusters, 'Replicates', 10);

% kmeans numbers its clusters arbitrarily; renumber them so that row 1 has
% the smallest centre and the last row the largest.
[centroids, order] = sort(centroids);
new_label = zeros(num_clusters, 1);
new_label(order) = 1:num_clusters;
idx_row = new_label(idx_row);

% Enforce the per-row capacity instead of silently dropping the overflow
idx_before = idx_row;
idx_row = local_rebalance_rows(row_positions, idx_row, centroids, num_cols);
num_moved = nnz(idx_before ~= idx_row);
if num_moved > 0
    warning('process_side:rowsRebalanced', ...
        '%s: 有 %d 个碎片因所在行超过 %d 个而被移到最近的未满行,请人工复核。', ...
        plot_title, num_moved, num_cols);
end
row_counts = accumarray(idx_row, 1, [num_clusters, 1]);
num_unplaced = sum(max(row_counts - num_cols, 0));
if num_unplaced > 0
    warning('process_side:tableOverflow', ...
        '%s: 碎片数量超过表格容量,有 %d 个碎片未能放入表格。', ...
        plot_title, num_unplaced);
end

% Row clustering figure
figure;
set(gcf, 'Position', [200, 200, 1000, 600], 'Name', [plot_title, ' 聚类结果']);
subplot(1, 2, 1);
gscatter((1:num_fragments)', row_positions, idx_row);
hold on;
for i = 1:num_clusters
    yline(centroids(i), '--', sprintf('行%d中心', i), 'LineWidth', 1.5);
end
xlabel('碎片索引');
ylabel('行位置');
title('行位置聚类');
grid on;
hold off;
subplot(1, 2, 2);
plot(centroids, 1:num_clusters, 'o-', 'MarkerSize', 8, 'LineWidth', 2);
set(gca, 'YDir', 'reverse');
xlabel('行位置中心');
ylabel('行号');
title('行中心位置');
grid on;
ylim([0.5, num_rows + 0.5]);

% Sort the fragments of every row from left to right
col_positions = cell(num_rows, 1);
for row = 1:num_clusters
    members = indices(idx_row == row);
    % column feature: first column whose smoothed vertical projection
    % exceeds the mean of that projection
    left_edges = zeros(numel(members), 1);
    for j = 1:numel(members)
        col_profile = movmean(sum(images_bin{members(j)}, 1), 5);
        first_col = find(col_profile > mean(col_profile), 1, 'first');
        if isempty(first_col)
            left_edges(j) = 1; % flat projection: fall back to column 1
        else
            left_edges(j) = first_col;
        end
    end
    col_positions{row} = left_edges;

    [~, sort_order] = sort(left_edges);
    ordered_names = filenames(members(sort_order));
    % write the row into its row-major slot range, without file extension
    first_slot = (row - 1) * num_cols;
    for j = 1:min(numel(ordered_names), num_cols)
        [~, name_only] = fileparts(ordered_names{j});
        sorted_filenames{first_slot + j} = name_only;
    end
end

% Column feature histograms, one panel per row; the panel grid is derived
% from num_rows (3x4 for 11 rows) instead of being hard-coded
figure('Name', [plot_title, ' 列位置分布'], 'Position', [300, 300, 1000, 600]);
panel_cols = ceil(sqrt(num_rows));
panel_rows = ceil(num_rows / panel_cols);
for row = 1:num_rows
    subplot(panel_rows, panel_cols, row);
    if ~isempty(col_positions{row})
        histogram(col_positions{row}, 20);
        title(sprintf('行 %d (碎片: %d)', row, numel(col_positions{row})));
        xlabel('列位置');
        ylabel('数量');
        grid on;
    end
end

% Hand the current-figure role back to the figure that was current on entry
if ishghandle(host_fig)
    set(groot, 'CurrentFigure', host_fig);
end
end

% Private helper: greedily move fragments out of over-full rows
function idx_row = local_rebalance_rows(row_positions, idx_row, centroids, capacity)
% While some row holds more than `capacity` fragments and another row still
% has room, move the single fragment whose distance to its row centre grows
% the least. Each move lowers the total overflow by one, so the loop ends.
num_rows = numel(centroids);
idx_row = idx_row(:);
counts = accumarray(idx_row, 1, [num_rows, 1]);
dist = abs(row_positions(:) - centroids(:).'); % fragment x row distances
while true
    over_rows = find(counts > capacity);
    free_rows = find(counts < capacity);
    if isempty(over_rows) || isempty(free_rows)
        break;
    end
    candidates = find(ismember(idx_row, over_rows));
    current_cost = dist(sub2ind(size(dist), candidates, idx_row(candidates)));
    extra_cost = dist(candidates, free_rows) - current_cost;
    [~, best] = min(extra_cost(:));
    [c, f] = ind2sub(size(extra_cost), best);
    moved = candidates(c);
    counts(idx_row(moved)) = counts(idx_row(moved)) - 1;
    idx_row(moved) = free_rows(f);
    counts(free_rows(f)) = counts(free_rows(f)) + 1;
end
end
% Show a mosaic preview of randomly chosen fragments
function show_fragments_preview(folder_path, filenames, num_samples)
% SHOW_FRAGMENTS_PREVIEW  Draw up to num_samples fragments into the current axes.
%
%   folder_path  folder that contains the image files
%   filenames    cell array of file names inside folder_path
%   num_samples  requested number of tiles; fewer are shown when fewer
%                files exist
%
%   The tiles are squeezed into square cells of a 300x300 RGB canvas, so the
%   aspect ratio of the fragments is not preserved. The selection is drawn
%   with a fixed seed (42) and is therefore the same on every call.

% Never ask for more tiles than there are files; the loop below runs over
% the tiles that were actually drawn, not over the requested number.
num_shown = max(0, min(num_samples, numel(filenames)));

% Fixed seed for a repeatable selection; the caller's generator state is
% restored afterwards so this helper does not reseed the whole program.
saved_rng = rng;
rng(42);
sample_idx = randperm(numel(filenames), num_shown);
rng(saved_rng);

preview_size = 300;
preview_img = zeros(preview_size, preview_size, 3, 'uint8');
if num_shown > 0
    grid_size = ceil(sqrt(num_shown));           % tiles per canvas side
    cell_size = floor(preview_size / grid_size); % tile edge length in pixels
    for i = 1:num_shown
        img = imread(fullfile(folder_path, filenames{sample_idx(i)}));
        if size(img, 3) == 1
            img = repmat(img, [1, 1, 3]); % grey image -> three equal channels
        end
        img_resized = imresize(img, [cell_size, cell_size]);
        % zero-based grid coordinates of tile i, filled row by row
        row = floor((i - 1) / grid_size);
        col = mod(i - 1, grid_size);
        y_range = row * cell_size + (1:cell_size);
        x_range = col * cell_size + (1:cell_size);
        preview_img(y_range, x_range, :) = img_resized;
    end
end
imshow(preview_img);
end
% Show a few example fragments of one side
function show_side_examples(folder_path, filenames, indices, title_str)
% SHOW_SIDE_EXAMPLES  Draw up to four random fragments of one side.
%
%   folder_path  folder that contains the image files
%   filenames    cell array of file names inside folder_path
%   indices      positions in filenames that belong to the side
%   title_str    title placed above the mosaic
%
%   The chosen fragments are resized to square tiles and laid out row by
%   row on a 200x200 RGB canvas that is shown in the current axes.

canvas_px = 200;

% Random pick of at most four members of the side
how_many = min(4, numel(indices));
picked = indices(randperm(numel(indices), how_many));

% Square grid that is just large enough for the picked tiles
tiles_per_side = ceil(sqrt(how_many));
tile_px = floor(canvas_px / tiles_per_side);
canvas = zeros(canvas_px, canvas_px, 3, 'uint8');

k = 0;
while k < how_many
    k = k + 1;
    tile = imread(fullfile(folder_path, filenames{picked(k)}));
    if size(tile, 3) == 1
        tile = repmat(tile, [1, 1, 3]); % grey image -> three equal channels
    end
    tile = imresize(tile, [tile_px, tile_px]);

    % zero-based grid cell of tile k
    grid_row = ceil(k / tiles_per_side) - 1;
    grid_col = k - grid_row * tiles_per_side - 1;
    rows = grid_row * tile_px + (1:tile_px);
    cols = grid_col * tile_px + (1:tile_px);
    canvas(rows, cols, :) = tile;
end

imshow(canvas);
title(title_str);
end
% Show a small text preview of a result table
function show_table_preview(table_data, title_str)
% SHOW_TABLE_PREVIEW  Print the top-left corner of a table into the current axes.
%
%   table_data  table to preview (at most the first 5 rows and 5 columns
%               are shown)
%   title_str   title placed above the preview

% Cut out the top-left corner
num_preview_rows = min(5, size(table_data, 1));
num_preview_cols = min(5, size(table_data, 2));
% The variable name preview_data is referenced by the evalc string below.
preview_data = table_data(1:num_preview_rows, 1:num_preview_cols);

% Capture the console rendering of the corner and draw it as text. The TeX
% interpreter is switched off so that braces, underscores and backslashes
% in the rendering are drawn literally instead of being treated as markup.
text(0.5, 0.5, sprintf('表格预览:\n%s', evalc('disp(preview_data)')), ...
    'HorizontalAlignment', 'center', 'VerticalAlignment', 'middle', ...
    'FontSize', 9, 'Interpreter', 'none');
axis off;
title(title_str);
end
% NOTE (user feedback that was pasted after the final "end", translated):
% every fragment is known to be placeable, so none should be left out. The
% preliminary classification does not have to follow the 11x19 layout, but
% it must be reasonably accurate; the grouping produced by the current run
% is not accurate enough.