function classifyFragments(fragmentPath)
% classifyFragments  Sort scanned paper fragments into sides and text rows.
%
%   classifyFragments() processes every *.bmp file in the default folder.
%   classifyFragments(fragmentPath) processes the given folder instead.
%
%   Pipeline: extract per-fragment features, split the fragments into two
%   groups with k-means (labelled front/back), cluster each group into rows
%   with classifyRows, lay each group out as a 19-column table with
%   createResultTable and write the tables with saveResults.
%
%   Raises classifyFragments:notEnoughFragments when fewer than two *.bmp
%   files are found, because the two-group k-means split needs two points.

% Fragment folder (defaults to the original hard-coded location)
if nargin < 1 || isempty(fragmentPath)
    fragmentPath = 'D:\BaiduNetdiskDownload\MATLAB R2024a\bin\project\附件5';
end

% Collect all fragment files
fileInfo = dir(fullfile(fragmentPath, '*.bmp'));
fragmentFiles = {fileInfo.name};
numFragments = numel(fragmentFiles);
if numFragments < 2
    error('classifyFragments:notEnoughFragments', ...
        'Need at least 2 *.bmp fragments in "%s" (found %d).', ...
        fragmentPath, numFragments);
end

% Initialise feature storage and metadata
fprintf('步骤1: 初始化数据结构...\n');
features = cell(numFragments, 1);
fragmentNames = cell(numFragments, 1);

% Parameters
standardHeight = 100; % every fragment is resampled to this many rows

% Step 1: feature extraction
fprintf('步骤2: 提取碎片特征 (%d个碎片)...\n', numFragments);
for i = 1:numFragments
    fprintf(' 处理碎片 %d/%d: %s\n', i, numFragments, fragmentFiles{i});

    % Read the image
    fileName = fragmentFiles{i};
    img = imread(fullfile(fragmentPath, fileName));

    % Convert to greyscale when the image has three colour planes
    if size(img, 3) == 3
        grayImg = rgb2gray(img);
    else
        grayImg = img;
    end

    % Store the file name without its extension
    [~, name, ~] = fileparts(fileName);
    fragmentNames{i} = name;

    % Only the height is needed below (to clamp the edge bands)
    h = size(grayImg, 1);

    % 1. Mean grey level of every pixel row (original resolution)
    rowMean = mean(grayImg, 2);

    % 2. Same profile on a height-normalised copy so that all feature
    %    vectors have exactly standardHeight entries
    resizedImg = imresize(grayImg, [standardHeight, NaN]);
    resizedRowMean = mean(resizedImg, 2);

    % 3. Mean grey level of the top and bottom bands (up to 5 rows each)
    topEdge = mean(grayImg(1:min(5, h), :), 'all');
    bottomEdge = mean(grayImg(max(1, h-4):h, :), 'all');

    % 4. Text density: share of pixels the adaptive threshold marks dark
    binaryImg = imbinarize(grayImg, 'adaptive');
    textDensity = 1 - mean(binaryImg(:));

    % 5. Horizontal projection of dark pixels on the normalised height
    resizedBinaryImg = imresize(binaryImg, [standardHeight, NaN]);
    horizontalProfile = sum(~resizedBinaryImg, 2);

    % Bundle all features of this fragment
    features{i} = struct(...
        'rowMean', rowMean, ...
        'resizedRowMean', resizedRowMean, ... % fixed length: standardHeight
        'topEdge', topEdge, ...
        'bottomEdge', bottomEdge, ...
        'textDensity', textDensity, ...
        'horizontalProfile', horizontalProfile); % fixed length: standardHeight
end

% Step 2: split the fragments into two groups (text density + top edge)
fprintf('步骤3: 正反面分组...\n');
densityValues = cellfun(@(f) f.textDensity, features);
topEdgeValues = cellfun(@(f) f.topEdge, features);

% Text density lives in 0..1 while the edge brightness spans 0..255; without
% rescaling, the Euclidean distance used by k-means would be decided almost
% entirely by the edge brightness. z-scoring puts both columns on one scale.
groupFeatures = zscore([densityValues, topEdgeValues]);

% Seed the generator so repeated runs give the same split, and restore the
% caller's generator state when this function exits.
previousRng = rng;
restoreRng = onCleanup(@() rng(previousRng)); %#ok<NASGU>
rng(0, 'twister');
[groupIdx, groupCenters] = kmeans(groupFeatures, 2, ...
    'Replicates', 5, 'MaxIter', 1000);

% The group with the higher text density is labelled "front". z-scoring is
% monotonic, so comparing the standardised centres gives the same ordering
% as comparing raw densities.
if groupCenters(1, 1) > groupCenters(2, 1)
    frontGroup = 1;
    backGroup = 2;
else
    frontGroup = 2;
    backGroup = 1;
end

% Separate the two groups
frontIndices = find(groupIdx == frontGroup);
backIndices = find(groupIdx == backGroup);
fprintf(' 识别结果: 正面碎片=%d, 反面碎片=%d\n', ...
    numel(frontIndices), numel(backIndices));

% Step 3: row classification, done independently for each group
fprintf('步骤4: 行内分类处理...\n');
fprintf(' 处理正面碎片 (%d个)...\n', numel(frontIndices));
frontRows = classifyRows(features(frontIndices), fragmentNames(frontIndices), standardHeight);
fprintf(' 处理反面碎片 (%d个)...\n', numel(backIndices));
backRows = classifyRows(features(backIndices), fragmentNames(backIndices), standardHeight);

% Step 4: build the result tables (one row per cluster, 19 columns)
fprintf('步骤5: 创建结果表格...\n');
[frontTable, frontMissing] = createResultTable(frontRows, fragmentNames(frontIndices));
[backTable, backMissing] = createResultTable(backRows, fragmentNames(backIndices));

% Step 5: save the results
fprintf('步骤6: 保存结果...\n');
saveResults(fragmentPath, frontTable, backTable, frontMissing, backMissing);
fprintf('处理完成!结果已保存至目录: %s\n', fragmentPath);
end
function rows = classifyRows(features, names, standardHeight)
% classifyRows  Cluster fragments into 11 rows by their row-brightness profile.
%
%   rows = classifyRows(features, names, standardHeight)
%
%   features        cell array of structs; the fields resizedRowMean and
%                   topEdge are read here
%   names           cell array of fragment names, parallel to features
%   standardHeight  length every resizedRowMean profile must have
%
%   rows            11x1 cell array; each entry holds the names of one
%                   cluster. Clusters are ordered by ascending mean topEdge
%                   and empty clusters come last.
%
%   NOTE(review): topEdge is the mean grey level of a fragment's top band,
%   so this ordering is a brightness heuristic, not a measured page
%   position - confirm it matches the intended row order.

numFragments = numel(features);
numRows = 11; % fixed number of rows

% Build the feature matrix, resampling any profile of the wrong length
featureMatrix = zeros(numFragments, standardHeight);
for i = 1:numFragments
    rowProfile = double(features{i}.resizedRowMean(:));
    if numel(rowProfile) ~= standardHeight
        rowProfile = imresize(rowProfile, [standardHeight, 1]);
    end
    featureMatrix(i, :) = rowProfile';
end

if numFragments < 2
    % pdist/linkage need at least two observations; zero or one fragment
    % trivially forms (at most) a single group.
    rowGroups = ones(numFragments, 1);
else
    % Correlation distance subtracts each profile's mean first, so the
    % comparison is driven by where the dark text lines sit rather than by
    % the large constant white-background level that dominates a plain
    % cosine distance on raw grey levels.
    fprintf(' 计算距离矩阵...\n');
    distanceMatrix = pdist(featureMatrix, 'correlation');
    % A constant profile (e.g. a blank fragment) has no defined correlation;
    % treat it as uncorrelated with everything so linkage gets finite input.
    distanceMatrix(~isfinite(distanceMatrix)) = 1;
    fprintf(' 进行层次聚类...\n');
    clusterTree = linkage(distanceMatrix, 'average');
    fprintf(' 形成聚类分组...\n');
    rowGroups = cluster(clusterTree, 'maxclust', numRows);
end

% Collect the member names of every cluster
rows = cell(numRows, 1);
for i = 1:numRows
    rows{i} = names(rowGroups == i);
end

% Order the clusters by their mean top-edge value. Empty clusters get Inf
% so they sort after every populated cluster instead of being interleaved
% on an unrelated 1..11 scale.
fprintf(' 按行位置排序...\n');
rowPositions = inf(numRows, 1);
for i = 1:numRows
    memberIdx = find(rowGroups == i);
    if ~isempty(memberIdx)
        rowPositions(i) = mean(cellfun(@(f) f.topEdge, features(memberIdx)));
    end
end
[~, sortIdx] = sort(rowPositions);
rows = rows(sortIdx);
end
function [resultTable, missingFragments] = createResultTable(rows, allNames)
% createResultTable  Lay clustered fragment names out as a 19-column table.
%
%   [resultTable, missingFragments] = createResultTable(rows, allNames)
%
%   rows              cell array; rows{k} is a cell array of the fragment
%                     names assigned to row k
%   allNames          cell array of every fragment name that should appear
%
%   resultTable       numel(rows)-by-19 cell array. Each row holds its
%                     fragment names sorted by the first run of digits in
%                     the name; unused cells stay empty.
%   missingFragments  names that did not get a table cell: fragments beyond
%                     the 19th of a row, followed by names from allNames
%                     that appear in no row. Each name is listed once.

maxColumns = 19;
numRows = numel(rows);
resultTable = cell(numRows, maxColumns);
missingFragments = {};

% Tracks every fragment already accounted for: true = placed in the table,
% false = already recorded as overflow. The key's presence alone is what the
% final scan checks, so overflow fragments are not reported a second time.
seenFragments = containers.Map('KeyType', 'char', 'ValueType', 'logical');

fprintf(' 填充表格行...\n');
for rowIdx = 1:numRows
    rowFragments = rows{rowIdx};
    numInRow = numel(rowFragments);

    % Numeric sort key: first run of digits in the name, 0 when there is none
    numbers = zeros(numInRow, 1);
    for i = 1:numInRow
        numStr = regexp(rowFragments{i}, '\d+', 'match');
        if ~isempty(numStr)
            numbers(i) = str2double(numStr{1});
        end
    end

    % Sort by that key
    [~, sortIdx] = sort(numbers);
    sortedFragments = rowFragments(sortIdx);

    % Place at most maxColumns fragments in this table row
    numToPlace = min(numel(sortedFragments), maxColumns);
    for colIdx = 1:numToPlace
        resultTable{rowIdx, colIdx} = sortedFragments{colIdx};
        seenFragments(sortedFragments{colIdx}) = true;
    end

    % Record the overflow once and mark it as seen
    for i = (maxColumns + 1):numel(sortedFragments)
        missingFragments{end+1} = sortedFragments{i}; %#ok<AGROW>
        seenFragments(sortedFragments{i}) = false;
    end
end

% Append fragments that were never assigned to any row
fprintf(' 检查未分类碎片...\n');
for i = 1:numel(allNames)
    if ~isKey(seenFragments, allNames{i})
        missingFragments{end+1} = allNames{i}; %#ok<AGROW>
    end
end
end
function saveResults(outputPath, frontTable, backTable, frontMissing, backMissing)
% saveResults  Write the classification tables to spreadsheets and display them.
%
%   saveResults(outputPath, frontTable, backTable, frontMissing, backMissing)
%
%   outputPath                 folder that receives the .xlsx files
%   frontTable, backTable      cell arrays of fragment names, one per side
%   frontMissing, backMissing  cell arrays of names that got no table cell
%
%   Writes one workbook per side, plus a workbook of unplaced fragments when
%   there are any, prints summary counts and opens one figure per side with
%   the table. A failure while building the figures is reported on the
%   console and does not abort the function.
%
%   NOTE(review): the original final line had non-code text fused onto the
%   closing `end` (a request to make the classification more accurate),
%   which is a syntax error; it has been removed from the code.

% Save the front-side table. 'overwritesheet' clears the sheet first so
% cells left over from an earlier run cannot survive in the output.
frontOutput = fullfile(outputPath, '正面复原表格.xlsx');
fprintf(' 保存正面表格到: %s\n', frontOutput);
writecell(frontTable, frontOutput, 'Sheet', '正面结果', ...
    'WriteMode', 'overwritesheet');

% Save the back-side table
backOutput = fullfile(outputPath, '反面复原表格.xlsx');
fprintf(' 保存反面表格到: %s\n', backOutput);
writecell(backTable, backOutput, 'Sheet', '反面结果', ...
    'WriteMode', 'overwritesheet');

% Save the fragments that were not placed in either table
allMissing = [frontMissing(:); backMissing(:)];
if ~isempty(allMissing)
    missingOutput = fullfile(outputPath, '未分类碎片.xlsx');
    fprintf(' 保存未分类碎片到: %s\n', missingOutput);
    writecell(allMissing, missingOutput, 'Sheet', '未分类碎片', ...
        'WriteMode', 'overwritesheet');
end

% Print summary statistics
fprintf('\n统计信息:\n');
fprintf(' 正面表格: %d行 x %d列\n', size(frontTable, 1), size(frontTable, 2));
fprintf(' 反面表格: %d行 x %d列\n', size(backTable, 1), size(backTable, 2));
fprintf(' 未分类碎片: %d个\n', numel(allMissing));

% Show both tables; headers follow the actual table size
fprintf(' 创建可视化结果...\n');
try
    figure('Name', '正面分类结果', 'Position', [100, 100, 1200, 600]);
    uitable('Data', frontTable, 'Position', [20, 20, 1160, 560], ...
        'ColumnName', compose('列%d', 1:size(frontTable, 2)), ...
        'RowName', compose('行%d', 1:size(frontTable, 1)));
    figure('Name', '反面分类结果', 'Position', [200, 100, 1200, 600]);
    uitable('Data', backTable, 'Position', [20, 20, 1160, 560], ...
        'ColumnName', compose('列%d', 1:size(backTable, 2)), ...
        'RowName', compose('行%d', 1:size(backTable, 1)));
catch ME
    % Visualisation is best-effort; the spreadsheets are already written
    fprintf(' 可视化创建失败: %s\n', ME.message);
end
end