Line reconstruction by height

本文介绍了一种根据人员的高度和前面更高人员的数量来重建队伍的算法。提供了两种实现方式:一种是通过排序并插入的方式,另一种是预先按高度降序排列再进行插入。文中还附带了C++代码实现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Problem

Suppose you have a line of n people in which the k-th person is described by a pair (h, t), where h is the height of the k-th person and t is the number of people in front of the k-th person whose height is greater than or equal to h. Write an algorithm to reconstruct the line.

For example, if the line is composed of the following people (each given as (h, t, name)):

[(7, 0, A),(4, 4, B),(7, 1, C), (5, 0, D), (6, 1, E), (5, 2, F)]

The original line should be:

[(5, 0, D), (7, 0, A), (5, 2, F), (6, 1, E), (4, 4, B),(7, 1, C)]

Solution

每个输入信息表示为: (height, taller_num, name), 两种思路:
1. 排序:先以 taller_num 升序排序,如果相等则按 height 降序排列。然后将每个人插入到正确的位置:遍历每一个人 p,从结果 list 的开头开始统计身高不小于 p.height 的人数,直到该人数等于 p.taller_num 为止,此时所在的位置即为 p 在 res 中的位置,将 p 插入到该位置。
2. 先以height的降序排列,如果相等按照taller_num的升序排列。然后依然是找到每个人的正确位置插入, 不过这次要简单一些,遍历每个人(p), 将其插入list.begin() + p.taller_num位置。

Code

#include <algorithm>
#include <iostream>
#include <list>
#include <string>
#include <utility>
#include <vector>

using namespace std;

/// One person standing in the line.
///
/// `height` is the person's height; `taller_num` is the number of people
/// standing in front of this person whose height is greater than or equal
/// to `height`. `name` is only a label used when printing the result.
struct person{
  std::string name;
  int height;
  int taller_num;

  /// @param name label for the person (taken by value and moved into place,
  ///             so the string is not copied a second time)
  /// @param h    the person's height
  /// @param num  number of people in front with height >= h
  person(std::string name, int h, int num)
      : name(std::move(name)), height(h), taller_num(num) {}
};

bool cmp(person& p1, person& p2) {
  if(p1.taller_num != p2.taller_num)
    return p1.taller_num < p2.taller_num;
  return p1.height > p2.height;
}

void reorder(vector<person>& person_list, list<person>& res) {
  sort(person_list.begin(), person_list.end(), cmp);
  for(int i = 0; i < person_list.size(); ++i) {
    list<person>::iterator iter = res.begin();
    int n = person_list[i].taller_num;
    while(n > 0) {
      if(iter->height >= person_list[i].height)
        --n;
      ++iter;
    }
    res.insert(iter, person_list[i]);
  }
}

bool cmp2(person& p1, person&p2) {
  if(p1.height != p2.height) return p1.height > p2.height;
  return p1.taller_num < p2.taller_num;
}

void reorder2(vector<person>& person_list, list<person>& res) {
  sort(person_list.begin(), person_list.end(), cmp2);
  for(auto& p : person_list) {
    list<person>::iterator iter = res.begin();
    int n = p.taller_num;
    while(n-- > 0) {
      ++iter;
    }
    res.insert(iter, p);
  }
}

int main() {
  vector<person> person_list;
  person_list.push_back(person("A", 7, 0));
  person_list.push_back(person("B", 4, 4));
  person_list.push_back(person("C", 7, 1));
  person_list.push_back(person("D", 5, 0));
  person_list.push_back(person("E", 6, 1));
  person_list.push_back(person("F", 5, 2));
  list<person> res;
  reorder(person_list, res);
  for(person& p : res) {
    cout << p.name << " ";
  }
  cout << endl;
  res.clear();
  cout << "reorder2 :" << endl;
  reorder2(person_list, res);
  for(person& p : res) {
    cout << p.name << " ";
  }
  cout << endl;
}
% 主函数调用 paper_reconstruction_2d('附件4'); % 英文碎片 % paper_reconstruction_2d('附件3'); % 中文碎片 function paper_reconstruction_2d(folder_path) % 判断碎片类型 if contains(folder_path, '附件3') language = 'chinese'; disp('处理中文碎片...'); elseif contains(folder_path, '附件4') language = 'english'; disp('处理英文碎片...'); else error('未知文件夹,请输入"附件3"或"附件4"'); end % 读取所有碎片图像 file_list = dir(fullfile(folder_path, '*.bmp')); num_files = length(file_list); % 检查碎片数量 if num_files ~= 209 error('错误:需要209个碎片(11×19),当前数量:%d', num_files); end % 文件名排序 file_names = {file_list.name}; num_indices = zeros(1, num_files); for i = 1:num_files [~, name, ~] = fileparts(file_names{i}); num_indices(i) = str2double(name); end [~, sorted_idx] = sort(num_indices); file_list = file_list(sorted_idx); % 预处理所有碎片 fragments = cell(1, num_files); fragment_ids = cell(1, num_files); text_features = zeros(num_files, 3); % [顶线, 底线, 基线] baselines = zeros(num_files, 1); % 基线位置 for i = 1:num_files % 读取图像 img = imread(fullfile(folder_path, file_list(i).name)); if size(img, 3) == 3 fragments{i} = rgb2gray(img); else fragments{i} = img; end fragment_ids{i} = file_list(i).name(1:end-4); % 检测文字特征 if strcmp(language, 'chinese') [top, bottom] = detect_chinese_lines(fragments{i}); midline = (top + bottom) / 2; text_features(i,:) = [top, bottom, midline]; else % 英文使用OCR检测基线 [baseline, top, bottom] = detect_english_baseline_ocr(fragments{i}); baselines(i) = baseline; text_features(i,:) = [top, bottom, baseline]; end end % 标记所有碎片的基线(新增功能) visualize_baselines(fragments, baselines, text_features, language); % 按基线位置分组 row_groups = group_by_baseline(baselines, language); % 验证所有组都有19个碎片 for i = 1:11 if numel(row_groups{i}) ~= 19 fprintf('行%d碎片数量为%d,正在调整...\n', i, numel(row_groups{i})); row_groups = adjust_group_sizes(row_groups, baselines, language, i); end end % 对每行碎片进行水平拼接 row_sequences = cell(11, 1); for row = 1:11 row_frags = row_groups{row}; if length(row_frags) ~= 19 error('行%d碎片数量为%d,不是19', row, length(row_frags)); end if strcmp(language, 
'chinese') row_sequences{row} = sort_chinese_row(fragments, row_frags); else row_sequences{row} = sort_english_row(fragments, row_frags, baselines); end end % 按基线位置排序行(从上到下) row_order = sort_rows_by_baseline(row_groups, baselines); % 生成最终序列和结果表格 [final_sequence, result_table] = generate_results(row_sequences, row_order, fragment_ids); disp('复原结果表格:'); disp(result_table); % 可视化拼接结果(包含基线标记) visualize_reconstruction(fragments, baselines, row_sequences, row_order, language); end %% 新增功能:可视化所有碎片的基线 function visualize_baselines(fragments, baselines, features, language) % 创建新图窗 fig = figure('Name', '碎片基线标记', 'Position', [100, 100, 1200, 800], 'Color', [0.9 0.9 0.9]); % 设置子图布局 (11行 x 19列) num_frags = numel(fragments); rows = 11; cols = 19; % 计算每个碎片的尺寸 frag_height = size(fragments{1}, 1); frag_width = size(fragments{1}, 2); % 创建大画布 canvas = 255 * ones(rows * frag_height, cols * frag_width, 'uint8'); % 填充画布 for i = 1:num_frags row_idx = ceil(i / cols); col_idx = mod(i-1, cols) + 1; % 计算位置 y_start = (row_idx-1) * frag_height + 1; y_end = row_idx * frag_height; x_start = (col_idx-1) * frag_width + 1; x_end = col_idx * frag_width; % 放置碎片 canvas(y_start:y_end, x_start:x_end) = fragments{i}; % 标记基线(红色线) baseline = baselines(i); if baseline > 0 && baseline <= frag_height line_y = y_start + round(baseline) - 1; canvas(line_y, x_start:x_end) = 255; % 白色 canvas(line_y-1, x_start:x_end) = 0; % 黑色边框 canvas(line_y+1, x_start:x_end) = 0; % 黑色边框 end % 标记碎片编号 canvas = insertText(canvas, [x_start+5, y_start+5], num2str(i-1), ... 
'FontSize', 10, 'TextColor', 'red', 'BoxColor', 'white'); end % 显示结果 imshow(canvas); title(sprintf('所有碎片基线标记 (%s) - 红色为基线位置', language)); % 添加网格线 hold on; for i = 1:rows y = i * frag_height; line([1, cols*frag_width], [y, y], 'Color', 'blue', 'LineWidth', 1, 'LineStyle', '--'); end for j = 1:cols x = j * frag_width; line([x, x], [1, rows*frag_height], 'Color', 'blue', 'LineWidth', 1, 'LineStyle', '--'); end hold off; % 保存图像 saveas(fig, 'fragments_baselines.png'); end function [baseline, top, bottom] = detect_english_baseline_ocr(img) % 二值化图像 bw_img = imbinarize(img, 'adaptive'); % 尝试使用OCR检测文本 ocr_results = ocr(bw_img); % 修复1:添加OCR结果检查 if isempty(ocr_results.Words) || isempty(ocr_results.WordBoundingBoxes) % 使用投影法作为备用方案 horz_proj = sum(bw_img, 2); smoothed_proj = movmean(horz_proj, 5); % 动态阈值检测文字行 threshold = 0.5 * max(smoothed_proj); text_mask = smoothed_proj > threshold; % 找到文字区域 if any(text_mask) text_rows = find(text_mask); top = min(text_rows); bottom = max(text_rows); baseline = bottom; % 英文基线通常在文字底部 else % 完全无文字时的默认值 top = 1; bottom = size(img, 1); baseline = round(size(img, 1) * 0.8); % 默认在图像80%高度 end return; % 提前返回 end % 获取所有单词的基线位置 word_baselines = zeros(numel(ocr_results.Words), 1); for i = 1:numel(ocr_results.Words) word_box = ocr_results.WordBoundingBoxes(i,:); word_baselines(i) = word_box(2) + word_box(4); % y + height end % 修复2:添加空值检查 if isempty(word_baselines) baseline = round(size(img, 1) * 0.8); top = 1; bottom = size(img, 1); else baseline = mean(word_baselines); top = min(ocr_results.WordBoundingBoxes(:,2)); bottom = max(ocr_results.WordBoundingBoxes(:,2) + ocr_results.WordBoundingBoxes(:,4)); end end %% 分组算法(基于基线位置) function row_groups = group_by_baseline(baselines, language) % 使用K-means聚类将碎片分组到11行 k = 11; % 11行 % 设置聚类选项 opts = statset('UseParallel', true, 'MaxIter', 1000); % 执行聚类 [idx, centroids] = kmeans(baselines, k, 'Replicates', 5, 'Options', opts); % 创建分组 row_groups = cell(k, 1); for i = 1:k row_groups{i} = find(idx == i); end % 可视化分组结果 
fig = figure('Name', '基线位置分组', 'Position', [200, 200, 800, 600]); scatter(1:length(baselines), baselines, 50, idx, 'filled'); hold on; plot(centroids, 'kx', 'MarkerSize', 15, 'LineWidth', 3); hold off; xlabel('碎片序号'); ylabel('基线位置'); title(sprintf('%s碎片基线位置分组结果', language)); colormap(jet(k)); colorbar('Ticks', 1:k, 'TickLabels', 1:k); saveas(fig, 'baseline_grouping.png'); end %% 调整组大小确保每组19个碎片 function row_groups = adjust_group_sizes(row_groups, baselines, language, current_row) % 找出所有组的碎片数量 group_sizes = cellfun(@numel, row_groups); % 找出碎片过多的组 overflow_groups = find(group_sizes > 19); % 如果当前行不足19个碎片 if group_sizes(current_row) < 19 needed = 19 - group_sizes(current_row); % 从碎片过多的组中转移碎片 for i = 1:length(overflow_groups) group_idx = overflow_groups(i); excess = group_sizes(group_idx) - 19; if excess > 0 % 找出该组中距离当前组基线最远的碎片 current_baseline = mean(baselines(row_groups{current_row})); group_baselines = baselines(row_groups{group_idx}); % 计算距离并排序 dists = abs(group_baselines - current_baseline); [~, sorted_idx] = sort(dists, 'descend'); % 转移碎片 transfer_count = min(needed, excess); transfer_idx = sorted_idx(1:transfer_count); % 添加到当前组 row_groups{current_row} = [row_groups{current_row}; row_groups{group_idx}(transfer_idx)]; % 从原组移除 row_groups{group_idx}(transfer_idx) = []; % 更新需求数量 needed = needed - transfer_count; if needed <= 0 break; end end end end end %% 中文文字行检测 function [top_line, bottom_line] = detect_chinese_lines(img) % 增强文字区域检测 bw_img = imbinarize(img, 'adaptive'); horz_proj = sum(bw_img, 2); % 平滑投影曲线 smoothed_proj = movmean(horz_proj, 5); % 动态阈值检测文字行 threshold = 0.1 * max(smoothed_proj); text_mask = smoothed_proj > threshold; % 找到连续的文字区域 [labels, num] = bwlabel(text_mask); if num == 0 top_line = 1; bottom_line = size(img, 1); return; end % 取最大的连续文字区域 region_stats = regionprops(labels, 'Area', 'BoundingBox'); [~, idx] = max([region_stats.Area]); bbox = region_stats(idx).BoundingBox; top_line = floor(bbox(2)); bottom_line = floor(bbox(2) + bbox(4)); end %% 
中文行排序(宽度对应模型) function sequence = sort_chinese_row(fragments, frag_indices) num_frags = length(frag_indices); char_widths = zeros(1, num_frags); % 计算每个碎片的字符宽度特征 for i = 1:num_frags img = fragments{frag_indices(i)}; vert_proj = sum(imbinarize(img), 1); % 找到字符边界 char_edges = find(diff([0, vert_proj > 0.1*max(vert_proj), 0])); if isempty(char_edges) char_widths(i) = size(img, 2); else % 计算字符宽度统计特征 widths = diff(char_edges(1:2:end)); char_widths(i) = median(widths(widths > 5)); % 过滤噪声 end end % 估算平均字符宽度 avg_char_width = median(char_widths, 'omitnan'); % 构建匹配矩阵 match_matrix = inf(num_frags); for i = 1:num_frags img1 = fragments{frag_indices(i)}; right_edge = img1(:, end-10:end); % 右边缘区域 for j = 1:num_frags if i == j, continue; end img2 = fragments{frag_indices(j)}; left_edge = img2(:, 1:10); % 左边缘区域 % 计算重叠匹配度 overlap_scores = zeros(1, 10); for shift = 1:10 end_idx = size(right_edge, 2) - shift + 1; start_idx = shift; seg1 = right_edge(:, end_idx:end); seg2 = left_edge(:, 1:size(seg1, 2)); diff_val = sum(abs(double(seg1(:)) - double(seg2(:)))); overlap_scores(shift) = diff_val; end match_matrix(i,j) = min(overlap_scores); end end % 找到最左碎片(左侧空白最大) left_blanks = zeros(1, num_frags); for i = 1:num_frags img = fragments{frag_indices(i)}; vert_proj = sum(imbinarize(img), 1); left_blanks(i) = find(vert_proj > 0, 1) - 1; end [~, start] = max(left_blanks); % 构建序列 sequence = zeros(1, num_frags); sequence(1) = start; used = false(1, num_frags); used(start) = true; for pos = 2:num_frags [min_val, best] = min(match_matrix(sequence(pos-1), :)); if ~used(best) && isfinite(min_val) sequence(pos) = best; used(best) = true; else % 使用备选方案 unused = find(~used); [~, best_idx] = min(match_matrix(sequence(pos-1), unused)); sequence(pos) = unused(best_idx); used(unused(best_idx)) = true; end end sequence = frag_indices(sequence); end %% 英文行排序(灰度差值+基线对齐) function sequence = sort_english_row(fragments, frag_indices, baselines) num_frags = length(frag_indices); left_edges = cell(1, num_frags); 
right_edges = cell(1, num_frags); for i = 1:num_frags img = fragments{frag_indices(i)}; left_edges{i} = img(:,1); right_edges{i} = img(:,end); end % 创建匹配矩阵,考虑灰度差值和基线对齐 match_matrix = inf(num_frags); for i = 1:num_frags for j = 1:num_frags if i == j match_matrix(i,j) = inf; else % 计算灰度差异 diff_val = sum(abs(double(right_edges{i}) - double(left_edges{j}))); % 计算基线位置差异(重要!) baseline_diff = abs(baselines(frag_indices(i)) - baselines(frag_indices(j))); % 综合两个因素(基线对齐更重要) match_matrix(i,j) = diff_val + 20 * baseline_diff; end end end % 找到最左碎片(左侧空白最大) left_blanks = zeros(1, num_frags); for i = 1:num_frags img = fragments{frag_indices(i)}; vert_proj = sum(imbinarize(img), 1); left_blanks(i) = find(vert_proj > 0, 1) - 1; end [~, start] = max(left_blanks); % 构建序列 sequence = zeros(1, num_frags); sequence(1) = start; used = false(1, num_frags); used(start) = true; for pos = 2:num_frags [min_val, best] = min(match_matrix(sequence(pos-1), :)); if ~used(best) && isfinite(min_val) sequence(pos) = best; used(best) = true; else unused = find(~used); [min_val_unused, best_in_unused] = min(match_matrix(sequence(pos-1), unused)); if isempty(unused) || isinf(min_val_unused) error('排序失败'); end sequence(pos) = unused(best_in_unused); used(unused(best_in_unused)) = true; end end sequence = frag_indices(sequence); end %% 行间排序(按基线位置) function row_order = sort_rows_by_baseline(row_groups, baselines) row_baselines = zeros(11, 1); for row = 1:11 row_baselines(row) = mean(baselines(row_groups{row})); end % 按垂直位置排序(从上到下) [~, row_order] = sort(row_baselines); end %% 生成结果 function [sequence, table] = generate_results(row_sequences, row_order, ids) sequence = []; table = cell(11, 19); for r = 1:11 row_idx = row_order(r); for col = 1:19 frag_idx = row_sequences{row_idx}(col); sequence = [sequence, frag_idx]; table{r,col} = ids{frag_idx}; end end % 将结果写入Excel文件 filename = 'reconstruction_result.xlsx'; writecell(table, filename); fprintf('结果已保存至: %s\n', filename); end %% 可视化结果(包含基线标记) function 
visualize_reconstruction(fragments, baselines, row_sequences, row_order, language) row_images = cell(11, 1); % 拼接各行(包含基线标记) for r = 1:11 row_idx = row_order(r); row_img = []; for col = 1:19 frag_idx = row_sequences{row_idx}(col); frag_img = fragments{frag_idx}; % 标记基线(红色) baseline = baselines(frag_idx); if baseline > 0 && baseline <= size(frag_img, 1) frag_img = insertShape(frag_img, 'line', ... [1, baseline; size(frag_img, 2), baseline], ... 'Color', 'red', 'LineWidth', 2); end % 标记碎片编号 frag_img = insertText(frag_img, [5, 5], num2str(frag_idx-1), ... 'FontSize', 12, 'TextColor', 'red', 'BoxColor', 'white'); row_img = [row_img, frag_img]; end row_images{r} = row_img; end % 垂直拼接所有行 full_img = []; for r = 1:11 full_img = [full_img; row_images{r}]; end % 显示完整图像 fig = figure('Position', [100, 100, 1200, 800]); subplot(2,1,1); imshow(full_img); title(sprintf('完整拼接结果(%s)- 红色为基线位置', language)); % 显示行间拼接效果 subplot(2,1,2); montage(row_images, 'Size', [11 1]); title('行间排列效果'); % 保存结果 imwrite(full_img, 'reconstructed_full_with_baselines.png'); saveas(fig, 'reconstruction_result_with_baselines.png'); % 显示分组和排序信息 fprintf('行排序结果(从上到下):'); disp(row_order'); end完善下代码
最新发布
08-10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值