请根据训练系统的数据集文件夹，重新修改数据集生成代码。原来的代码如下：
% ---------------------------------------------------------------------
% Setup: output folder, metadata table, rendering constants, an
% off-screen figure used as the drawing surface, and loop bookkeeping.
% ---------------------------------------------------------------------

% Root folder of the generated dataset; one sub-folder per label is
% created beneath it by the generation code that follows.
outputDir = 'math_problems';
% isfolder only looks at the given (relative or absolute) location,
% whereas exist(name,'dir') can also match folders on the MATLAB search
% path and therefore report a folder that is not actually here.
if ~isfolder(outputDir)
    mkdir(outputDir);
end

% Number of images (and table rows) to generate.
numImages = 100;

% Pre-allocated metadata table, one row per image; every column is a string.
dataTable = table('Size', [numImages, 5], ...
    'VariableTypes', {'string', 'string', 'string', 'string', 'string'}, ...
    'VariableNames', {'Filename', 'Problem', 'CorrectResult', 'Operator', 'CorrectFlag'});

% Text rendering parameters.
fontSize = 80;                 % passed as 'FontSize' to text()
horizontalPadding = 50;        % added on each side of the estimated text width
verticalPadding = 80;          % used when computing the figure height
bgColor = [1, 1, 1];           % white background
textColor = [0, 0, 0];         % black text
availableFonts = {'Arial', 'Times New Roman', 'Courier New', 'Comic Sans MS'};

% Invisible figure with a single axes that fills it completely; the axes
% itself is hidden so only the text drawn into it shows up in captures.
fig = figure('Visible', 'off', 'Color', bgColor, ...
    'Position', [100, 100, 800, 600], ...
    'InvertHardcopy', 'off');
ax = axes('Parent', fig, 'Position', [0, 0, 1, 1], ...
    'XLim', [0, 1], 'YLim', [0, 1], 'Visible', 'off');
hold(ax, 'on');

% Loop state.
imgIdx = 1;                          % index of the next image/table row to fill
maxAttempts = numImages * 3;         % upper bound on generation attempts
existingLabels = containers.Map();   % labels already used (duplicate detector)
% Main generation loop. Each pass draws one random arithmetic problem,
% decides whether to show the true or a perturbed answer, skips labels
% that were already produced, renders the equation into the hidden
% figure, saves it as a PNG in a per-label folder and records a table row.
% The loop stops when numImages rows are filled or the attempt budget
% is exhausted.
while imgIdx <= numImages && maxAttempts > 0
    maxAttempts = maxAttempts - 1;

    % --- Draw a random problem (1:add, 2:subtract, 3:multiply, 4:divide) ---
    opKind = randi(4);
    if opKind == 1
        % Addition, both operands in 10..99.
        a = randi([10, 99]);
        b = randi([10, 99]);
        result = a + b;
        eq = sprintf('%d+%d=', a, b);
        operator = 'addition';
    elseif opKind == 2
        % Subtraction, minuend in 20..99; subtrahend is kept below the
        % minuend so the difference is positive.
        a = randi([20, 99]);
        b = randi([10, min(a-1, 99)]);
        result = a - b;
        eq = sprintf('%d-%d=', a, b);
        operator = 'subtraction';
    elseif opKind == 3
        % Multiplication, both factors in 2..20.
        a = randi([2, 20]);
        b = randi([2, 20]);
        result = a * b;
        eq = sprintf('%d×%d=', a, b);
        operator = 'multiplication';
    elseif opKind == 4
        % Division with an integer quotient: pick divisor and quotient,
        % then derive the dividend from them.
        den = randi([2, 20]);
        quo = randi([2, 20]);
        num = den * quo;
        result = quo;
        eq = sprintf('%d÷%d=', num, den);
        operator = 'division';
    end

    % --- Choose the displayed answer: true value or a nearby wrong one ---
    showTruth = rand() > 0.5;
    if showTruth
        shownValue = result;
        isCorrect = "1";
    else
        % Non-zero offset within roughly +/-20% of the true result
        % (at least +/-1); redraw until the offset is non-zero.
        spread = max(1, round(result*0.2));
        delta = 0;
        while delta == 0
            delta = randi([-spread, spread]);
        end
        shownValue = result + delta;
        isCorrect = "0";
    end

    % --- Build the label and reject duplicates ---
    % The full equation text doubles as the folder label after stripping
    % the characters matched by the pattern below.
    fullEq = sprintf('%s%d', eq, shownValue);
    safeLabel = regexprep(fullEq, '[<>:"/\\|?*]', '');
    if isKey(existingLabels, safeLabel)
        continue;   % label already generated; try again
    end
    existingLabels(safeLabel) = true;

    % --- Render the equation ---
    fontPick = availableFonts{randi(length(availableFonts))};
    % Figure size is estimated from the character count and font size.
    imgWidth = length(fullEq) * fontSize * 0.6 + 2*horizontalPadding;
    imgHeight = verticalPadding + fontSize + verticalPadding/2;
    set(fig, 'Position', [100, 100, imgWidth, imgHeight]);
    cla(ax);
    text(ax, 0.5, 0.5, fullEq, ...
        'FontSize', fontSize, ...
        'FontName', fontPick, ...
        'FontWeight', 'normal', ...
        'Color', textColor, ...
        'HorizontalAlignment', 'center', ...
        'VerticalAlignment', 'middle');

    % --- Capture and save the image into its label folder ---
    frame = getframe(fig);
    imgData = frame.cdata;
    labelDir = fullfile(outputDir, safeLabel);
    if exist(labelDir, 'dir') ~= 7
        mkdir(labelDir);
    end
    filename = sprintf('%04d.png', imgIdx);
    imwrite(imgData, fullfile(labelDir, filename));

    % --- Record the metadata row (all columns are strings) ---
    dataTable{imgIdx, :} = [string(filename), string(fullEq), ...
        string(result), string(operator), isCorrect];

    imgIdx = imgIdx + 1;
end
% === Completion check ===
% If the main loop ran out of attempts before producing numImages
% samples, trim the metadata table to the rows actually filled and
% generate the missing samples here.
%
% The top-up samples are real arithmetic problems built with the same
% operand rules as the main loop (addition 10..99, subtraction with a
% positive difference, multiplication 2..20, exact integer division), and
% the table row records the true result, the real operator name and
% whether the displayed answer is correct. Previously the displayed
% result was a random number unrelated to the operands and the row held
% placeholder metadata, which silently corrupted the dataset labels.
if imgIdx <= numImages
    fprintf('生成不足: %d/%d, 正在补充...\n', imgIdx-1, numImages);
    dataTable = dataTable(1:imgIdx-1, :);   % drop the unfilled pre-allocated rows

    for i = imgIdx:numImages
        % Keep drawing problems until one yields a label not used before.
        while true
            switch randi(4)
                case 1   % addition
                    a = randi([10, 99]);
                    b = randi([10, 99]);
                    result = a + b;
                    opChar = '+';
                    operator = 'addition';
                case 2   % subtraction, difference always positive
                    a = randi([20, 99]);
                    b = randi([10, a-1]);
                    result = a - b;
                    opChar = '-';
                    operator = 'subtraction';
                case 3   % multiplication
                    a = randi([2, 20]);
                    b = randi([2, 20]);
                    result = a * b;
                    opChar = '×';
                    operator = 'multiplication';
                case 4   % division with an integer quotient
                    b = randi([2, 20]);
                    result = randi([2, 20]);
                    a = b * result;
                    opChar = '÷';
                    operator = 'division';
            end

            % Show the true answer about half of the time, otherwise a
            % nearby wrong value (non-zero offset within ~20%, at least 1).
            if rand() > 0.5
                displayedResult = result;
                isCorrect = "1";
            else
                errorRange = max(1, round(result*0.2));
                offset = 0;
                while offset == 0
                    offset = randi([-errorRange, errorRange]);
                end
                displayedResult = result + offset;
                isCorrect = "0";
            end

            label = sprintf('%d%s%d=%d', a, opChar, b, displayedResult);
            safeLabel = regexprep(label, '[<>:"/\\|?*]', '');
            if ~isKey(existingLabels, safeLabel)
                existingLabels(safeLabel) = true;
                break;
            end
        end

        % Render the equation (simplified: fixed figure size, default font).
        fullEq = label;
        set(fig, 'Position', [100, 100, 600, 300]);
        cla(ax);
        text(ax, 0.5, 0.5, fullEq, 'FontSize', fontSize, 'HorizontalAlignment', 'center');
        frame = getframe(fig);
        imgData = frame.cdata;

        % Save into the per-label folder; the folder may already exist
        % if an earlier run produced the same label.
        labelDir = fullfile(outputDir, safeLabel);
        if ~isfolder(labelDir)
            mkdir(labelDir);
        end
        filename = sprintf('%04d.png', i);
        imwrite(imgData, fullfile(labelDir, filename));

        % Append the metadata row with the real values for this sample.
        newRow = {string(filename), string(fullEq), string(result), ...
            string(operator), isCorrect};
        dataTable = [dataTable; newRow]; %#ok<AGROW>
    end
end
% === Final save ===
% Write the metadata table next to the label folders, then release the
% hidden figure used for rendering.
xlsxPath = fullfile(outputDir, 'math_problems.xlsx');
writetable(dataTable, xlsxPath);
close(fig);
% Disabled summary message, kept for reference:
%fprintf('生成完成! 创建了%d个子文件夹\n', height(dataTable));

% === Duplicate check ===
% Count the directories inside the output folder; the listing includes
% the '.' and '..' entries, hence the subtraction of 2.
entries = dir(outputDir);
folderCount = nnz([entries.isdir]) - 2;
fprintf('最终生成文件夹数量: %d\n', folderCount);
% Fewer folders than requested images means some labels shared a folder.
if numImages > folderCount
    fprintf('警告: 有重复标签未被检测到\n');
end
最新发布