% Generates up to numImages PNG renderings of simple arithmetic equations
% ("a+b=c", "a-b=c", "a×b=c", "a÷b=c"), roughly half of them showing a
% deliberately wrong result. Each image is written into its own sub-folder
% of outputDir named after the displayed equation, and a summary table is
% exported to math_problems.xlsx. Equations whose label or image hash was
% already produced are skipped.

outputDir = 'math_problems';
if ~exist(outputDir, 'dir')
    mkdir(outputDir);
end

% Duplicate trackers: one keyed by image hash, one keyed by folder label.
contentHashMap = containers.Map('KeyType', 'char', 'ValueType', 'logical');
labelHashMap = containers.Map('KeyType', 'char', 'ValueType', 'logical');

numImages = 100;
dataTable = table('Size', [numImages, 5], ...
    'VariableTypes', {'string', 'string', 'string', 'string', 'string'}, ...
    'VariableNames', {'Filename', 'Problem', 'CorrectResult', 'Operator', 'CorrectFlag'});

% Rendering parameters.
fontSize = 80;
horizontalPadding = 50;
verticalPadding = 80;
bgColor = [1, 1, 1];
textColor = [0, 0, 0];

% Font variation: one of these is picked at random for every image.
availableFonts = {'Arial', 'Times New Roman', 'Courier New', 'Comic Sans MS'};

% Off-screen figure with a single invisible axes spanning the whole canvas.
fig = figure('Visible', 'off', 'Color', bgColor, ...
    'Position', [100, 100, 800, 600], ...
    'InvertHardcopy', 'off');

% Everything that can throw while the hidden figure exists runs inside the
% try block so the figure is always closed; an invisible figure left behind
% by an error cannot be closed from the UI.
try
    ax = axes('Parent', fig, 'Position', [0, 0, 1, 1], ...
        'XLim', [0, 1], 'YLim', [0, 1], 'Visible', 'off');
    hold(ax, 'on');

    imgIdx = 1;
    attemptCount = 0;
    maxAttempts = numImages * 5; % upper bound so duplicates cannot loop forever

    while imgIdx <= numImages && attemptCount < maxAttempts
        attemptCount = attemptCount + 1;

        % File name is derived from the index of the image being produced.
        filename = sprintf('%04d.png', imgIdx);

        % --- Random equation ---
        opType = randi(4);
        switch opType
            case 1 % addition
                a = randi([10, 999]);
                b = randi([1, 999]);
                result = a + b;
                eq = sprintf('%d+%d=', a, b);
                operator = 'addition';
            case 2 % subtraction (b < a, so the result is positive)
                a = randi([20, 999]);
                b = randi([1, min(a-1, 999)]);
                result = a - b;
                eq = sprintf('%d-%d=', a, b);
                operator = 'subtraction';
            case 3 % multiplication
                a = randi([1, 99]);
                b = randi([1, 99]);
                result = a * b;
                eq = sprintf('%d×%d=', a, b);
                operator = 'multiplication';
            case 4 % division (dividend built from divisor*quotient, so it is exact)
                divisor = randi([2, 99]);
                quotient = randi([1, 99]);
                dividend = divisor * quotient;
                result = quotient;
                eq = sprintf('%d÷%d=', dividend, divisor);
                operator = 'division';
        end

        % --- Show the correct result or a wrong one (about 50/50) ---
        if rand() > 0.5
            displayedResult = result;
            isCorrect = "1";
        else
            % Wrong answers are drawn from within +/-20% of the true result.
            minError = max(1, round(result * 0.8));
            maxError = round(result * 1.2);
            possibleErrors = setdiff(minError:maxError, result);
            if isempty(possibleErrors)
                % Range collapsed onto the true result (small values):
                % fall back to its immediate neighbours.
                possibleErrors = [result-1, result+1];
            end
            errorIdx = randi(length(possibleErrors));
            displayedResult = possibleErrors(errorIdx);
            isCorrect = "0";
        end

        % The displayed equation doubles as the folder label, so it is
        % built once and reused for both.
        fullEq = sprintf('%s%d', eq, displayedResult);
        % Strip characters that are illegal in file/folder names.
        safeLabel = regexprep(fullEq, '[<>:"/\\|?*]', '');

        % Label uniqueness check: skip equations that were already emitted.
        if isKey(labelHashMap, safeLabel)
            continue;
        end

        % --- Render the image ---
        selectedFont = availableFonts{randi(length(availableFonts))};
        % Rough canvas size: 0.6*fontSize per character plus padding.
        % NOTE(review): FontSize is in points by default; if 1pt ~= 1px on
        % the target display, wide strings may be clipped - confirm.
        textWidth = length(fullEq) * fontSize * 0.6;
        imgWidth = textWidth + 2*horizontalPadding;
        imgHeight = verticalPadding + fontSize + verticalPadding/2;
        set(fig, 'Position', [100, 100, imgWidth, imgHeight]);
        cla(ax);
        text(ax, 0.5, 0.5, fullEq, ...
            'FontSize', fontSize, ...
            'FontName', selectedFont, ...
            'FontWeight', 'normal', ...
            'Color', textColor, ...
            'HorizontalAlignment', 'center', ...
            'VerticalAlignment', 'middle');

        % Capture the pixels. The frame struct is stored first because
        % indexing directly into a function-call result is not accepted
        % by older MATLAB releases.
        frame = getframe(fig);
        imgData = frame.cdata;

        % Content hash check: skip images whose fingerprint was seen before.
        imgHash = getImageHash(imgData);
        if isKey(contentHashMap, imgHash)
            continue;
        end

        % --- Write the image into its own label folder ---
        labelDir = fullfile(outputDir, safeLabel);
        if ~exist(labelDir, 'dir')
            mkdir(labelDir);
        end
        imwrite(imgData, fullfile(labelDir, filename));

        % Record the fingerprints only after the image was written.
        contentHashMap(imgHash) = true;
        labelHashMap(safeLabel) = true;

        % Fill the summary row for this image.
        dataTable.Filename(imgIdx) = string(filename);
        dataTable.Problem(imgIdx) = string(fullEq);
        dataTable.CorrectResult(imgIdx) = string(result);
        dataTable.Operator(imgIdx) = operator;
        dataTable.CorrectFlag(imgIdx) = isCorrect;
        imgIdx = imgIdx + 1;
    end

    % If the attempt budget ran out first, drop the unused table rows.
    if imgIdx <= numImages
        dataTable = dataTable(1:imgIdx-1, :);
        fprintf('实际生成图像: %d/%d (跳过重复项)\n', imgIdx-1, numImages);
    end

    writetable(dataTable, fullfile(outputDir, 'math_problems.xlsx'));
catch err
    % Release the hidden figure before propagating the failure.
    if ishghandle(fig)
        close(fig);
    end
    rethrow(err);
end

close(fig);
fprintf('生成完成! 创建了%d个子文件夹\n', height(dataTable));
% === 新增:图像哈希函数 ===
function hash = getImageHash(imgData)
%GETIMAGEHASH Coarse content fingerprint of an image as a hex string.
%   hash = getImageHash(imgData) converts imgData to grayscale, shrinks it
%   to 16x16, thresholds every pixel at 128 and packs the resulting 256
%   bits (8 consecutive bits per byte, most significant bit first) into a
%   1x64 uppercase hexadecimal char vector usable as a containers.Map key.
%
%   imgData  M-by-N-by-3 RGB image or M-by-N grayscale image.
%            The fixed threshold of 128 presumes 0..255 intensities
%            (e.g. uint8 frame data) - confirm for other classes.
%
%   The bits are packed arithmetically: num2str on a vector inserts
%   separator blanks, so its output cannot be reshaped into 8-character
%   rows, and adding '0' to a char array yields numeric codes that bin2dec
%   does not accept. Hex output also keeps control characters out of keys.

    % rgb2gray expects a 3-plane image; pass grayscale input through.
    if size(imgData, 3) == 3
        grayImg = rgb2gray(imgData);
    else
        grayImg = imgData(:, :, 1);
    end

    smallImg = imresize(grayImg, [16, 16]); % downscale to a fixed 16x16 grid
    hashVec = smallImg(:) > 128;            % binarize: 256 logical bits

    % Each column of bitRows holds the 8 bits of one byte, MSB on top.
    bitRows = reshape(double(hashVec), 8, []);
    byteVals = (2 .^ (7:-1:0)) * bitRows;   % 1x32 values in 0..255

    % dec2hex returns one 2-character row per byte; flatten row by row.
    hexPairs = dec2hex(byteVals, 2);
    hash = reshape(hexPairs.', 1, []);
end
% 修改刚才出错的代码 (Fix the code that raised the error above.)
% 最新发布 (Latest release.)