image.getSubimage(x, y, width, height)函数解析

x - 指定矩形区域左上角的 X 坐标
y - 指定矩形区域左上角的 Y 坐标
width - 指定矩形区域的宽度
height - 指定矩形区域的高度

/**
 * Crops a rectangular region out of an encoded image and returns the region re-encoded as JPEG.
 *
 * <p>Only the bytes between the buffer's current position and its limit are decoded. The
 * caller's buffer is read through a duplicate, so its position and limit are left untouched,
 * and direct or read-only buffers (which have no accessible backing array) are supported.
 *
 * @param imageContent encoded image bytes in any format {@link ImageIO} can decode
 * @param x X coordinate of the upper-left corner of the region
 * @param y Y coordinate of the upper-left corner of the region
 * @param width width of the region in pixels
 * @param height height of the region in pixels
 * @return a buffer wrapping the JPEG-encoded sub-image
 * @throws java.io.IOException if the content cannot be decoded as an image, or if no JPEG
 *     writer accepts the cropped image (for example when it carries an alpha channel)
 * @throws java.awt.image.RasterFormatException if the region lies outside the decoded image
 */
public static ByteBuffer getSubImage(ByteBuffer imageContent, int x, int y, int width, int height) throws Exception {
    // Copy out exactly the readable window; array() would ignore position/limit/offset
    // and is unavailable for direct or read-only buffers.
    ByteBuffer view = imageContent.duplicate();
    byte[] encoded = new byte[view.remaining()];
    view.get(encoded);

    BufferedImage image = ImageIO.read(new ByteArrayInputStream(encoded));
    if (image == null) {
        // ImageIO.read signals "no suitable reader" with null rather than an exception.
        throw new java.io.IOException("Image content could not be decoded (" + encoded.length + " bytes)");
    }

    BufferedImage subImage = image.getSubimage(x, y, width, height);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    if (!ImageIO.write(subImage, "jpeg", out)) {
        // write() returns false when no writer supports the image type; do not return empty data.
        throw new java.io.IOException("No JPEG writer available for image type " + subImage.getType());
    }

    return ByteBuffer.wrap(out.toByteArray());
}
/** * ocr配置 */ @PostConstruct public void initOcrEngine() { tesseract = new Tesseract(); //语言包路径和支持语言 tesseract.setDatapath(tessDataPath); tesseract.setLanguage("eng+chi_sim"); tesseract.setPageSegMode(6); //自动页面分割 tesseract.setOcrEngineMode(3); //LSTM引擎 } /** * OCR识别图片内容 */ private String extractImageText(MultipartFile file) { try (InputStream is = file.getInputStream()) { BufferedImage image = ImageIO.read(is); if (image == null) { return MessageUtils.message("Image.parsing.failed"); } image = increaseVerticalSpacingBetweenText(image, 300); image = increaseHorizontalSpacing(image, 20); saveDebugImage(image, "3_final.png"); String result = tesseract.doOCR(image); //OCR识别 result = postProcess(result); result = result.replaceAll("\\s+", " ").trim(); System.out.println("图片内容:\n" + result); return result; } catch (Exception e) { e.printStackTrace(); return MessageUtils.message("file.read.picture.error"); } } private void saveDebugImage(BufferedImage img, String filename) throws IOException { // 保存到项目目录下的debug文件夹 String basePath = "D:/pdf/debug/"; File outputDir = new File(basePath); if (!outputDir.exists()) outputDir.mkdirs(); String fullPath = basePath + filename; ImageIO.write(img, "png", new File(fullPath)); } public static BufferedImage increaseVerticalSpacingBetweenText(BufferedImage image, int spacing) { int width = image.getWidth(); int height = image.getHeight(); // 二值化处理 BufferedImage binaryImage = binarizeImage(image, 158); // 检测每一行文本区域(包括表格行) List<int[]> textRegions = detectTextRows(binaryImage); // 计算新高度 int newHeight = 0; for (int[] region : textRegions) { newHeight += (region[1] - region[0]) + spacing; } newHeight -= spacing; // 最后一行不需要空隙 BufferedImage newImage = new BufferedImage(width, newHeight, BufferedImage.TYPE_BYTE_BINARY); Graphics2D g2d = newImage.createGraphics(); g2d.setColor(Color.WHITE); g2d.fillRect(0, 0, width, newHeight); int currentY = 0; for (int[] region : textRegions) { int startY = region[0]; int endY = region[1]; int lineHeight = 
endY - startY; // 复制当前行内容 for (int y = 0; y < lineHeight; y++) { for (int x = 0; x < width; x++) { newImage.setRGB(x, currentY + y, image.getRGB(x, startY + y)); } } // 添加空隙 currentY += lineHeight + spacing; } g2d.dispose(); return newImage; } private static BufferedImage binarizeImage(BufferedImage image, int threshold) { int width = image.getWidth(); int height = image.getHeight(); BufferedImage binaryImage = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { int argb = image.getRGB(x, y); int gray = (argb >> 16) & 0xff; if (gray < threshold) { binaryImage.setRGB(x, y, Color.BLACK.getRGB()); } else { binaryImage.setRGB(x, y, Color.WHITE.getRGB()); } } } return binaryImage; } // 检测文字区域 private static List<int[]> detectTextRows(BufferedImage binaryImage) { int width = binaryImage.getWidth(); int height = binaryImage.getHeight(); List<int[]> textRows = new ArrayList<>(); boolean inText = false; int startRow = 0; for (int y = 0; y < height; y++) { boolean hasBlackPixel = false; for (int x = 0; x < width; x++) { if (binaryImage.getRGB(x, y) == Color.BLACK.getRGB()) { hasBlackPixel = true; break; } } if (hasBlackPixel && !inText) { startRow = y; inText = true; } else if (!hasBlackPixel && inText) { textRows.add(new int[]{startRow, y}); inText = false; } } // 处理最后一行 if (inText) { textRows.add(new int[]{startRow, height}); } return textRows; } public static BufferedImage increaseHorizontalSpacing(BufferedImage image, int spacing) { int width = image.getWidth(); int height = image.getHeight(); BufferedImage newImage = new BufferedImage( width + spacing * 2, height, BufferedImage.TYPE_BYTE_BINARY ); Graphics2D g2d = newImage.createGraphics(); g2d.setColor(Color.WHITE); g2d.fillRect(0, 0, newImage.getWidth(), newImage.getHeight()); // 将原图像绘制到中间位置 g2d.drawImage(image, spacing, 0, image.getWidth(), image.getHeight(), null); g2d.dispose(); return newImage; }tesseract 我的ocr识别 上下字符字母紧连着识别错误 
我需要上下字符之间有空隙,这样识别就不会出现问题。我目前的效果是:表格外边的内容是有效果的,可以根据我填的数值生成空隙;但是表格里面的内容没有任何效果。我需要解决表格里上下字符、字母紧连着的问题。
最新发布
07-26
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值