JsonConfig

JsonConfig config = new JsonConfig();
// Install a property filter for serialization. The listed property names are
// the ones prone to causing infinite loops (circular references) when the
// object graph is converted to JSON; per json-lib's PropertyFilter contract,
// returning true filters the property out.
config.setJsonPropertyFilter(new PropertyFilter() {
    public boolean apply(Object source, String name, Object value) {
        return name.equals("clubs")
                || name.equals("city")
                || name.equals("msg");
    }
});

 Hibernate查询结果转JSON出现异常的解决方法

 

是我@PostConstruct public void initOcrEngine() { tesseract = new Tesseract(); try { //语言包路径和支持语言 tesseract.setDatapath(“D:\maven_use\lingxi-lhc\lingxi-ai-extend\lingxi-ai-comparison\src\main\resources\tessdata”); tesseract.setLanguage(“eng+chi_sim”); tesseract.setPageSegMode(6); // 自动页面分割 tesseract.setOcrEngineMode(1); // LSTM引擎 } catch (Exception e) { throw new RuntimeException(“OCR引擎初始化失败: " + e.getMessage(), e); } } /** * 支持PDF和图片 / public String extractContent(MultipartFile file) { String contentType = file.getContentType(); String fileName = file.getOriginalFilename().toLowerCase(); if (contentType == null) { return “不支持的文件类型: " + contentType; } if (fileName.endsWith(”.pdf")) { return readPdfText(file); } return extractImageText(file); } /* * 读取PDF文本内容 * * @param file * @return / public String readPdfText(MultipartFile file) { try (PDDocument doc = PDDocument.load(file.getInputStream())) { PDFTextStripper stripper = new PDFTextStripper(); // 设置行分隔符 stripper.setLineSeparator(“\n”); // 设置字符间距 stripper.setSortByPosition(true); String rawText = stripper.getText(doc); System.out.println(“内容” + rawText); return rawText.trim(); } catch (Exception e) { return MessageUtils.message(“file.red.pdf.error”); } } /* * OCR识别图片内容 / private String extractImageText(MultipartFile file) { try { // 创建临时文件 Path tempFile = Files.createTempFile(“ocr_”, getFileExtension(file.getOriginalFilename())); Files.copy(file.getInputStream(), tempFile, StandardCopyOption.REPLACE_EXISTING); // 执行OCR识别 File imageFile = tempFile.toFile(); String result = tesseract.doOCR(imageFile) .replaceAll(“\s+”, " ").trim(); System.out.println(“读取的内容” + result); // 清理临时文件 Files.deleteIfExists(tempFile); return result; } catch (Exception e) { return "OCR处理失败: " + e.getMessage(); } } private String getFileExtension(String filename) { if (filename == null) return “.tmp”; int dotIndex = filename.lastIndexOf(‘.’); return (dotIndex == -1) ? 
“.tmp” : filename.substring(dotIndex); } /* * 解析json / public JsonNode parseJson(String jsonContent) throws Exception { return this.objectMapper.readTree(jsonContent); } public List compareContent(String pdfText, JsonNode jsonConfig) { List results = new ArrayList<>(); // 去除读取内容中的多余空格 pdfText = pdfText.replaceAll(“\s+”, “”); // 处理JSON结构(支持单个对象或数组) JsonNode dataNode; if (jsonConfig.isArray() && jsonConfig.size() > 0) { dataNode = jsonConfig.get(0); } else if (jsonConfig.isObject()) { dataNode = jsonConfig; } else { results.add(new ValidationResult(“ERROR”, “JSON格式错误”, “期望一个对象或包含对象的数组”, “实际格式不匹配”, false)); return results; } // 动态定义地址字段列表 Set addressFields = new HashSet<>(); // 字段直接匹配 checkNonAddressFields(pdfText, dataNode, results, addressFields); // 连续匹配 checkAddressFields(pdfText, dataNode, results, addressFields); return results; } /* * 检查 JSON 中非地址字段是否严格存在于 PDF 文本中 / private void checkNonAddressFields(String pdfText, JsonNode jsonConfig, List results, Set addressFields) { Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode() && !addressFields.contains(fieldName)) { //去除多余空格 String expectedValue = valueNode.asText().trim().replaceAll(“\s+”, “”); if (expectedValue.isEmpty()) continue; // 直接进行字符串匹配 boolean found = pdfText.contains(expectedValue); results.add(new ValidationResult( “FIELD”, fieldName, expectedValue, found ? 
“Found” : “Not Found”, found )); } } } /* * 检查 JSON 中地址字段是否严格存在于 PDF 文本中 */ private void checkAddressFields(String pdfText, JsonNode jsonConfig, List results, Set addressFields) { // HanLP分词 List terms = HanLP.segment(pdfText); List addressParts = new ArrayList<>(); for (Term term : terms) { String word = term.word; if (word.matches(”\d{5,7}“)) { addressParts.add(word); } else if (term.nature.toString().startsWith(“ns”)) { addressParts.add(word); } } // 遍历 JSON 配置中的地址字段 Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode() && addressFields.contains(fieldName)) { //去除多余空格 String expectedValue = valueNode.asText().trim().replaceAll(”\s+“, “”); if (expectedValue.isEmpty()) continue; boolean found = false; for (String part : addressParts) { if (part.equals(expectedValue)) { found = true; break; } } results.add(new ValidationResult( “FIELD”, fieldName, expectedValue, found ? 
“Found” : “Not Found”, found )); } } } public JsonNode parsePipeSeparatedDataToJson(String inputData) throws Exception { Map<String, String> dataMap = parsePipeSeparatedData(inputData); return objectMapper.valueToTree(dataMap); } //解析filE_CONTENT数据 public Map<String, String> parsePipeSeparatedData(String filE_CONTENT) { Map<String, String> dataMap = new HashMap<>(); String[] lines = filE_CONTENT.split(”\n"); if (lines.length >= 2) { String[] headers = lines[0].split(“\|”); String[] values = lines[1].split(“\|”); for (int i = 0; i < headers.length; i++) { dataMap.put(headers[i], values[i]); } } return dataMap; } // 判断是否是以 | 分隔的数据 public boolean isPipeSeparatedData(String inputData) { return inputData.contains(“|”); }上面的我的代码 我读取pdf文件内容 可以跟我传的json内容做比较和校验都是对的 然后我传这个数据"filE_CONTENT": "PALLET_ID|KNBOXNO|CARRIER|COC|CTRY|HAWB|PO|ORIGIN|INVOICENO|CARTONS|SHIPID|SHIP_DATE|TEL|SSCC|RETURN_TO1|RETURN_TO2|RETURN_TO3|RETURN_TO4|RETURN_TO5|RETURN_TO6|RETURN_TO7|RETURN_TO8|SHIP_TO1|SHIP_TO2|SHIP_TO3|SHIP_TO4|SHIP_TO5|SHIP_TO6|SHIP_TO7|SHIP_TO8|LINEITEM1|MPN1|QTY1|LINEITEM2|MPN2|QTY2|LINEITEM3|MPN3|QTY3|\nFO2501000233P0002||DGF-AD|NL|NL|8VG8286|0638138589|PVG|8VG8286|61/84|73292885370002|06/01/2025|00000000|001959499064098506|ADI EMEIA Logistics|Inbound Department|||||||PEGATRON CORPORATION|c/o DP World Logistics Netherlands BV|Van Hilststraat 23|5145 RK Waalwijk,Netherlands|5145 RK Waalwijk Netherlands||||00010|MLPF3AA/A|10|"它的\n 后的是要打印的内容 | 是来区分不同的参数读取pdf文件内容 比较和返回结果都是错误的 @PostMapping(value = “/compare”, produces = { MediaType.APPLICATION_JSON_VALUE, MediaType.APPLICATION_XML_VALUE }) public R compare(@RequestPart(“File”) MultipartFile file, @RequestPart(“jsonContent”) String jsonContent) { // 读取 PDF 文本 String pdfText = compareService.extractContent(file); // 解析 JSON 配置 JsonNode jsonConfig = null; try { if (compareService.isPipeSeparatedData(jsonContent)) { jsonConfig = compareService.parsePipeSeparatedDataToJson(jsonContent); System.out.println(“数据”+jsonContent); } else { 
jsonConfig = compareService.parseJson(jsonContent); } } catch (Exception e) { return R.fail(MessageUtils.message(“failed.convert.json”)); } // 执行对比校验 List results = compareService.compareContent(pdfText, jsonConfig); // 返回没有匹配成功的数据 List failedResults = new ArrayList<>(); for (ValidationResult result : results) { if (!result.isValid()) { failedResults.add(result); } } return failedResults.isEmpty() ? R.ok(“条件符合规范”) : R.ok(failedResults); }这个是我的接口 这个整体修改后的代码 不管是json还是 我传的都是可以比较和校验数据
07-12
{ "code": 200, "msg": "操作成功", "data": [ { "checkType": "FIELD", "fieldName": "DELIVERYNO", "expected": "QBC03585", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "DATACONTENT", "expected": "Computer \n(Shanghai)Co Ltd\n1249 Century Avenue Tower 3,\nPudong new district, \nshanghai , China", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "WEBORDERCODE", "expected": "2365335413", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "CARRIERCODE", "expected": "XDESC", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SHIPCONDITDESC", "expected": "ROAD", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SOLDTONAME", "expected": "深圳市终端有限公司", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTONAME", "expected": "深圳市终端有限公司上海库", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOADDRESS", "expected": "嘉定区兴邦路6号", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOCITY", "expected": "上海", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SHIPTOSTATE", "expected": "上海", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SHIPTOZIP", "expected": "201800", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SHIPTOCOUNTRY", "expected": "中国", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SHIPDATE", "expected": "01-JUL-2025", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOADDRESS", "expected": "前湾一路1号A栋201室", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SOLDTOCITY", "expected": "深圳", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SOLDTOSTATE", "expected": "广东", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SOLDTOZIP", "expected": "518000", 
"actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "SOLDTOCOUNTRY", "expected": "中国", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "TOTALCARTONQTY", "expected": "25", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "TOTALQTY", "expected": "250", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "TOTALWEIGHT", "expected": "102.33 KGS", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "V_CUBE", "expected": "0.3", "actual": "Found", "valid": true }, { "checkType": "FIELD", "fieldName": "DELIVERYNO", "expected": "QBC03585", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "DATACONTENT", "expected": "Computer \n(Shanghai)Co Ltd\n1249 Century Avenue Tower 3,\nPudong new district, \nshanghai , China", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "WEBORDERCODE", "expected": "2365335413", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "CARRIERCODE", "expected": "XDESC", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPCONDITDESC", "expected": "ROAD", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTONAME", "expected": "深圳市终端有限公司", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOCOMPANY", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTONAME", "expected": "深圳市终端有限公司上海库", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOCOMPANY", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOADDRESS", "expected": "嘉定区兴邦路6号", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOADDRESS2", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": 
"SHIPTOADDRESS3", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOADDRESS4", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOCITY", "expected": "上海", "actual": "深圳市天", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOSTATE", "expected": "上海", "actual": "联终端有", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOZIP", "expected": "201800", "actual": "518000", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPTOCOUNTRY", "expected": "中国", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPINSTRUCT", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "CUSTWHINST", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "EXCUSTNOTE", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "EXCUSTNOTE1", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SHIPDATE", "expected": "01-JUL-2025", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOADDRESS", "expected": "前湾一路1号A栋201室", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOADDRESS3", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOADDRESS2", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOCITY", "expected": "深圳", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOADDRESS4", "expected": "", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOSTATE", "expected": "广东", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "SOLDTOZIP", "expected": "518000", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", 
"fieldName": "SOLDTOCOUNTRY", "expected": "中国", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "TOTALCARTONQTY", "expected": "25", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "TOTALQTY", "expected": "250", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "TOTALWEIGHT", "expected": "102.33 KGS", "actual": "Not Found", "valid": false }, { "checkType": "FIELD", "fieldName": "V_CUBE", "expected": "0.3", "actual": "Not Found", "valid": false } ] }我怎么执行了俩边 package com.luxsan.llm.ai.service.impl; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.luxsan.llm.ai.domain.ValidationResult; import lombok.RequiredArgsConstructor; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import java.io.IOException; import java.io.InputStream; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @RequiredArgsConstructor @Service public class PdfJsonCompareService { private final Map<String, Pattern> patternCache = new ConcurrentHashMap<>(); private final ObjectMapper objectMapper = new ObjectMapper(); /** * 读取pdf文件 * * @param file * @return * @throws IOException */ public String readPdfText(MultipartFile file) throws IOException { try (PDDocument doc = PDDocument.load(file.getInputStream())) { PDFTextStripper stripper = new PDFTextStripper(); String rawText = stripper.getText(doc); return rawText.replaceAll("\\s+", " ").trim(); //统一空白符 } } public JsonNode parseJson(String jsonContent) throws Exception { return this.objectMapper.readTree(jsonContent); } public List<ValidationResult> compareContent(String pdfText, JsonNode jsonConfig) { List<ValidationResult> results = new ArrayList<>(); if (jsonConfig.isArray() && 
jsonConfig.size() > 0) { JsonNode dataNode = jsonConfig.get(0); // 1. 字段直接匹配 checkDirectFields(pdfText, dataNode, results); // 2. 连续字段匹配 checkConsecutiveFields(pdfText, dataNode, results); // 3. 正则检查 performRegexChecks(pdfText, results); } else { results.add(new ValidationResult("ERROR", "JSON格式错误", "期望一个包含对象的数组", "实际格式不匹配", false)); } return results; } /** * 检查 JSON 中所有键值是否直接存在于 PDF 文本中 */ /** * 检查 JSON 中所有键值是否直接存在于 PDF 文本中 */ private void checkDirectFields(String pdfText, JsonNode jsonConfig, List<ValidationResult> results) { Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode()) { String expectedValue = valueNode.asText().trim(); if (expectedValue.isEmpty()) { continue; //空的就不比较了 } String pattern = expectedValue .replaceAll("\\*", ".*?") // * → 任意内容 .replaceAll("\\?", "."); // ? → 单个字符 boolean found = Pattern.compile(pattern).matcher(pdfText).find(); if (!found) { found = fuzzyKeywordMatch(pdfText, expectedValue); } results.add(new ValidationResult( "FIELD", fieldName, expectedValue, found ? "Found" : "Not Found", found )); } } } /** * 模糊关键词匹配: */ private boolean fuzzyKeywordMatch(String pdfText, String expectedValue) { // 优化:使用更精准的中文分词逻辑(可选:引入 Jieba 或 HanLP) List<String> keywords = Arrays.asList(expectedValue.split("[\\s\\p{Punct}]+")); keywords.removeIf(String::isEmpty); // 构建正则表达式 StringBuilder regexBuilder = new StringBuilder(); for (String keyword : keywords) { regexBuilder.append("(?i)").append(Pattern.quote(keyword)).append(".*?"); } // 移除最后一个 .*? 
并添加边界限制 String regex = regexBuilder.toString().replaceAll("\\.?\\*?$", ""); regex = "\\b" + regex + "\\b"; return Pattern.compile(regex, Pattern.DOTALL).matcher(pdfText).find(); } /** * 执行正则表达式检查 */ private void performRegexChecks(String pdfText, List<ValidationResult> results) { try (InputStream is = getClass().getClassLoader().getResourceAsStream("validation_rules.json")) { JsonNode config = objectMapper.readTree(is); JsonNode regexChecks = config.path("regexChecks"); // 从文件中读取 regexChecks 字段 if (regexChecks.isMissingNode()) return; Iterator<Map.Entry<String, JsonNode>> regexes = regexChecks.fields(); while (regexes.hasNext()) { Map.Entry<String, JsonNode> entry = regexes.next(); String checkName = entry.getKey(); String regexPattern = entry.getValue().asText(); Pattern pattern = getCachedPattern(regexPattern); Matcher matcher = pattern.matcher(pdfText); boolean found = matcher.find(); results.add(new ValidationResult( "REGEX", checkName, regexPattern, found ? "Matched" : "Not Matched", found )); } } catch (IOException e) { throw new RuntimeException(e); } } /** * 获取或创建缓存中的 Pattern 对象 */ private Pattern getCachedPattern(String regex) { return patternCache.computeIfAbsent(regex, Pattern::compile); } /** * 允许城市、省份、邮编之间插入任意内容 */ private void checkConsecutiveFields(String pdfText, JsonNode jsonConfig, List<ValidationResult> results) { // 预处理 PDF 文本:保留换行符,标准化空格 String processedText = pdfText .replaceAll("[\\t\\v]+", " ") // 替换制表符/垂直空格 .replaceAll(" +", " ") // 合并多余空格 .trim(); //正则表达式支持跨行匹配 Pattern pattern = Pattern.compile( "([\\u4e00-\\u9fa5]{2,4})[\\s\\S]*?([\\u4e00-\\u9fa5]{2,4})[\\s\\S]*?(\\d{6})", Pattern.DOTALL | Pattern.UNICODE_CASE ); Matcher matcher = pattern.matcher(processedText); if (matcher.find()) { String city = matcher.group(1).trim(); String state = matcher.group(2).trim(); String zip = matcher.group(3).trim(); // 更新 JSON 配置中的字段值 Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> 
entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode()) { String expectedValue = valueNode.asText().trim(); String actualValue = "Not Found"; if (fieldName.equals("SHIPTOCITY")) { actualValue = city; } else if (fieldName.equals("SHIPTOSTATE")) { actualValue = state; } else if (fieldName.equals("SHIPTOZIP")) { actualValue = zip; } results.add(new ValidationResult( "FIELD", fieldName, expectedValue, actualValue, expectedValue.equals(actualValue) )); } } } } } 这个是我的代码 怎么会执行俩边呢 这个结果 @PostMapping("/compare") public R compare(@RequestParam("pdfFile") MultipartFile pdfFile, @RequestParam("jsonContent") String jsonContent) throws Exception { //读取PDF文本 String pdfText = compareService.readPdfText(pdfFile); // 2. 解析JSON配置 JsonNode jsonConfig = compareService.parseJson(jsonContent); // 3. 执行对比校验 List<ValidationResult> results = compareService.compareContent(pdfText, jsonConfig); return R.ok(results); }
07-03
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值