[NIO.2] 第六篇 合并两个 Path 对象

本文介绍了如何使用NIO.2中的Path对象进行路径合并操作,包括resolve()方法和resolveSibling()方法的具体应用实例。
合并两个 Path 对象可以方便你先定义一个根路径,然后再在后面添加其它的路径。当你需要使用公共路径的时候,这个功能相当有用。NIO.2 提供了 resolve() 方法来支持这个功能。下面看看例子:

//define the fixed path 
Path base = Paths.get("C:/rafaelnadal/tournaments/2009");

//resolve BNP.txt file
Path path_1 = base.resolve("BNP.txt");
//output: C:\rafaelnadal\tournaments\2009\BNP.txt
System.out.println(path_1.toString());

//resolve AEGON.txt file
Path path_2 = base.resolve("AEGON.txt");
//output: C:\rafaelnadal\tournaments\2009\AEGON.txt
System.out.println(path_2.toString());


NIO.2 还提供了 resolveSibling() 方法用来替换最后一级文件或目录,例如:

//define the fixed path 
Path base = Paths.get("C:/rafaelnadal/tournaments/2009/BNP.txt");

//resolve sibling AEGON.txt file
Path path = base.resolveSibling("AEGON.txt");
//output: C:\rafaelnadal\tournaments\2009\AEGON.txt
System.out.println(path.toString());


文章来源:[url]http://www.aptusource.org/2014/03/nio-2-combining-path/[/url]
package com.luxsan.service; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.hankcs.hanlp.HanLP; import com.hankcs.hanlp.seg.common.Term; import com.luxsan.common.core.utils.MessageUtils; import com.luxsan.domain.ValidationResult; import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import net.sourceforge.tess4j.Tesseract; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.*; @RequiredArgsConstructor @Service public class ReadFileContentService { private final ObjectMapper objectMapper = new ObjectMapper(); private Tesseract tesseract; @PostConstruct public void initOcrEngine() { tesseract = new Tesseract(); //语言包路径和支持语言 tesseract.setDatapath("D:\\maven_use\\lingxi-lhc\\lingxi-ai-extend\\lingxi-ai-comparison\\src\\main\\resources\\tessdata"); tesseract.setLanguage("eng+chi_sim"); tesseract.setPageSegMode(6); // 自动页面分割 tesseract.setOcrEngineMode(1); // LSTM引擎 } /** * 支持PDF读取文件和图片ocr */ public String extractContent(MultipartFile file) { String contentType = file.getContentType(); String fileName = file.getOriginalFilename().toLowerCase(); if (contentType == null) { return "不支持的文件类型: " + contentType; } if (fileName.endsWith(".pdf")) { return readPdfText(file); } return extractImageText(file); } /** * 读取PDF文本内容 * * @param file * @return */ public String readPdfText(MultipartFile file) { try (PDDocument doc = PDDocument.load(file.getInputStream())) { PDFTextStripper stripper = new PDFTextStripper(); // 设置行分隔符 stripper.setLineSeparator("\n"); // 设置字符间距 stripper.setSortByPosition(true); String rawText = stripper.getText(doc); System.out.println("pdf内容" + rawText); return rawText.trim(); } catch (Exception e) { return MessageUtils.message("file.read.pdf.error"); } } /** * OCR识别图片内容 */ private String extractImageText(MultipartFile file) { try (InputStream is = file.getInputStream()) { // 将输入流直接转为BufferedImage BufferedImage image = ImageIO.read(is); if (image == null) { return MessageUtils.message("Image.parsing.failed"); } // 直接对BufferedImage进行OCR String result = tesseract.doOCR(image).replaceAll("\\s+", " ").trim(); return result; } catch (Exception e) { return MessageUtils.message("file.read.picture.error"); } } // private String getFileExtension(String filename) { // if (filename == null) return ".tmp"; // int dotIndex = filename.lastIndexOf('.'); // return (dotIndex == -1) ? ".tmp" : filename.substring(dotIndex); // } /** * 解析json */ public JsonNode parseJson(String jsonContent) throws Exception { return this.objectMapper.readTree(jsonContent); } public List<ValidationResult> compareContent(String pdfText, JsonNode jsonConfig) { List<ValidationResult> results = new ArrayList<>(); String cleanPdf = pdfText.replaceAll("\\s+", ""); // 预处理PDF文本 //处理JSON结构对象/数组 JsonNode dataNode = jsonConfig.isArray() && jsonConfig.size() > 0 ? jsonConfig.get(0) : jsonConfig; // 高效遍历JSON字段 dataNode.fields().forEachRemaining(entry -> { String key = entry.getKey(); String value = entry.getValue().asText().replaceAll("\\s+", ""); if (!value.isEmpty()) { boolean found = cleanPdf.contains(value); results.add(new ValidationResult( "FIELD", key, value, found ? "Found" : "Not Found", found )); } }); return results; } public JsonNode parsePipeSeparatedDataToJson(String inputData) throws Exception { Map<String, String> dataMap = parsePipeSeparatedData(inputData); return objectMapper.valueToTree(dataMap); } //解析分隔数据 public Map<String, String> parsePipeSeparatedData(String fileCONTENT) { //处理转义的换行符 fileCONTENT = fileCONTENT.replace("\\n", "\n").replaceAll("\\|+\"$", "").trim();; Map<String, String> dataMap = new LinkedHashMap<>(); String[] lines = fileCONTENT.split("\n"); if (lines.length >= 2) { String[] headers = lines[0].split("\\|"); String[] values = lines[1].split("\\|"); int minLength = Math.min(headers.length, values.length); for (int i = 0; i < minLength; i++) { dataMap.put(headers[i], values[i]); } } return dataMap; } //判断是不是json public boolean isPipeSeparatedData(String inputData) { return inputData.contains("|"); } // 比较和校验数据 public List<ValidationResult> compareAndValidate(String fileContent, JsonNode jsonConfig) { List<ValidationResult> results = new ArrayList<>(); Map<String, String> pipeDataMap = objectMapper.convertValue(jsonConfig, Map.class); fileContent = fileContent.replaceAll("\\s+", ""); for (Map.Entry<String, String> entry : pipeDataMap.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); if (!value.isEmpty()) { value = value.replaceAll("\\s+", ""); boolean found = fileContent.contains(value); results.add(new ValidationResult( "FIELD", key, value, found ? "Found" : "Not Found", found )); } } return results; } } 哪里需要改进待优化
最新发布
07-17
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值