1002. Find Common Characters

本文介绍了一种算法,用于找出一组字符串中所有共同出现的字符,包括重复字符。通过使用统计表来跟踪每个字符串中字符的频率,然后找到这些字符在所有字符串中出现次数的最小值,从而得出最终的公共字符列表。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Given an array A of strings made only from lowercase letters, return a list of all characters that show up in all strings within the list (including duplicates). For example, if a character occurs 3 times in all strings but not 4 times, you need to include that character three times in the final answer.
You may return the answer in any order.
Example 1:

Input: ["bella","label","roller"]
Output: ["e","l","l"]

Example 2:

Input: ["cool","lock","cook"]
Output: ["c","o"]

Note:
1 <= A.length <= 100
1 <= A[i].length <= 100
Ai is a lowercase letter

难度: easy

题目:给定字符串数组A仅由小字符组成,返回所有在所有字符串中都出现过的字符,包括重复。例如,如果一个字符在所有字符串出现了3次而非4次,则返回结果中要包含3次。返回顺序不限。

思路:每个字符串一个统计表。
Runtime: 7 ms, faster than 100.00% of Java online submissions for Find Common Characters.
Memory Usage: 38.1 MB, less than 100.00% of Java online submissions for Find Common Characters.

class Solution {
    public List<String> commonChars(String[] A) {
        int n = A.length;
        int[][] cc = new int[n][26];
        
        for (int i = 0; i < n; i++) {
            for (char c : A[i].toCharArray()) {
                cc[i][c - 'a']++;
            }
        }
        
        List<String> result = new ArrayList<>();
        for (int i = 0; i < 26; i++) {
            int minCount = 100;
            for (int j = 0; j < n; j++) {
                minCount = Math.min(minCount, cc[j][i]);
            }
            
            for (int j = 0; j < minCount; j++) {
                result.add(String.valueOf((char) (i + 'a')));
            }
        }
        
        return result;
    }
}
package com.luxsan.service; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.hankcs.hanlp.HanLP; import com.hankcs.hanlp.seg.common.Term; import com.luxsan.common.core.utils.MessageUtils; import com.luxsan.domain.ValidationResult; import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import net.sourceforge.tess4j.Tesseract; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.springframework.stereotype.Service; import org.springframework.web.multipart.MultipartFile; import java.io.File; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; @RequiredArgsConstructor @Service public class PdfJsonCompareService { private final Map<String, Pattern> patternCache = new ConcurrentHashMap<>(); private final ObjectMapper objectMapper = new ObjectMapper(); private Tesseract tesseract; @PostConstruct public void initOcrEngine() { tesseract = new Tesseract(); try { //语言包路径和支持语言 tesseract.setDatapath("D:\\maven_use\\lingxi-lhc\\lingxi-ai-extend\\lingxi-ai-comparison\\src\\main\\resources\\tessdata"); tesseract.setLanguage("eng+chi_sim"); tesseract.setPageSegMode(1); // 自动页面分割 tesseract.setOcrEngineMode(1); // LSTM引擎 } catch (Exception e) { throw new RuntimeException("OCR引擎初始化失败: " + e.getMessage(), e); } } /** * 支持PDF和图片 */ public String extractContent(MultipartFile file) { String contentType = file.getContentType(); String fileName = file.getOriginalFilename().toLowerCase(); if (contentType == null) { return "不支持的文件类型: " + contentType; } if (fileName.endsWith(".pdf")) { return readPdfText(file); } return extractImageText(file); } /** * 读取PDF文本内容 * * @param file * @return */ public String readPdfText(MultipartFile file) { try (PDDocument doc = PDDocument.load(file.getInputStream())) { PDFTextStripper stripper = new PDFTextStripper(); String rawText = stripper.getText(doc); return rawText.replaceAll("\\s+", " ").trim(); //统一空白符 } catch (Exception e) { return MessageUtils.message("file.red.pdf.error"); } } /** * OCR识别图片内容 */ private String extractImageText(MultipartFile file) { try { // 创建临时文件 Path tempFile = Files.createTempFile("ocr_", getFileExtension(file.getOriginalFilename())); Files.copy(file.getInputStream(), tempFile, StandardCopyOption.REPLACE_EXISTING); // 执行OCR识别 File imageFile = tempFile.toFile(); String result = tesseract.doOCR(imageFile) .replaceAll("\\s+", " ").trim(); System.out.println("读取的内容"+result); // 清理临时文件 Files.deleteIfExists(tempFile); return result; } catch (Exception e) { return "OCR处理失败: " + e.getMessage(); } } private String getFileExtension(String filename) { if (filename == null) return ".tmp"; int dotIndex = filename.lastIndexOf('.'); return (dotIndex == -1) ? ".tmp" : filename.substring(dotIndex); } public JsonNode parseJson(String jsonContent) throws Exception { return this.objectMapper.readTree(jsonContent); } public List<ValidationResult> compareContent(String pdfText, JsonNode jsonConfig) { List<ValidationResult> results = new ArrayList<>(); //处理JSO 结构(支持单个对象或数组) JsonNode dataNode; if (jsonConfig.isArray() && jsonConfig.size() > 0) { dataNode = jsonConfig.get(0); } else if (jsonConfig.isObject()) { dataNode = jsonConfig; } else { results.add(new ValidationResult("ERROR", "JSON格式错误", "期望一个对象或包含对象的数组", "实际格式不匹配", false)); return results; } // 定义地址字段列表 Set<String> addressFields = new HashSet<>(Arrays.asList("SHIPTOCITY", "SHIPTOSTATE", "SHIPTOZIP", "SOLDTOCITY", "SOLDTOSTATE", "SOLDTOZIP")); //字段直接匹配 checkNonAddressFields(pdfText, dataNode, results, addressFields); //连续匹配 checkAddressFields(pdfText, dataNode, results, addressFields); return results; } /** * 检查 JSON 中非地址字段是否严格存在于 PDF 文本中 */ private void checkNonAddressFields(String pdfText, JsonNode jsonConfig, List<ValidationResult> results, Set<String> addressFields) { Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode() && !addressFields.contains(fieldName)) { String expectedValue = valueNode.asText().trim(); if (expectedValue.isEmpty()) continue; // 使用正则表达式进行严格匹配 String regex = "\\b" + Pattern.quote(expectedValue) + "\\b"; Pattern pattern = getCachedPattern(regex); Matcher matcher = pattern.matcher(pdfText); boolean found = matcher.find(); results.add(new ValidationResult( "FIELD", fieldName, expectedValue, found ? "Found" : "Not Found", found )); } } } /** * 获取或创建缓存中的 Pattern 对象 */ private Pattern getCachedPattern(String regex) { return patternCache.computeIfAbsent(regex, Pattern::compile); } /** * 检查 JSON 中地址字段是否严格存在于 PDF 文本中 */ private void checkAddressFields(String pdfText, JsonNode jsonConfig, List<ValidationResult> results, Set<String> addressFields) { List<Term> terms = HanLP.segment(pdfText); List<String> addressParts = new ArrayList<>(); for (Term term : terms) { String word = term.word; if (word.matches("\\d{5,7}")) { addressParts.add(word); } else if (term.nature.toString().startsWith("ns")) { addressParts.add(word); } } // 遍历 JSON 配置中的地址字段 Iterator<Map.Entry<String, JsonNode>> fields = jsonConfig.fields(); while (fields.hasNext()) { Map.Entry<String, JsonNode> entry = fields.next(); String fieldName = entry.getKey(); JsonNode valueNode = entry.getValue(); if (valueNode.isValueNode() && addressFields.contains(fieldName)) { String expectedValue = valueNode.asText().trim(); if (expectedValue.isEmpty()) continue; boolean found = false; for (String part : addressParts) { if (part.equals(expectedValue)) { found = true; break; } } results.add(new ValidationResult( "FIELD", fieldName, expectedValue, found ? "Found" : "Not Found", found )); } } } } 如果是这样的话 我的pdf是没问题的 然后就是我的图片 可以读取内容但是没有比较和校验
最新发布
07-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值