290. Word Pattern -- using a HashMap for character matching

This post walks through a LeetCode problem, Word Pattern: decide whether a given pattern and a string fully match, i.e. whether every character in the pattern can be put in a one-to-one correspondence with a word in the string.

290. Word Pattern

Given a pattern and a string str, find if str follows the same pattern.

Here follow means a full match, such that there is a bijection between a letter in pattern and a non-empty word in str.

Examples:

  1. pattern = "abba", str = "dog cat cat dog" should return true.
  2. pattern = "abba", str = "dog cat cat fish" should return false.
  3. pattern = "aaaa", str = "dog cat cat dog" should return false.
  4. pattern = "abba", str = "dog dog dog dog" should return false.

Notes:
You may assume pattern contains only lowercase letters, and str contains lowercase letters separated by a single space.

The gist of the problem: as shown above, given a pattern of letters, determine whether the string of words that follows arranges its words in the same pattern:

import java.util.HashMap;

public class Solution {
    public boolean wordPattern(String pattern, String str) {
        HashMap<Character, String> map = new HashMap<Character, String>();
        String[] arr = str.split(" ");
        // The number of words must equal the number of pattern characters.
        if (arr.length != pattern.length()) return false;
        for (int i = 0; i < pattern.length(); i++) {
            char a = pattern.charAt(i);
            String b = arr[i];
            if (map.containsKey(a)) {
                // The character already has a mapping; it must map to the same word.
                if (map.get(a).equals(b)) {
                    continue;
                }
                return false;
            } else {
                // A new character may not reuse a word that is already mapped,
                // otherwise the mapping would not be a bijection.
                if (map.containsValue(b)) {
                    return false;
                } else {
                    map.put(a, b);
                }
            }
        }
        return true;
    }
}
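As a quick sanity check, the method can be exercised against the four examples above. This is a minimal sketch; the main harness and the class name WordPatternTest are just for illustration and are not part of the LeetCode submission:

public class WordPatternTest {
    public static void main(String[] args) {
        Solution s = new Solution();
        System.out.println(s.wordPattern("abba", "dog cat cat dog"));   // true
        System.out.println(s.wordPattern("abba", "dog cat cat fish"));  // false
        System.out.println(s.wordPattern("aaaa", "dog cat cat dog"));   // false
        System.out.println(s.wordPattern("abba", "dog dog dog dog"));   // false
    }
}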

The part that cost me the most time was separating the string into words: split it on spaces and store the pieces as an array.

String.split handles this in one call; it's a pretty handy method. (I've been a bit down lately, and my head hurts.)
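For reference, a minimal sketch of what that split produces (the variable names are chosen just for this illustration):

String str = "dog cat cat dog";
String[] arr = str.split(" ");  // arr is {"dog", "cat", "cat", "dog"}, so arr.length == 4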

Original problem: https://leetcode.com/problems/word-pattern/
