文字匹配解析例子(读取Json 格式配置文件)

本文介绍了一个基于JSON配置的职位级别解析器实现,该解析器能够根据职位名称中的关键字或正则表达式匹配来确定职位级别。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

目标:实现一个文字匹配解析的例子——根据职位名称(title)解析出对应的级别(band),进而推断上下级关系。

功能:

1)Json 格式配置文件读取;

2)通过配置文件灵活控制解析规则:filter(职位名称包含该关键字即命中该级别)、bypass(职位名称包含该关键字则跳过该级别)、filter_reg(职位名称整体匹配该正则即命中该级别)、bypass_reg(职位名称整体匹配该正则则跳过该级别)。

e.g. LangRecogUtils

/**
 * Resolves a job title to a numeric "band" using rules read from a JSON dictionary.
 *
 * <p>The dictionary is a JSON array of entries. Each entry carries a {@code band} number,
 * an optional {@code bypass_list} and an optional {@code filter_list}. Entries are
 * evaluated in file order:
 * <ul>
 *   <li>if any {@code bypass} keyword is contained in the title, or any {@code bypass_reg}
 *       regular expression matches the whole title, the entry is skipped;</li>
 *   <li>otherwise, if any {@code filter} keyword is contained in the title, or any
 *       {@code filter_reg} regular expression matches the whole title, the entry's band
 *       is returned.</li>
 * </ul>
 *
 * <p>Matching is case-insensitive because both the title and the complete dictionary text
 * are lower-cased before comparison. NOTE: lower-casing the whole dictionary also
 * lower-cases regular expressions, so case-sensitive escapes such as {@code \S} or
 * {@code \D} would change meaning; dictionaries should avoid them.
 */
public class LangRecogUtils {
	static final Logger logger = LoggerFactory.getLogger(LangRecogUtils.class);

	/** Name of the dictionary resource that holds the band rules. */
	private static final String DICT_PATH = "band.dic";

	/** Band returned when no rule matches or the dictionary cannot be used. */
	private static final int DEFAULT_BAND = 0;

	/** JSON mapper created once instead of on every {@link #getBand(String)} call. */
	private static final ObjectMapper MAPPER = new ObjectMapper();

	/**
	 * Determines the band of a job title from the rules in {@code band.dic}.
	 *
	 * @param title the job title; {@code null} or the literal text "NULL" (any case)
	 *              yields the default band
	 * @return the band of the first dictionary entry whose filters match and whose
	 *         bypass rules do not, or {@code 0} when nothing matches or the dictionary
	 *         cannot be read or parsed
	 */
	public static int getBand(String title) {
		if (title == null || title.equalsIgnoreCase("NULL")) {
			return DEFAULT_BAND;
		}
		// Locale.ROOT keeps lower-casing independent of the JVM default locale.
		String titleInLowerCase = title.toLowerCase(java.util.Locale.ROOT);

		String allLines = build(DICT_PATH);
		if (allLines.length() == 0) {
			logger.error("LangRecogUtils - dictionary {} is empty or could not be read", DICT_PATH);
			return DEFAULT_BAND;
		}

		try {
			// Parse from the String directly; going through getBytes() would depend on
			// the platform default charset.
			JsonNode rootNode = MAPPER.readValue(allLines, JsonNode.class);
			if (rootNode == null) {
				return DEFAULT_BAND;
			}
			logger.debug("LangRecogUtils - rootNode Size: {}", rootNode.size());

			Iterator<JsonNode> entries = rootNode.getElements();
			while (entries.hasNext()) {
				JsonNode entry = entries.next();

				// get the band from the dictionary
				int entryBand = DEFAULT_BAND;
				JsonNode bandNode = entry.get("band");
				if (bandNode != null) {
					entryBand = bandNode.getIntValue();
					logger.debug("LangRecogUtils - temp band: {}", entryBand);
				} else {
					logger.error("The \"band\" session is madatory.");
				}

				// A bypass hit skips this entry entirely; scanning resumes with the next one.
				if (matchesAnyRule(entry.get("bypass_list"), "bypass", titleInLowerCase)) {
					continue;
				}
				// The first entry with a filter hit decides the band.
				if (matchesAnyRule(entry.get("filter_list"), "filter", titleInLowerCase)) {
					return entryBand;
				}
			}
		} catch (IOException e) {
			// Jackson's parse and mapping exceptions are IOException subclasses.
			logger.error(e.getMessage(), e);
		}

		return DEFAULT_BAND;
	}

	/**
	 * Checks whether any rule in a rule list applies to the title.
	 *
	 * @param ruleList         the {@code bypass_list} or {@code filter_list} node; may be {@code null}
	 * @param kind             {@code "bypass"} or {@code "filter"}; the keyword rule is read from the
	 *                         field named {@code kind}, the regex rule from {@code kind + "_reg"}
	 * @param titleInLowerCase the lower-cased title
	 * @return {@code true} if a keyword is contained in the title or a regex matches the whole title
	 */
	private static boolean matchesAnyRule(JsonNode ruleList, String kind, String titleInLowerCase) {
		if (ruleList == null) {
			return false;
		}
		Iterator<JsonNode> rules = ruleList.getElements();
		while (rules.hasNext()) {
			JsonNode rule = rules.next();

			String keyword = textOf(rule, kind);
			if (keyword != null) {
				logger.debug("LangRecogUtils - {}: {}", kind, keyword);
				if (titleInLowerCase.contains(keyword)) {
					return true;
				}
			}

			String regex = textOf(rule, kind + "_reg");
			if (regex != null) {
				logger.debug("LangRecogUtils - {} reg: {}", kind, regex);
				if (matchesRegex(regex, titleInLowerCase)) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Reads a text field from a rule node.
	 *
	 * @return the text value, or {@code null} when the field is absent or not a JSON string
	 */
	private static String textOf(JsonNode rule, String field) {
		JsonNode value = rule.get(field);
		if (value == null) {
			return null;
		}
		String text = value.getTextValue();
		if (text == null) {
			// A non-string value would otherwise cause a NullPointerException when matching.
			logger.error("LangRecogUtils - value of \"{}\" is not a string, rule ignored", field);
		}
		return text;
	}

	/**
	 * Tests whether the regular expression matches the entire title.
	 *
	 * @return {@code false} when the expression does not match or is not a valid pattern
	 */
	private static boolean matchesRegex(String regex, String titleInLowerCase) {
		try {
			return Pattern.compile(regex).matcher(titleInLowerCase).matches();
		} catch (java.util.regex.PatternSyntaxException e) {
			// A broken rule in the dictionary must not abort the whole lookup.
			logger.error(e.getMessage(), e);
			return false;
		}
	}

	/**
	 * Reads a dictionary resource and returns its content as one lower-cased string.
	 *
	 * <p>Lines are joined without separators and blank lines are skipped. Failures while
	 * opening or reading are logged; whatever was read up to that point is returned.
	 *
	 * @param dictName name of the dictionary, resolved through {@code Util.getInputStream}
	 * @return the lower-cased content, or an empty string if nothing could be read
	 */
	public static String build(String dictName) {
		BufferedReader reader = null;
		int lineCount = 0;
		StringBuilder allLines = new StringBuilder();
		try {
			reader = new BufferedReader(new InputStreamReader(Util.getInputStream(dictName), "utf-8"));
			String line;
			while ((line = reader.readLine()) != null) {
				// Skip blank lines instead of stopping at the first one, which would
				// truncate the dictionary.
				if (line.trim().length() == 0) {
					continue;
				}
				lineCount++;
				allLines.append(line);
			}
			logger.debug("LangRecogUtils - total read lines: {}", lineCount);
		} catch (Exception e) {
			logger.error(e.getMessage(), e);
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (Exception e) {
					logger.error(e.getMessage(), e);
				}
			}
		}
		return allLines.toString().toLowerCase(java.util.Locale.ROOT);
	}

	/**
	 * Prints the band resolved for a set of sample titles.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String[] testStrs = {
				"Executive Officer & Chief of Staff for the CIO",
				"Senior Program Manager-Public Key Infrastructure (PKI)",
				"Deputy Director of Intelligence (Deployment)",
				"Human Resources Manager",
				"Overt Debriefing Team Chief",
				"lead Security Contractor",
				"Assistant Project Manager",
				"Senior Watch Officer",
				"Naval Attaché",
				"Operations Officer",
				"Executive Admin Assistant - E4",
				"Engineer Intern"
		};
		/*
		 * Expectation:
		 * 5,10,10,15,20,20,25,25,30,30,35,35
		 */
		for (int i = 0; i < testStrs.length; i++) {
			System.out.println("LangRecogUtils - Str:" + (i+1) + ", band:" + getBand(testStrs[i]));
		}
	}
}

配置文件,band.dic:

[
    {
        "band": 5,
        "filter_list": [
            {
                "filter": "chief"
            }
        ],
        "bypass_list": [
            {
                "bypass": "team chief"
            },
            {
                "bypass": "Colonel"
            }
        ]
    },
    {
        "band": 10,
        "filter_list": [
            {
                "filter": "director"
            },
            {
                "filter_reg": "senior(.*?)manager(.*)"
            },
            {
                "filter_reg": "senior(.*?)strategist(.*)"
            }
        ]
    },
    {
        "band": 15,
        "filter_list": [
            {
                "filter": "manager"
            },
            {
                "filter": "strategist"
            },
            {
                "filter": "Subject Matter Expert"
            },
            {
                "filter": "Consultant"
            }
        ],
        "bypass_list": [
            {
                "bypass_reg": "senior(.*?)manager(.*)"
            },
            {
                "bypass_reg": "senior(.*?)strategist(.*)"
            },
            {
                "bypass_reg": "Assistant(.*?)manager(.*)"
            }
        ]
    },
    {
        "band": 20,
        "filter_list": [
            {
                "filter": "instructor"
            },
            {
                "filter": "Lead"
            },
            {
                "filter": "Team Chief"
            },
            {
                "filter": "Superintendent"
            },
            {
                "filter": "Supervisor"
            },
            {
                "filter": "Specialist"
            }
        ]
    },
    {
        "band": 25,
        "filter_list": [
            {
                "filter_reg": "Assistant(.*?)manager(.*)"
            },
            {
                "filter_reg": "senior(.*)"
            }
        ],
        "bypass_list": [
            {
                "bypass_reg": "senior(.*?)manager(.*)"
            },
            {
                "bypass_reg": "senior(.*?)strategist(.*)"
            }
        ]
    },
    {
        "band": 35,
        "filter_list": [
            {
                "filter": "Administrative Assistant"
            },
            {
                "filter": "Admin Assistant"
            },
            {
                "filter": "Coordinator"
            },
            {
                "filter": "Contractor"
            },
            {
                "filter": "Internship"
            },
            {
                "filter": "Intern"
            },
            {
                "filter": "Student"
            },
            {
                "filter": "Trainee"
            },
            {
                "filter": "Security Guard"
            },
            {
                "filter": "Part-Time"
            },
            {
                "filter": "Volunteer"
            }
        ]
    },
    {
        "band": 30,
        "filter_list": [
            {
                "filter_reg": "(.*)"
            }
        ]
    }
]



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值