这是一个解析方法,用以解析由().and.or组成的表达式, (你也可以自己添加规则)
如果想用数学上的带有()组合的表达式一样来处理与或操作
那么这个方法完全适合。
例如你需要对以下规则进行数据过滤:
(region = "asia") AND ((status = null) OR ((inactive = "true") AND (department = "aaaa")) OR ((costcenter = "ggg") OR (location = "india")))
处理流程介绍
现在有一批学生数据, 我想要筛选其中的女学生, 并且成绩都及格的, 或者是喜欢篮球的男生, 那么我们规定的规则如下:
((student.sex=0) and (student.score > 60) ) or ((student.sex=1) and (student.hobbies='basketball'))
- 解析后的单条规则(编号从 @0 开始, 与解析器的输出一致):
@0: student.sex=0
@1: student.score > 60
@2: @0 and @1
@3: student.sex=1
@4: student.hobbies='basketball'
@5: @3 and @4
@6: @2 or @5
- 根据每个单规则, 生成规则函数, 例如
Predicate<Student> isFemale =
student -> {
if (student.sex==0) {
return true;
}else{
return false;
}
};
- 将规则函数整合为最终的规则, 最终只会有一个规则函数
例如and符
// and规则函数集合
List<Predicate<Student>> mappers;
// 合并后的规则函数
Predicate<Student> combined =
student -> mappers.stream()
.reduce(t ->true, Predicate::and)
.test(student);
- 对数据进行过滤
规则解析器
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Flattens a boolean filter expression built from parentheses and connectives
 * (for example {@code AND} / {@code OR}) into an ordered list of parenthesis-free
 * sub-expressions.
 *
 * <p>The innermost parenthesised group that appears first in the text is repeatedly
 * replaced by a placeholder {@code @N}, where {@code N} is the index of that group in
 * the returned list. Later entries may therefore refer to earlier ones by placeholder.
 *
 * @author lycaon
 * @version 1.0.0
 */
public class FilterCommandParser {
    /** Matches one parenthesised group that contains no nested parentheses. */
    private static final Pattern BRACED_REDUX = Pattern.compile("\\(([^()]*)\\)");

    /**
     * Reduces {@code exp} to a list of sub-expressions in evaluation order.
     *
     * <p>For the input
     * <pre>
     * (region = "asia") AND ((status = null) OR ((inactive = "true") AND (department = "aaaa")) OR ((costcenter = "ggg") OR (location = "india")))
     * </pre>
     * the returned list is
     * <pre>
     * [0] region = "asia"
     * [1] status = null
     * [2] inactive = "true"
     * [3] department = "aaaa"
     * [4] {@literal @}2 AND {@literal @}3
     * [5] costcenter = "ggg"
     * [6] location = "india"
     * [7] {@literal @}5 OR {@literal @}6
     * [8] {@literal @}1 OR {@literal @}4 OR {@literal @}7
     * [9] {@literal @}0 AND {@literal @}8
     * </pre>
     *
     * @param exp well-formed expression
     * @return the group contents in reduction order, followed by whatever text remains
     *         once no parenthesised group is left (always the last element)
     */
    static List<String> parse(String exp) {
        List<String> reduced = new ArrayList<>();
        String remaining = exp;
        Matcher group = BRACED_REDUX.matcher(remaining);
        while (group.find()) {
            // The placeholder number is the list index the group is about to occupy.
            String placeholder = "@" + reduced.size();
            reduced.add(group.group(1));
            // Splice the placeholder in place of the matched "( ... )" text.
            remaining = remaining.substring(0, group.start())
                    + placeholder
                    + remaining.substring(group.end());
            // Rescan from the beginning: the splice may have exposed a new innermost group.
            group = BRACED_REDUX.matcher(remaining);
        }
        // Whatever is left (no reducible group) is the top-level expression.
        reduced.add(remaining);
        return reduced;
    }

    /** Prints the reduction of a sample expression, one sub-expression per line. */
    public static void main(String[] args) {
        String exp = "(region = \"asia\") AND ((status = null) OR ((inactive = \"true\") AND (department = \"aaaa\")) OR ((costcenter = \"ggg\") OR (location = \"india\")))";
        for (String part : parse(exp)) {
            System.out.println(part);
        }
    }
}
函数生成器
/**
 * Operator tags used when interpreting a single filter rule.
 *
 * <p>The original note lists two categories of rule symbols (connectives and comparison
 * operators); only comparison operators are declared in this enum.
 */
public enum RulesTag {
    /** Greater than or equal to. */
    GTE,
    /** Greater than. */
    GT,
    /** Less than. */
    LT,
    /** Less than or equal to. */
    LTE,
    /** Equal to. */
    EQ,
    /** Not equal to. */
    NEQ,
    /** Membership in a list of values. */
    IN
}
// Configuration holder used by FilterProcessor (the original comment labelled this
// the "Event business entity class").
public class KvConfig {
// Filter expression; FilterProcessor passes getFilter() to FilterCommandParser.parse.
private String filter;
// NOTE(review): not read anywhere in the visible code — presumably marks whether
// matched events should be dropped; confirm against the caller.
private boolean drop;
// Key under which FilterProcessor writes "1" (matched) or "0" (not matched) into each
// event's parser field content.
private String matchedParam;
// Accessors and toString are elided in this listing.
...set get
...toString
}
import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/**
 * Function generator: turns a single textual rule, or a group of already-built rules,
 * into a {@link Predicate} over {@code Event}.
 *
 * <p>Field values are read through {@code event.getParserFieldContent()}, which is used
 * here as a map from field name to string value (NOTE(review): the map's declared type
 * is not visible in this file — confirm).
 */
public class FunctionGenerator {

    /**
     * Combines several predicates with a logical connective. Add further connectives here.
     *
     * <p>The predicates are composed once, when this method is called; later changes to
     * {@code mappers} do not affect the returned predicate.
     *
     * @param mappers predicates to combine; {@code null} or empty yields a constant
     *                predicate ({@code true} for "and", {@code false} for "or")
     * @param tag     the connective, {@code "and"} or {@code "or"}; surrounding whitespace
     *                and letter case are ignored, so the historical {@code " and "} /
     *                {@code " or "} values still work
     * @return the combined predicate
     * @throws Exception if {@code tag} is not a supported connective
     */
    public static Predicate<Event> genFilterFunc(List<Predicate<Event>> mappers, String tag) throws Exception {
        String connective = tag == null ? "" : tag.trim();
        boolean isAnd = "and".equalsIgnoreCase(connective);
        boolean isOr = "or".equalsIgnoreCase(connective);
        if (!isAnd && !isOr) {
            throw new Exception("invalid tag:" + tag);
        }
        if (mappers == null || mappers.isEmpty()) {
            // Identity elements: an empty conjunction is true, an empty disjunction is false.
            if (isAnd) {
                return event -> true;
            }
            return event -> false;
        }
        Predicate<Event> combined = mappers.get(0);
        for (int i = 1; i < mappers.size(); i++) {
            Predicate<Event> next = mappers.get(i);
            combined = isAnd ? combined.and(next) : combined.or(next);
        }
        return combined;
    }

    /**
     * Builds the predicate for one atomic rule of the form {@code field <op> value}.
     * Add further operators here.
     *
     * <p>Supported operators, checked in this order: {@code >=}, {@code <=}, {@code !=},
     * {@code >}, {@code <}, {@code =}, {@code " in "}. Every predicate returns
     * {@code false} when the event has no entry for the field. The numeric operators
     * compare as {@code long}; a non-numeric field value makes the returned predicate
     * throw {@link NumberFormatException} when it is tested.
     *
     * @param exp the rule text, e.g. {@code "age >= 18"} or {@code "city in a,b,c"}
     * @return predicate implementing the rule
     * @throws Exception if the rule is {@code null}, has no recognised operator, does not
     *                   split into exactly two operands, or uses a numeric operator with
     *                   a non-numeric right-hand side
     */
    public static Predicate<Event> generator(String exp) throws Exception {
        if (exp == null) {
            throw new Exception("invalid expression:" + exp);
        }
        final RulesTag tag;
        final String operator;
        // Two-character operators must be tested before their one-character prefixes.
        if (exp.contains(">=")) {
            operator = ">=";
            tag = RulesTag.GTE;
        } else if (exp.contains("<=")) {
            operator = "<=";
            tag = RulesTag.LTE;
        } else if (exp.contains("!=")) {
            operator = "!=";
            tag = RulesTag.NEQ;
        } else if (exp.contains(">")) {
            operator = ">";
            tag = RulesTag.GT;
        } else if (exp.contains("<")) {
            // Previously this branch split on ">" and so rejected every "<" rule.
            operator = "<";
            tag = RulesTag.LT;
        } else if (exp.contains("=")) {
            operator = "=";
            tag = RulesTag.EQ;
        } else if (exp.contains(" in ")) {
            operator = " in ";
            tag = RulesTag.IN;
        } else {
            throw new Exception("invalid expression:" + exp);
        }
        // None of the operators contains a regex metacharacter, so split() is safe here.
        String[] splits = exp.split(operator);
        if (splits.length != 2) {
            throw new Exception("invalid expression:" + exp);
        }
        final String field = splits[0].trim();
        final String operand = splits[1].trim();

        switch (tag) {
            case EQ:
                // operand.equals(...) is null-safe if the field is present with a null value.
                return event -> event.getParserFieldContent().containsKey(field)
                        && operand.equals(event.getParserFieldContent().get(field));
            case NEQ:
                // Like every other operator, only matches when the field is present.
                return event -> event.getParserFieldContent().containsKey(field)
                        && !operand.equals(event.getParserFieldContent().get(field));
            case GT:
            case LT:
            case GTE:
            case LTE:
                return numericComparison(exp, field, operand, tag);
            case IN: {
                // Split the candidate list once; elements are compared verbatim (not trimmed).
                final List<String> candidates = Arrays.asList(operand.split(","));
                return event -> event.getParserFieldContent().containsKey(field)
                        && candidates.contains(event.getParserFieldContent().get(field));
            }
            default:
                throw new Exception("unknown error:" + exp);
        }
    }

    /** Builds a {@code long} comparison predicate; the threshold is parsed once, up front. */
    private static Predicate<Event> numericComparison(String exp, String field, String operand, RulesTag tag)
            throws Exception {
        final long threshold;
        try {
            threshold = Long.parseLong(operand);
        } catch (NumberFormatException e) {
            throw new Exception("invalid expression:" + exp, e);
        }
        return event -> {
            if (!event.getParserFieldContent().containsKey(field)) {
                return false;
            }
            long actual = Long.parseLong(event.getParserFieldContent().get(field));
            switch (tag) {
                case GT:
                    return actual > threshold;
                case LT:
                    return actual < threshold;
                case GTE:
                    return actual >= threshold;
                case LTE:
                    return actual <= threshold;
                default:
                    throw new IllegalStateException("not a numeric operator:" + tag);
            }
        };
    }
}
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
 * Event filter: parses the configured filter expression, builds one predicate per
 * sub-expression, and partitions events by the final predicate.
 */
public class FilterProcessor {
    private static final Logger logger = LoggerFactory.getLogger(FilterProcessor.class);
    private KvConfig config;

    public FilterProcessor(KvConfig config) {
        this.config = config;
    }

    /**
     * Filters {@code events} with the rule expression from the configuration.
     *
     * <p>The expression is first reduced by {@code FilterCommandParser.parse}; for
     * {@code ((age > 2) and (sex = 0)) or (name = no.1)} that gives
     * <pre>
     *   0: age > 2
     *   1: sex = 0
     *   2: {@literal @}0 and {@literal @}1
     *   3: name = no.1
     *   4: {@literal @}2 or {@literal @}3
     * </pre>
     * Exactly one predicate is built per entry, in order, so that a placeholder
     * {@code @N} always refers to the predicate at index {@code N}. The predicate of the
     * last entry is the overall filter.
     *
     * <p>Each event is also tagged: the configured matched-param key is set to
     * {@code "1"} in its parser field content when it matches and to {@code "0"} otherwise.
     *
     * @param events events to filter
     * @return events partitioned by whether they match ({@code true}) or not ({@code false})
     * @throws Exception if any sub-expression cannot be turned into a predicate; failing
     *                   here avoids evaluating events against a partially built filter
     */
    public Map<Boolean, List<Event>> process(List<Event> events) throws Exception {
        // Parse the rule expression.
        List<String> decodedExp = FilterCommandParser.parse(this.config.getFilter());
        // Build one predicate per parsed entry; indices must stay aligned with decodedExp.
        List<Predicate<Event>> assembly = new ArrayList<>(decodedExp.size());
        for (String currentDecodedExp : decodedExp) {
            try {
                assembly.add(buildPredicate(currentDecodedExp, assembly));
            } catch (Exception e) {
                logger.error("generator function failed:" + e.getMessage(), e);
                throw e;
            }
        }
        Predicate<Event> predicate = assembly.get(assembly.size() - 1);
        String matchedParam = this.config.getMatchedParam();
        return events.parallelStream().collect(Collectors.partitioningBy(event -> {
            boolean re = predicate.test(event);
            // "1" marks a match, "0" a non-match.
            event.getParserFieldContent().put(matchedParam, re ? "1" : "0");
            return re;
        }));
    }

    /**
     * Builds the predicate for one parsed entry: an atomic rule, a bare {@code @N}
     * reference, or {@code @N} references joined by a single kind of connective.
     */
    private static Predicate<Event> buildPredicate(String exp, List<Predicate<Event>> assembly) throws Exception {
        String lowered = exp.toLowerCase();
        boolean hasAnd = lowered.contains(" and ");
        boolean hasOr = lowered.contains(" or ");
        if (!hasAnd && !hasOr) {
            String trimmed = exp.trim();
            if (trimmed.matches("@\\d+")) {
                // Redundant parentheses, e.g. "((a = 1))", leave a bare reference behind.
                return resolveReference(trimmed, assembly);
            }
            // A rule without connectives.
            return FunctionGenerator.generator(exp);
        }
        if (hasAnd && hasOr) {
            // No precedence is defined between the connectives; grouping must be explicit.
            throw new IllegalArgumentException(
                    "invalid expression, current exp:" + exp + " (use parentheses to separate and/or)");
        }
        String tag = hasAnd ? " and " : " or ";
        // Split case-insensitively so "@0 AND @1" is handled the same as "@0 and @1".
        String[] singleRules = exp.split("(?i)" + tag);
        List<Predicate<Event>> combine = new ArrayList<>(singleRules.length);
        for (String rule : singleRules) {
            combine.add(resolveReference(rule, assembly));
        }
        return FunctionGenerator.genFilterFunc(combine, tag);
    }

    /** Looks up the already-built predicate that a placeholder such as {@code @3} refers to. */
    private static Predicate<Event> resolveReference(String token, List<Predicate<Event>> assembly) {
        String ref = token.trim();
        if (ref.length() < 2 || ref.charAt(0) != '@') {
            throw new IllegalArgumentException("invalid rule reference:" + ref);
        }
        int index;
        try {
            index = Integer.parseInt(ref.substring(1));
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("invalid rule reference:" + ref, e);
        }
        if (index < 0 || index >= assembly.size()) {
            throw new IllegalArgumentException("rule reference out of range:" + ref);
        }
        return assembly.get(index);
    }
}