最近一个学Go语言的朋友给我发了一篇文章 http://blog.csdn.net/chenssy/article/details/26961957 ,讲的是使用DFA(即Deterministic Finite Automaton,确定有穷自动机)算法实现敏感词过滤,并问我能不能用PHP来实现。感谢chenssy的精彩文章,下面是我仿照该文写的PHP实现。
2018-05-11新增:
这个实现后来做了增强版,详见:增强版GitHub源码
<?php
// Declare the response body as HTML encoded in UTF-8.
// NOTE: header() only takes effect if it runs before any output has been sent.
header("Content-type:text/html; charset=utf-8");
/**
 * Minimal key/value map; MyFilter uses one instance per trie node.
 *
 * Entries are kept in a private array rather than in dynamically created
 * object properties. Dynamic properties are deprecated as of PHP 8.2, and a
 * property-based store throws on keys such as the empty string.
 */
class MyMap
{
    /** @var array Stored entries, indexed by key. */
    private $items = [];

    /**
     * Look up the value stored under a key.
     *
     * @param string $key Entry key.
     * @return mixed|null The stored value, or null when the key is absent
     *                    (or was stored with a null value).
     */
    public function get($key)
    {
        return isset($this->items[$key]) ? $this->items[$key] : null;
    }

    /**
     * Store a value under a key, replacing any previous value.
     *
     * @param string $key   Entry key.
     * @param mixed  $value Value to store.
     * @return void
     */
    public function put($key, $value)
    {
        $this->items[$key] = $value;
    }
}
/**
 * Keyword filter backed by a character trie (DFA) built from MyMap nodes.
 *
 * Each node maps a single character to its child node and carries an
 * 'isEnd' entry: 1 when a registered word ends at that node, 0 otherwise.
 */
class MyFilter
{
    /** @var MyMap|null Root node of the trie; null until the first word is added. */
    public $map = null;

    /**
     * Register a word in the trie.
     *
     * An empty word only guarantees that the root node exists; it never marks
     * the root itself as a word end.
     *
     * @param string $word Word to register (UTF-8).
     * @return void
     */
    public function addWordToMap($word)
    {
        if ($this->map === null) {
            $this->map = $this->newNode();
        }

        $node = $this->map;
        $chars = $this->splitChars($word);
        foreach ($chars as $char) {
            $child = $node->get($char);
            if ($child === null) {
                $child = $this->newNode();
                $node->put($char, $child);
            }
            $node = $child;
        }

        // Only a non-empty word terminates at a node below the root.
        if (count($chars) > 0) {
            $node->put('isEnd', 1);
        }
    }

    /**
     * Find registered words in a string, scanning left to right.
     *
     * Longest-match only: at every start position the longest registered word
     * beginning there is reported, and scanning resumes right after it, so
     * reported matches never overlap. When nothing matches at a position the
     * scan moves forward by exactly one character, which keeps words that
     * begin inside a failed partial match from being skipped.
     *
     * @param string $string Text to scan (UTF-8).
     * @return array List of matched words in order of appearance; empty when
     *               nothing matches or no word has been registered.
     */
    public function searchFromMap($string)
    {
        $result = [];
        if ($this->map === null) {
            // No dictionary yet: nothing can match.
            return $result;
        }

        $chars = $this->splitChars($string);
        $total = count($chars);
        $start = 0;

        while ($start < $total) {
            $matchedLen = $this->longestMatchAt($chars, $start, $total);
            if ($matchedLen > 0) {
                $result[] = implode('', array_slice($chars, $start, $matchedLen));
                $start += $matchedLen;
            } else {
                $start++;
            }
        }

        return $result;
    }

    /**
     * Length (in characters) of the longest registered word starting at $start, or 0.
     */
    private function longestMatchAt(array $chars, $start, $total)
    {
        $node = $this->map;
        $matchedLen = 0;

        for ($i = $start; $i < $total; $i++) {
            $node = $node->get($chars[$i]);
            if ($node === null) {
                break;
            }
            if ($node->get('isEnd')) {
                // Remember the match but keep walking: a longer word may follow.
                $matchedLen = $i - $start + 1;
            }
        }

        return $matchedLen;
    }

    /**
     * Split text into a list of single UTF-8 characters.
     */
    private function splitChars($text)
    {
        $text = (string) $text;
        $length = mb_strlen($text, 'UTF-8');
        $chars = [];
        for ($i = 0; $i < $length; $i++) {
            $chars[] = mb_substr($text, $i, 1, 'UTF-8');
        }

        return $chars;
    }

    /**
     * Create an empty trie node that is not (yet) the end of a word.
     */
    private function newNode()
    {
        $node = new MyMap();
        $node->put('isEnd', 0);

        return $node;
    }
}
// Demo: register a few sample words, scan one sentence, and dump the matches.
$example = new MyFilter();
foreach (['中国人', '中国男人', '女人'] as $word) {
    $example->addWordToMap($word);
}

$result = $example->searchFromMap(
    '我是中国人,我爱中国,中国男人是最优秀的,中国女人是最漂亮的'
);

// To inspect the generated trie, dump $example->map as well.
var_dump($result);