// Trie树(一种特殊结构的树):时间复杂度为 O(n),而一般排序算法为 O(n log n)。
// Trie树,又称单词查找树、字典树,是一种树形结构,是哈希树的一种变种。典型应用是统计和排序大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。优点:最大限度地减少无谓的字符串比较,查询效率比哈希表高。Trie的核心思想是空间换时间:利用字符串的公共前缀来降低查询时间的开销,以达到提高效率的目的。
// 缺点:Trie树的内存消耗非常大。当然,如果用左儿子右兄弟的方法建树,可能会好一些。
// 适用场景:字符串检索、词频统计、搜索引擎的热门查询。
//场景一:前导匹配
//场景二:统计前缀个数
//场景三:统计完整单词的词频个数
//场景四:词频统计topk (trie+小根堆)

import java.util.ArrayList;
import java.util.List;
/**
 * A trie (prefix tree) over the 26 letters {@code a}-{@code z} that records how
 * many times each word has been added.
 *
 * <p>Letters are folded to lower case with {@link Character#toLowerCase(char)}
 * by every operation, so additions and lookups are case-insensitive. Characters
 * that do not map to {@code a}-{@code z} are rejected by {@link #addWord} and
 * simply never match in the query methods.
 *
 * <p>This class performs no synchronization.
 */
public class Trie {

    /** Number of child slots per node: one for each letter 'a'..'z'. */
    private static final int ALPHABET_SIZE = 26;

    /** Root node; it represents the empty string and carries no letter. */
    private final Vertex root = new Vertex();

    /** A single trie node. */
    protected static class Vertex {
        /** Number of added words that end exactly at this node. */
        protected int words;
        /** Number of added words that pass through this node and continue further. */
        protected int prefixes;
        /** Letter on the edge leading to this node; {@code '\0'} for the root. */
        protected char c;
        /** Child nodes; slot {@code i} corresponds to the letter {@code 'a' + i}. */
        protected Vertex[] edges;

        Vertex() {
            this.words = 0;
            this.prefixes = 0;
            // A freshly allocated array already holds only nulls.
            this.edges = new Vertex[ALPHABET_SIZE];
        }
    }

    /**
     * Maps a character to its child-slot index.
     *
     * @param c the character to map (folded to lower case first)
     * @return an index in {@code 0..25}, or {@code -1} if the character is not a letter a-z
     */
    private static int indexOf(char c) {
        int index = Character.toLowerCase(c) - 'a';
        return (index >= 0 && index < ALPHABET_SIZE) ? index : -1;
    }

    /**
     * Walks from the root along the letters of {@code path}.
     *
     * @param path the letters to follow
     * @return the node reached, or {@code null} if the path leaves the trie
     */
    private Vertex findVertex(String path) {
        Vertex vertex = root;
        for (int i = 0; i < path.length() && vertex != null; i++) {
            int index = indexOf(path.charAt(i));
            vertex = (index < 0) ? null : vertex.edges[index];
        }
        return vertex;
    }

    /**
     * Lists every distinct word stored in the trie.
     *
     * @return the stored words (lower case), each once, in alphabetical
     *         depth-first order; the empty string is included if it was added
     */
    public List<String> listAllWords() {
        List<String> words = new ArrayList<String>();
        depthFirstSearchWords(words, root, new StringBuilder());
        return words;
    }

    /**
     * Collects the words stored in the subtree rooted at {@code vertex}.
     *
     * @param words       output list
     * @param vertex      subtree root
     * @param wordSegment letters on the path from the trie root to {@code vertex};
     *                    restored to its incoming content before returning
     */
    private void depthFirstSearchWords(List<String> words, Vertex vertex,
            StringBuilder wordSegment) {
        if (vertex.words != 0) {
            words.add(wordSegment.toString());
        }
        for (int i = 0; i < vertex.edges.length; i++) {
            Vertex child = vertex.edges[i];
            if (child != null) {
                wordSegment.append((char) ('a' + i));
                depthFirstSearchWords(words, child, wordSegment);
                // Backtrack so the sibling branches see the same prefix.
                wordSegment.setLength(wordSegment.length() - 1);
            }
        }
    }

    /**
     * Counts the added words (with multiplicity) that start with {@code prefix}
     * and are strictly longer than it.
     *
     * <p>NOTE: a word equal to {@code prefix} is not included in the result; use
     * {@link #countWords(String)} for exact matches.
     *
     * @param prefix the prefix to look up (case-insensitive)
     * @return the count, or 0 if no stored word passes through {@code prefix}
     * @throws NullPointerException if {@code prefix} is {@code null}
     */
    public int countPrefixes(String prefix) {
        Vertex vertex = findVertex(prefix);
        return (vertex == null) ? 0 : vertex.prefixes;
    }

    /**
     * Counts how many times exactly {@code word} has been added.
     *
     * @param word the word to look up (case-insensitive)
     * @return the number of additions of {@code word}, or 0 if it is not stored
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public int countWords(String word) {
        Vertex vertex = findVertex(word);
        return (vertex == null) ? 0 : vertex.words;
    }

    /**
     * Adds one occurrence of {@code word} to the trie.
     *
     * @param word the word to add; letters are folded to lower case
     * @throws IllegalArgumentException if {@code word} contains a character that
     *         does not map to a-z; the trie is left unchanged in that case
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public void addWord(String word) {
        // Validate up front so a bad character cannot leave counters half-updated.
        for (int i = 0; i < word.length(); i++) {
            if (indexOf(word.charAt(i)) < 0) {
                throw new IllegalArgumentException("Unsupported character '"
                        + word.charAt(i) + "' at index " + i + " in word: " + word);
            }
        }
        Vertex vertex = root;
        for (int i = 0; i < word.length(); i++) {
            int index = indexOf(word.charAt(i));
            vertex.prefixes++; // one more word continues past this node
            Vertex child = vertex.edges[index];
            if (child == null) {
                child = new Vertex();
                child.c = (char) ('a' + index); // the letter belongs to the child, not the parent
                vertex.edges[index] = child;
            }
            vertex = child;
        }
        vertex.words++; // one more word ends exactly here
    }

    /**
     * Returns the longest stored word that is a prefix of {@code word}.
     *
     * <p>Return value when no stored word is a prefix of {@code word}:
     * {@code null} if the walk stops at a letter that has no edge in the trie,
     * and the empty string if every letter of {@code word} could be followed.
     *
     * @param word the text to match (case-insensitive)
     * @return the longest matching stored word in lower case, or {@code null} /
     *         {@code ""} as described above
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public String getMaxMatchWord(String word) {
        StringBuilder matched = new StringBuilder();
        String longest = null; // longest stored word seen on the path so far
        Vertex vertex = root;
        for (int i = 0; i < word.length(); i++) {
            if (vertex.words != 0) {
                longest = matched.toString();
            }
            int index = indexOf(word.charAt(i));
            if (index < 0 || vertex.edges[index] == null) {
                // Cannot go deeper: fall back to the best match found so far.
                return longest;
            }
            matched.append((char) ('a' + index));
            vertex = vertex.edges[index];
        }
        if (vertex.words != 0) {
            return matched.toString();
        }
        // The input is only a prefix of longer stored words.
        return (longest == null) ? "" : longest;
    }

    /**
     * Prints the letters of the subtree rooted at {@code vertex} to standard
     * output in pre-order (node first, then children from 'a' to 'z').
     *
     * @param vertex the subtree root; {@code null} prints nothing
     */
    public static void preOrder(Vertex vertex) {
        if (vertex == null) {
            return;
        }
        if (vertex.c != '\0') { // the root carries no letter
            System.out.print(vertex.c);
        }
        for (Vertex child : vertex.edges) {
            preOrder(child);
        }
    }

    /** Small manual demo of the operations above. */
    public static void main(String args[]) {
        Trie trie = new Trie();
        trie.addWord("ba");
        trie.addWord("cde");
        trie.addWord("a");
        trie.addWord("abcf");
        trie.addWord("abcd");
        trie.addWord("abcd");
        trie.addWord("abcd");
        trie.addWord("abcd");
        trie.addWord("ba");
        trie.addWord("abcedfddd");
        String maxMatch = trie.getMaxMatchWord("abcedfddd"); // scenario 1: longest-prefix match
        int count = trie.countPrefixes("abc");               // scenario 2: prefix count
        int count1 = trie.countWords("abcd");                // scenario 3: exact word frequency
        // Pre-order dump of the stored letters. (Scenario 4, top-k word frequency,
        // would combine the trie with a min-heap and is not implemented here.)
        preOrder(trie.root);
        System.out.println();
        System.out.println(maxMatch);
        for (String s : trie.listAllWords()) {
            System.out.print(s + " ");
        }
        System.out.println("");
        System.out.println("prefixes:" + count);
        System.out.println("countWords:" + count1);
    }
}