LintCode 471. 最高频的K个单词
题目:给一个单词列表,求出这个列表中出现频次最高的K个单词。
问题:给定 web 日志,求访问频次最高的前 K 个 URI 及其频次。
求频次最高的前 K 个字符串:利用 PriorityQueue 和 HashMap。
时间复杂度 O(n log n):HashMap 统计词频为 O(n),全部单词入堆为 O(n log n),取出前 K 个为 O(K log n)。PriorityQueue 基于二叉堆,只需弹出前 K 个元素,不必像快速排序那样把全部元素完整排序。
-----
/**
 * Demo driver: counts the words of a fixed sample list and prints the three
 * most frequent ones together with their counts.
 */
public class PriorityQueueTest {
    public static void main(String[] args) {
        // Number of top entries to report.
        int k = 3;
        String[] s = {
            "hello", "hello", "hello", "world", "the",
            "You", "should", "not", "use", "the",
            "Know", "someone", "You", "can", "answer",
            "Share", "link", "to", "this", "email"
        };
        // Take the element count from the array itself so it cannot drift out of
        // sync with the literal above when words are added or removed.
        URIQueue myQQ = new URIQueue(s, s.length);
        myQQ.getHotURI(k);
    }
}
-----
import java.util.Comparator;
import java.util.HashMap;
import java.util.PriorityQueue;
//统计URI 频次高的项!
/**
 * Counts how often each string occurs in the leading part of an array and
 * prints the most frequent ones.
 *
 * <p>Counts are kept in a {@link HashMap}; ranking is done with a
 * {@link PriorityQueue} ordered by descending count. The relative order of
 * strings with equal counts is not specified.
 */
public class URIQueue {
    /** Occurrence count of every distinct string that was counted. */
    private final HashMap<String, Integer> rawURI;
    /** Heap of the distinct strings, highest count first; refilled on every query. */
    private final PriorityQueue<String> pq;
    /** Number of leading array elements that were counted. */
    private final int mItem;

    /**
     * Counts the first {@code mm} elements of {@code s} and prints the resulting
     * count table.
     *
     * @param s  the strings to count
     * @param mm how many leading elements of {@code s} to count; values larger
     *           than {@code s.length} are treated as {@code s.length}
     * @throws IllegalArgumentException if {@code mm} is negative
     * @throws NullPointerException     if {@code s} is {@code null}
     */
    public URIQueue(String[] s, int mm) {
        System.out.println("\nURIQueue\n");
        // Never read past the end of the array when the caller over-states the size.
        mItem = Math.min(mm, s.length);
        rawURI = new HashMap<>(mItem);
        for (int i = 0; i < mItem; i++) {
            Integer seen = rawURI.get(s[i]);
            rawURI.put(s[i], seen == null ? 1 : seen + 1);
        }
        System.out.println(rawURI);
        URIComparator<String> byCountDescending = new URIComparator<>();
        // PriorityQueue rejects an initial capacity below 1, so an empty input
        // still has to request a capacity of 1.
        pq = new PriorityQueue<>(Math.max(1, mItem), byCountDescending);
    }

    /**
     * Prints the heap contents followed by up to {@code kth} strings with the
     * highest counts, one per line, formatted as {@code #rank:string=count}.
     *
     * <p>May be called repeatedly; every call ranks all counted strings afresh.
     *
     * @param kth maximum number of entries to print; nothing is ranked when it
     *            is zero or negative
     */
    public void getHotURI(int kth) {
        // Start from an empty heap: entries left over from an earlier call would
        // otherwise be inserted a second time and could be reported twice.
        pq.clear();
        for (String key : rawURI.keySet()) {
            pq.add(key);
        }
        System.out.println(pq);
        int count = 0;
        while (!pq.isEmpty() && count < kth) {
            count++;
            String sKey = pq.remove();
            System.out.printf("#%d:%s=%d\n", count, sKey, rawURI.get(sKey));
        }
    }

    /**
     * Orders keys by their recorded count, highest count first.
     *
     * <p>Both keys must have been counted by the enclosing {@code URIQueue};
     * comparing a key with no recorded count throws {@link NullPointerException}.
     *
     * @param <T> the key type (the enclosing class uses {@code String})
     */
    public class URIComparator<T> implements Comparator<T> {
        @Override
        public int compare(T key1, T key2) {
            // Arguments are swapped on purpose: a larger count must sort first.
            return Integer.compare(rawURI.get(key2), rawURI.get(key1));
        }
    }
}
-----
运行结果:
URIQueue
{use=1, Know=1, link=1, this=1, the=2, can=1, not=1, world=1, someone=1, answer=1, should=1, hello=3, to=1, Share=1, You=2, email=1}
[hello, use, the, this, Know, link, You, world, someone, answer, should, can, to, Share, not, email]
#1:hello=3
#2:the=2
#3:You=2
-----
其他解法:
https://blog.youkuaiyun.com/qq_27139155/article/details/79754671
利用 Collections.sort,平均时间复杂度 O(n log n)