692. Top K Frequent Words

最新推荐文章于 2025-02-03 15:37:10 发布

NewCoder_BL

最新推荐文章于 2025-02-03 15:37:10 发布

阅读量389

点赞数

CC 4.0 BY-SA版权

分类专栏： leetcode c++ 文章标签： hash-table queue

本文链接：https://blog.youkuaiyun.com/u011934885/article/details/78231495

leetcode 同时被 2 个专栏收录

134 篇文章

订阅专栏

c++

23 篇文章

订阅专栏

本文介绍了一种高效算法，用于从非空单词列表中找出出现频率最高的 k 个元素，并按频率及字母顺序排列。利用哈希表统计频率，并采用优先队列进行排序。提供了两种实现方式：最大堆和最小堆。

Given a non-empty list of words, return the k most frequent elements.

Your answer should be sorted by frequency from highest to lowest. If two words have the same frequency, then the word with the lower alphabetical order comes first.

Example 1: Input: ["i", "love", "leetcode", "i", "love", "coding"], k = 2
Output: ["i", "love"]
Explanation: "i" and "love" are the two most frequent words.
Note that "i" comes before "love" due to a lower alphabetical order.

这题首先可以用哈希表统计每个词的频率，至于之后的基于自定义性质的排序问题，可以用priority_queue来解决，将比较函数写成自定义的要求就可以了。

方法一： max_heap

// Ordering predicate for a std::priority_queue of (word, count) pairs.
// Used as the queue's comparator it produces a max-heap: top() is the entry
// with the highest count, and among equal counts the alphabetically smallest
// word.
class compLess {
public:
    // Returns true when `a` ranks below `b`: either `a` has the smaller
    // count, or the counts tie and `a` is the alphabetically later word.
    // const-qualified so the comparator can be invoked through const objects
    // and const references.
    bool operator () (const std::pair<std::string, int>& a,
                      const std::pair<std::string, int>& b) const {
        if (a.second != b.second) {
            return a.second < b.second;
        }
        return a.first > b.first;
    }

};

class Solution {
public:
    vector<string> topKFrequent(vector<string>& words, int k) {
        unordered_map<string, int> hash;
        vector<string> res;
        for (string s : words) {
            if (hash.find(s) != hash.end()) {
                hash[s]++;
            }
            else {
                hash.emplace(s, 1);
            }
        }

        priority_queue<pair<string, int>, vector<pair<string, int>>, compLess> pq;
        for (auto s : hash) {
            pq.push(s);
        }

        for (int i = 0; i < k; i++) {
            res.push_back(pq.top().first);
            pq.pop();
        }
        return res;
    }
};

这里补充一下重载运算符的问题：不管是 struct Node 还是 class Node 的写法，如果重载的是关系运算符（<、> 之类），当重载函数作为成员函数时，它只接受一个参数（左操作数就是 *this），写成：

// Heap entry pairing a string with its frequency; entries compare by
// frequency only.
struct Node {
    int freq;
    std::string str;
    Node (int f, std::string s) : freq(f), str(s) {}
    // Member form of a relational operator: the left operand is *this, so
    // only the right operand appears as a parameter.
    // Must be const-qualified: std::less<Node>, the default comparator of
    // std::priority_queue<Node>, compares its operands through const
    // references.
    bool operator < (const Node& b) const {
        return freq < b.freq;
    }
};

如果重载函数在结构体或者类之外的话，需要两个argument

#include <iostream>
#include <queue>

using namespace std;

// Pair of integer fields. Either constructor argument may be omitted, in
// which case the corresponding field is set to 0.
struct Node {
    int x;
    int y;

    Node(int xVal = 0, int yVal = 0) : x{xVal}, y{yVal} {}
};

// Deliberately inverted ordering: `lhs < rhs` is true when lhs has the larger
// x, or the larger y when the x values are equal.
bool operator<( Node lhs, Node rhs ){
    return ( lhs.x != rhs.x ) ? ( lhs.x > rhs.x ) : ( lhs.y > rhs.y );
}

int main(){
    priority_queue<Node> q;

    for( int i= 0; i< 10; ++i )
    q.push( Node( rand(), rand() ) );

    while( !q.empty() ){
        cout << q.top().x << ' ' << q.top().y << endl;
        q.pop();
    }

    getchar();
    return 0;
}

这段代码通过重载 <（让它实际执行 > 的比较），把默认的 max priority_queue 变成了 min priority_queue。

第二个想要补充说明的就是priority_queue的重载问题。
默认是 max heap，默认的比较是 <。如果想要变成 min heap，可以重载 <，让名义上的 < 实际执行 > 的比较。用这种方式重载时，声明 priority_queue 不需要额外指定比较器，所以声明时仍然只写一个或两个模板参数即可。
例如：

// Only the element type and the underlying container are spelled out; the
// third template argument (the comparator) is left at its default.
priority_queue<int, vector<int>> pq;

但也可以替换比较函数（默认的是 less，也可以写成 greater<int> 改成小堆；对内置类型 greater 和 less 可以直接使用，自定义类型则需要先重载对应的 < 或 > 运算符，或者自己定义比较函数对象）。
例如上述方法一中的，class compLess;
将上面的重载方法也可以依次改写为：

#include <iostream>
#include <queue>

using namespace std;

// Pair of integer fields. Either constructor argument may be omitted, in
// which case the corresponding field is set to 0.
struct Node {
    int x;
    int y;

    Node(int xVal = 0, int yVal = 0) : x{xVal}, y{yVal} {}
};

// Comparator functor intended as the third template argument of
// std::priority_queue. Returns true when `a` has the larger x, or the larger
// y when the x values are equal.
struct cmp{
    // const-qualified so the functor can be invoked through const objects and
    // const references.
    bool operator() ( Node a, Node b ) const {
        if( a.x== b.x ) return a.y> b.y;

        return a.x> b.x; }
};

int main(){
    priority_queue<Node, vector<Node>, cmp> q;

    for( int i= 0; i< 10; ++i )
    q.push( Node( rand(), rand() ) );

    while( !q.empty() ){
        cout << q.top().x << ' ' << q.top().y << endl;
        q.pop();
    }

    getchar();
    return 0;
}

好，说回本题，在时间和空间上都可以再优化一点。因为我们只需要前 k 个，所以没有必要把所有元素都插入 priority_queue：堆里的元素越多，每次插入的时间 O(log n) 就越长。我们可以只在堆里保留 k 个元素，这样每次插入是 O(log k)，空间是 O(k)。这个时候就需要用最小堆了：每多插入一个，就要扔掉一个，只有明确比堆里这 k 个都小的才可以放心扔掉，而最小堆的堆顶正好就是这 k 个里最小的那个。

方法二：min heap

// Ordering predicate for a std::priority_queue of (word, count) pairs.
// Used as the queue's comparator it produces a min-heap with respect to
// answer rank: top() is the entry with the lowest count, and among equal
// counts the alphabetically largest word.
class compLess {
public:
    // Returns true when `a` ranks ahead of `b`: either `a` has the larger
    // count, or the counts tie and `a` is the alphabetically earlier word.
    // const-qualified so the comparator can be invoked through const objects
    // and const references.
    bool operator () (const std::pair<std::string, int>& a,
                      const std::pair<std::string, int>& b) const {
        if (a.second != b.second) {
            return a.second > b.second;
        }
        return a.first < b.first;
    }

};

// Method 2: count every word, then stream the (word, count) entries through
// a heap that never holds more than k of them, so each heap operation costs
// O(log k) instead of O(log n).
class Solution {
public:
    // Returns up to k words from `words`, filled back-to-front from the heap
    // so that the entry popped last ends up first.
    // Relies on compLess ordering the heap so that top() is the weakest kept
    // entry (lowest count; alphabetically last among equal counts).
    // If k exceeds the number of distinct words, every distinct word is
    // returned; k <= 0 yields an empty vector.
    std::vector<std::string> topKFrequent(std::vector<std::string>& words, int k) {
        // With k <= 0 the heap stays empty, and inspecting top() of an empty
        // priority_queue is undefined behaviour.
        if (k <= 0) {
            return {};
        }

        std::unordered_map<std::string, int> hash;
        for (const std::string& s : words) {
            ++hash[s];  // operator[] value-initializes a missing count to 0
        }

        std::priority_queue<std::pair<std::string, int>,
                            std::vector<std::pair<std::string, int>>,
                            compLess> pq;
        for (const auto& s : hash) {
            // The heap never grows beyond k entries, so the cast cannot
            // overflow and the comparison stays signed.
            if (static_cast<int>(pq.size()) < k) {
                pq.push(s);
                continue;
            }

            // The candidate displaces the weakest kept entry only when it is
            // strictly better: more frequent, or equally frequent and
            // alphabetically earlier.
            const auto& currmin = pq.top();
            const bool better =
                currmin.second < s.second ||
                (currmin.second == s.second && currmin.first > s.first);
            if (better) {
                pq.pop();
                pq.push(s);
            }
        }

        // Size the result from the heap, which holds fewer than k entries
        // when there are fewer than k distinct words.
        std::vector<std::string> res(pq.size());
        for (auto slot = res.rbegin(); slot != res.rend(); ++slot) {
            *slot = pq.top().first;
            pq.pop();
        }
        return res;
    }

};