LintCode 1281: Top K Frequent Elements (堆经典题)

原创已于 2023-10-14 11:31:34 修改 · 258 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#LintCode

于 2019-10-22 22:31:27 首次发布

algorithm-design 专栏收录该内容

817 篇文章

订阅专栏

本文详细解析了求解数组中Top K频繁出现的元素的四种算法实现，包括使用最大堆、最小堆的不同方式，以及如何优化时间复杂度至优于O(nlogn)，适合对数据结构和算法效率有高要求的场景。

Top K Frequent Elements
中文English
Given a non-empty array of integers, return the k most frequent elements.

Example
Example 1:

Input: nums = [1,1,1,2,2,3], k = 2
Output: [1,2]
Example 2:

Input: nums = [1], k = 1
Output: [1]
Notice
You may assume k is always valid, 1 ≤ k ≤ number of unique elements.
Your algorithm’s time complexity must be better than O(n log n), where n is the array’s size.

解法1：用最大堆。priority_queue<>缺省就是最大堆。时间复杂度O(MlogM)，其中M是不重复数据的数目。
注意：
1）unordered_map 不能用for (int i = 0; i < um.size(); ++i)按下标遍历，因为它的operator[]是按key查找而不是按位置访问（um[i]在key不存在时还会插入一个新元素）。但可以用range-based for（例如for (auto m : um)）来遍历。


class Solution {
public:
    /**
     * Returns the k most frequent values of nums using a max-heap.
     *
     * Every distinct value is pushed as a (frequency, value) pair and the
     * top of the heap is then taken up to k times, so the result runs from
     * most to least frequent; equal frequencies come out larger value
     * first because pairs compare lexicographically.
     *
     * @param nums: the given array
     * @param k: the given k
     * @return: the k most frequent elements; fewer than k when nums has
     *          fewer than k distinct values, empty when k <= 0
     */
    vector<int> topKFrequent(vector<int> &nums, int k) {
        unordered_map<int, int> um;  // num -> freq
        for (int num : nums) ++um[num];

        // Frequency goes first so the default pair ordering ranks by it.
        priority_queue<pair<int, int>> pq;
        for (const auto &entry : um) {
            pq.push({entry.second, entry.first});
        }

        vector<int> result;
        // Stop early when the heap runs dry: top()/pop() on an empty
        // priority_queue is undefined behaviour.
        for (int i = 0; i < k && !pq.empty(); ++i) {
            result.push_back(pq.top().second);
            pq.pop();
        }

        return result;
    }
};

解法2：最小堆。时间复杂度MlogK。
代码如下：


// A distinct number paired with how often it occurs; both fields default to 0.
struct ResultType {
    int val;   // the number
    int freq;  // its occurrence count
    ResultType(int v = 0, int f = 0) {
        val = v;
        freq = f;
    }
};

// Ordering for a std::priority_queue of ResultType: returns true when a has
// the higher frequency, or the same frequency and the larger value. Because
// priority_queue keeps the element that compares "largest" on top, this
// "greater-than" ordering puts the lowest (freq, val) on top, i.e. a min-heap.
struct cmp {
    // const-qualified so the comparator can also be invoked through a const
    // object; it holds no state to modify.
    bool operator() (const ResultType & a, const ResultType & b) const {
        if (a.freq != b.freq) {
            return a.freq > b.freq;
        }
        return a.val > b.val;
    }
};

class Solution {
public:
    /**
     * @param nums: the given array
     * @param k: the given k
     * @return: the k most frequent elements
     */
    vector<int> topKFrequent(vector<int> &nums, int k) {
        unordered_map<int, int> um; //num, freq
        vector<int> result;
        priority_queue<ResultType, vector<ResultType>, cmp> pq;
        
        for (int i = 0; i < nums.size(); ++i) {
            um[nums[i]]++;
        }
        
        int count = 0;
        for (auto it : um) {
            pq.push(ResultType(it.first, it.second));
            if (count++ >= k) pq.pop();
        }
        
        while(!pq.empty()) {
            result.push_back(pq.top().val);
            pq.pop();
        }
        
        return result;
    }
};

解法3：还是最小堆。用一个map<int,int> freqs和Node结构，里面包含了num和重载了operator<，这个operator<就是根据freqs来排序，注意是最小堆，所以是freqs[num] > freqs[node.num]。

// Occurrence count of every number in the array currently being processed;
// read by Node::operator<.
map<int, int> freqs; //<num, freq>

// Heap element that stands for one number and is ordered by that number's
// count in freqs.
struct Node {
    int num;
    Node(int n) : num(n) {}
    // Inverted on purpose: std::priority_queue keeps the "largest" element on
    // top, so treating the higher frequency as "less" turns it into a
    // min-heap on frequency.
    bool operator < (const Node & node) const {
        return frequencyOf(num) > frequencyOf(node.num);
    }

private:
    // Count recorded for value, or 0 when it is absent. Uses find() rather
    // than freqs[value] so that comparing two nodes never inserts into the map.
    static int frequencyOf(int value) {
        const auto it = freqs.find(value);
        return it == freqs.end() ? 0 : it->second;
    }
};

class Solution {
public:
    vector<int> topKFrequent(vector<int>& nums, int k) {
        int len = nums.size();
        priority_queue<Node> minHeap;
        freqs.clear();
        
        for (int i = 0; i < len; i++) {
            freqs[nums[i]]++;
        }
        
        for (auto f : freqs) {
            if (minHeap.size() < k) {
                minHeap.push(Node(f.first));
            } else {
                //注意minHeap里面已经包含了到现在为止k个most frequent的数字，minHeap.top就是到现在为止第kth most frequent的那个数字，如果当前数字的frequency比minHeap.top还小就没有必要push进去了。注意最开始的第一个for循环已经得到了每个数字在整个数组中的frequency。这个优化很重要。
                if (f.second > freqs[minHeap.top().num]) {
                    minHeap.push(Node(f.first));
                    minHeap.pop();
                }
            }
        }

        vector<int> res;
        while (!minHeap.empty()) {
            res.push_back(minHeap.top().num);
            minHeap.pop();
        }
        return res;
    }
};

解法4：跟上面差不多，只是用了decltype的表示法。

class Solution {
public:
    /**
     * Returns the k most frequent values of nums using a min-heap of plain
     * ints whose comparator is a lambda that looks frequencies up in a
     * local map.
     *
     * @param nums: the given array
     * @param k: the given k
     * @return: the k most frequent elements, most frequent first; fewer
     *          than k when nums has fewer than k distinct values, empty
     *          when k <= 0
     */
    vector<int> topKFrequent(vector<int>& nums, int k) {
        vector<int> res;
        if (k <= 0) return res;

        map<int, int> freqs; //<num, freq>
        for (int num : nums) {
            ++freqs[num];
        }

        // "Greater frequency compares as less" makes priority_queue, a
        // max-heap by default, keep the least frequent number on top.
        // The lambda's closure type has no name, hence decltype.
        auto comp = [&freqs](int a, int b) { return freqs[a] > freqs[b]; };
        priority_queue<int, vector<int>, decltype(comp)> minHeap(comp);

        const size_t capacity = static_cast<size_t>(k);
        for (const auto &f : freqs) {
            minHeap.push(f.first);
            if (minHeap.size() > capacity) {
                minHeap.pop();
            }
        }

        // Drain whatever is left instead of popping exactly k times: the
        // heap holds fewer than k numbers when nums has fewer than k
        // distinct values, and top()/pop() on an empty heap is undefined.
        while (!minHeap.empty()) {
            res.push_back(minHeap.top());
            minHeap.pop();
        }
        // Popped least frequent first; flip to most frequent first.
        reverse(res.begin(), res.end());
        return res;
    }
};

另一个用minHeap的方法，直接用pair<int, int>，注意是反着用的，freq是first, num是second，这样就可以利用freq来排序，如果freq相同则按num的数值大小来排序，不需要重新定义Node。

class Solution {
public:
    /**
     * Returns the k most frequent values of nums using a min-heap of
     * (frequency, number) pairs ordered by greater<>, so no custom node
     * type or comparator is needed.
     *
     * @param nums: the given array
     * @param k: the given k
     * @return: the k most frequent elements, most frequent first; equal
     *          frequencies are ordered by larger number first
     */
    vector<int> topKFrequent(vector<int>& nums, int k) {
        unordered_map<int, int> num2Freq;
        for (int num : nums) ++num2Freq[num];

        priority_queue<pair<int, int>, vector<pair<int, int>>, greater<pair<int,int>>> minHeap; // <freq, number>
        int pushed = 0;
        for (const auto &elem : num2Freq) {
            minHeap.push({elem.second, elem.first});
            // Once more than k pairs have gone in, each push is paid for by
            // evicting the current minimum.
            if (++pushed > k) minHeap.pop();
        }

        vector<int> res;
        while (!minHeap.empty()) {
            res.push_back(minHeap.top().second);
            minHeap.pop();
        }
        // Popped smallest pair first; flip to most frequent first.
        reverse(res.begin(), res.end());
        return res;
    }
};