这个方法适用于数组比较小(可以全部载入内存)的情况,可以借鉴快速排序的划分(partition)思路来求解最小的k个数,平均时间复杂度为O(n)(以首元素为基准时最坏情况为O(n^2)),这个方法需要修改输入的数组;如果先排序再输出前k个数,时间复杂度为O(nlogn):
#include<iostream>
using namespace std;
// Partitions a[left..right] around the pivot value a[left].
//
// The pivot is lifted out of the array, leaving a "hole" that is filled
// alternately from the right end and the left end of the range. When the two
// cursors meet, the pivot is written into the final hole.
//
// After the call every element before the returned index is <= the pivot and
// every element after it is >= the pivot.
//
// a     - array to rearrange in place
// left  - index of the first element of the range (also the pivot position)
// right - index of the last element of the range
// Returns the index at which the pivot ends up.
int Partion(int a[], int left, int right)
{
    const int pivot = a[left];
    int lo = left;
    int hi = right;

    while (lo < hi) {
        // Walk down from the right until an element smaller than the pivot
        // is found, then drop it into the hole on the left side.
        for (; lo < hi && a[hi] >= pivot; --hi) {
        }
        a[lo] = a[hi];

        // Walk up from the left until an element larger than the pivot is
        // found, then drop it into the hole that opened on the right side.
        for (; lo < hi && a[lo] <= pivot; ++lo) {
        }
        a[hi] = a[lo];
    }

    // The cursors have met: this slot is the pivot's final position.
    a[lo] = pivot;
    return lo;
}
// Prints the k smallest elements of a[0..n-1] to standard output, one per
// line, in the order in which the partitioning leaves them (not sorted).
//
// The array is rearranged in place: Partion is applied repeatedly to the
// sub-range that still contains position k-1 until the pivot lands exactly on
// index k-1. At that point a[0..k-1] holds the k smallest values.
//
// a - array to select from; modified by this call
// n - number of elements in a
// k - how many of the smallest elements to print
//
// Degenerate requests are handled explicitly instead of looping forever or
// reading outside the array:
//   - a null array, n <= 0 or k <= 0 prints nothing;
//   - k > n is treated as k == n (every element is printed).
void top_k(int a[], int n, int k)
{
    if (a == nullptr || n <= 0 || k <= 0) {
        return;
    }
    if (k > n) {
        k = n;
    }

    // Invariant: left <= k-1 <= right, so Partion is always given a
    // non-empty range inside the array and the search range strictly shrinks.
    int left = 0;
    int right = n - 1;
    int index = Partion(a, left, right);
    while (index != k - 1) {
        if (index > k - 1) {
            // Pivot landed too far right: the target is in the left part.
            right = index - 1;
        } else {
            // Pivot landed too far left: the target is in the right part.
            left = index + 1;
        }
        index = Partion(a, left, right);
    }

    for (int i = 0; i < k; ++i) {
        std::cout << a[i] << '\n';
    }
}
// Demo driver: prints the 4 smallest values of a fixed 8-element sample.
int main()
{
    int samples[] = {4, 5, 1, 6, 2, 7, 3, 8};
    // Derive the element count from the array itself (8 for this sample).
    const int count = static_cast<int>(sizeof(samples) / sizeof(samples[0]));
    const int wanted = 4;

    top_k(samples, count, wanted);
    return 0;
}
针对海量数据,可以用一个容量为k的大根堆(下面的代码用按降序排列的multiset来实现)保存当前最小的k个数,后面的每个数跟其中的最大值(根节点)比较,如果更小就替换它,时间复杂度是O(nlogk),并且不需要修改输入的数据:
#include<iostream>
#include<vector>
#include<set>
using namespace std;
// Multiset ordered from largest to smallest, so begin() always refers to the
// greatest value currently stored.
using Set = std::multiset<int, std::greater<int>>;
using Iterator = Set::iterator;

// Collects the k smallest values of `data` into `least`.
//
// `least` is used as a bounded container: while it holds fewer than k values
// every input value is inserted; once it is full, an input value replaces the
// current maximum only if it is strictly smaller. Each insert/erase works on
// at most k stored values, which gives the O(n log k) bound for n inputs.
//
// data  - input values; only read, never modified
// least - output; cleared first, then filled with min(k, data.size()) values
//         (duplicates are kept), ordered from largest to smallest
// k     - number of smallest values to keep; k == 0 leaves `least` empty
void top_k(const std::vector<int> &data, Set &least, size_t k)
{
    least.clear();

    // With k == 0 there is nothing to keep. Without this guard the loop below
    // would dereference begin() of an empty multiset.
    if (k == 0) {
        return;
    }

    for (const int value : data) {
        if (least.size() < k) {
            least.insert(value);
            continue;
        }

        // The container is full: begin() is the largest kept value.
        const Iterator greatest = least.begin();
        if (value < *greatest) {
            least.erase(greatest);
            least.insert(value);
        }
    }
}
// Demo driver: selects the 4 smallest values of a fixed sample and prints
// them, one per line, in the container's order (largest first).
int main()
{
    std::vector<int> data = {4, 5, 1, 6, 2, 7, 3, 8};
    Set least;
    top_k(data, least, 4);

    for (const int value : least) {
        std::cout << value << std::endl;
    }
    return 0;
}