Reservoir Sampling
适用于从很大的数据堆里随机取样,一般适用于内存不足以装入所有数据的情况。
从n个数据里随机取k个数
1. 用前k个数据组成一个reservoir
2. 从k+1开始,对于第i个元素,随机产生一个1~i之间的数j,如果j<=k,则reservoir里第j个元素就被i替换掉。直到n个数据都被试过。
证明tips
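每次循环(处理第i个元素时),第i个元素被选入reservoir的概率是k/i;reservoir里某个指定位置(第j个,j<=k)上的元素被替换的概率是1/k * k/i = 1/i,也就是说它保留下来的概率是(i-1)/i。因此前k个元素最终留在reservoir里的概率是 k/(k+1) * (k+1)/(k+2) * ... * (n-1)/n = k/n;第i个元素(i>k)最终留下的概率是 k/i * i/(i+1) * ... * (n-1)/n = k/n。所以每个元素被取到的概率都是k/n。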
Quickselect
平均时间复杂度O(n),最坏情况O(n^2)。每次partition之后,只需要在包含第k个元素的那一侧继续查找,另一侧直接丢弃;随机选pivot时,平均每次能去掉大约一半的剩余元素。
package leetcode.blog;
/**
 * Quickselect: finds the element that would sit at a given index if a slice of an
 * {@code int} array were sorted, without sorting the whole slice.
 *
 * <p>All methods operate in place and reorder the caller's array.
 */
public class Quickselect {

    /**
     * Partitions {@code array[left..right]} (both bounds inclusive) around the value
     * currently stored at {@code pivotIndex}.
     *
     * <p>On return, every element of the slice to the left of the returned index is
     * {@code <=} the pivot value and every element to its right is {@code >} it.
     *
     * @param array      the array to reorder in place
     * @param left       first index of the slice (inclusive)
     * @param right      last index of the slice (inclusive)
     * @param pivotIndex index of the pivot value, within {@code [left, right]}
     * @return the index at which the pivot value ends up
     */
    public static int partition(int[] array, int left, int right, int pivotIndex) {
        int pivot = array[pivotIndex];
        // Park the pivot at the right edge so the scan below never moves it.
        swap(array, right, pivotIndex);
        // Next free slot for an element that is <= pivot.
        int start = left;
        for (int i = left; i < right; i++) {
            if (array[i] <= pivot) {
                swap(array, start, i);
                start++;
            }
        }
        // Drop the pivot between the "<=" part and the ">" part.
        swap(array, start, right);
        return start;
    }

    /**
     * Exchanges the elements at indices {@code source} and {@code sink}.
     *
     * @param array  the array to modify in place
     * @param source index of the first element
     * @param sink   index of the second element
     */
    public static void swap(int[] array, int source, int sink) {
        int temp = array[sink];
        array[sink] = array[source];
        array[source] = temp;
    }

    /**
     * Returns the element that would be stored at index {@code k} if
     * {@code array[left..right]} (both bounds inclusive) were sorted ascending.
     *
     * <p>{@code k} is an index into {@code array} itself (not an offset from
     * {@code left}). The slice is reordered in place. The pivot is chosen at random
     * on every round.
     *
     * @param array the array to search; reordered in place
     * @param left  first index of the slice (inclusive)
     * @param right last index of the slice (inclusive)
     * @param k     target index, within {@code [left, right]}
     * @return the value that belongs at index {@code k} in sorted order
     * @throws IllegalArgumentException if the slice is empty or outside the array,
     *                                  or if {@code k} is outside {@code [left, right]}
     * @throws NullPointerException     if {@code array} is {@code null}
     */
    public static int select(int[] array, int left, int right, int k) {
        if (left < 0 || right >= array.length || left > right) {
            throw new IllegalArgumentException(
                    "invalid range [" + left + ", " + right + "] for array of length " + array.length);
        }
        if (k < left || k > right) {
            throw new IllegalArgumentException(
                    "k=" + k + " is outside [" + left + ", " + right + "]");
        }

        int lo = left;
        int hi = right;
        // Invariant: lo <= k <= hi, and the answer lives in array[lo..hi].
        while (lo < hi) {
            int pivotIndex = lo + (int) Math.floor(Math.random() * (hi - lo + 1));
            pivotIndex = partition(array, lo, hi, pivotIndex);
            if (k == pivotIndex) {
                return array[pivotIndex];
            }
            if (k < pivotIndex) {
                // Target is strictly left of the pivot (this includes k == pivotIndex - 1).
                hi = pivotIndex - 1;
            } else {
                lo = pivotIndex + 1;
            }
        }
        return array[lo];
    }

    /**
     * Small demo: prints the elements at sorted positions 3, 6, 7, 8 and 9 of a
     * sample array. The same array instance is reused (and reordered) across calls.
     */
    public static void main(String[] args) {
        int[] array = new int[]{4, 5, 8, 1, 3, 2, 0, 6, 7, 10, 9};
        System.out.println(select(array, 0, 10, 3));
        System.out.println(select(array, 0, 10, 6));
        System.out.println(select(array, 0, 10, 7));
        System.out.println(select(array, 0, 10, 8));
        System.out.println(select(array, 0, 10, 9));
    }
}