Arrays.sort底层算法选择
本文采用jdk版本是jdk1.8,进入Arrays.sort方法
/**
 * Sorts the whole of the given array by delegating to
 * {@code DualPivotQuicksort.sort} with the inclusive index range
 * {@code [0, a.length - 1]} and no workspace array
 * ({@code null}, base 0, length 0).
 *
 * @param a the array to be sorted
 */
public static void sort(int[] a) {
    final int lastIndex = a.length - 1;
    DualPivotQuicksort.sort(a, 0, lastIndex, null, 0, 0);
}
发现调用了final类DualPivotQuicksort(双轴快速排序)的sort方法,其中第四、第五、第六个参数描述的是工作区数组(数组本身、可用空间的起始位置、可用长度),Arrays类中的所有sort方法都将它们置为null、0、0,这三个参数主要供sort中的归并排序使用:
@param work a workspace array (slice)
@param workBase origin of usable space in work array
@param workLen usable size of work array
DualPivotQuicksort.sort方法如下
static void sort(int[] a, int left, int right,
int[] work, int workBase, int workLen) {
// Use Quicksort on small arrays
if (right - left < QUICKSORT_THRESHOLD) {
sort(a, left, right, true);
return;
}
/**
* run[i] 意味着第i个有序数列开始的位置,(升序或者降序)
**/
int[] run =new int[MAX_RUN_COUNT + 1];
int count=0; run[0] = left;
// 检查数组是不是已经接近有序状态
for(int k = left; k < right; run[count] = k) {
if(a[k] < a[k + 1]){ // 升序
while(++k <= right && a[k - 1] <= a[k]) ;
} else if(a[k] > a[k + 1]) { // 降序
while(++k <=right && a[k - 1] >= a[k]);
//如果是降序的,找出k之后,把数列倒置
for (int lo = run[count],hi = k;++lo < --hi) {
int t = a[lo]; a[lo] = a[hi]; a[hi] = t;
}
} else { // 相等
for(int m = MAX_RUN_LENGTH; ++k <=right && a[k - 1] == a[k];) {
// 数列中有至少MAX_RUN_LENGTH的数据相等的时候,直接使用快排。
// 这里为什么这么处理呢?
if(--m == 0){
sort(a, left, right, true);
return;
}
}
}
/**
* 数组并非高度有序,使用快速排序,因为数组中有序数列的个数超过了MAX_RUN_COUNT
*/
if(++count == MAX_RUN_COUNT) {
sort(a, left, right, true);
return;
}
}
//检查特殊情况
if(run[count] == right++){ // 最后一个有序数列只有最后一个元素
run[++count] =right; // 那给最后一个元素的后面加一个哨兵
} else if(count == 1) { // 整个数组中只有一个有序数列,说明数组已经有序啦,不需要排序了
return;
}
/**
* 创建合并用的临时数组。
* 注意: 这里变量right被加了1,它在数列最后一个元素位置+1的位置
* 这里没看懂,没发现后面的奇数处理和偶数处理有什么不同
*/
int[] b; byte odd=0;
for(int n=1; (n <<= 1) < count; odd ^=1);
if(odd == 0) {
b=a;a= new int[b.length];
for(int i=left -1; ++i < right; a[i] = b[i]);
} else {
b=new int[a.length];
}
// 合并
// 最外层循环,直到count为1,也就是栈中待合并的序列只有一个的时候,标志合并成功
// a 做原始数组,b 做目标数组
for(int last; count > 1; count = last) {
// 遍历数组,合并相邻的两个升序序列
for(int k = (last = 0) + 2; k <= count; k += 2) {
// 合并run[k-2] 与 run[k-1]两个序列
int hi = run[k], mi = run[k - 1];
for(int i = run[k - 2], p = i,q = mi; i < hi; ++i){
// 这里我给源码加了一个括号,这样好理解一点。 之前总觉得它会出现数组越界问题,
// 后来加了这个括号之后发现是没有问题的
if(q >= hi || (p < mi && a[p] <= a[q])) {
b[i] = a[p++];
} else {
b[i] = a[q++];
}
}
// 这里把合并之后的数列往前移动
run[++last] = hi;
}
// 如果栈的长度为奇数,那么把最后落单的有序数列copy过对面
if((count & 1) != 0) {
for(int i = right, lo =run[count -1]; --i >= lo; b[i] = a[i]);
run[++last] = right;
}
//临时数组,与原始数组对调,保持a做原始数组,b 做目标数组
int[] t = a; a = b; b = t;
}
}
我们一步一步分析
// Use Quicksort on small arrays: when right - left is below
// QUICKSORT_THRESHOLD the range is handed to the four-argument sort
// overload (last argument true) and this method is done.
if (right - left < QUICKSORT_THRESHOLD) {
sort(a, left, right, true);
return;
}
QUICKSORT_THRESHOLD为286,当right - left小于286(即待排序元素个数不超过286)时,调用本类的重载sort方法:
int length = right - left + 1;

// Use insertion sort on tiny arrays
if (length < INSERTION_SORT_THRESHOLD) {
    if (leftmost) {
        /*
         * Traditional (without sentinel) insertion sort,
         * optimized for server VM, is used in case of
         * the leftmost part.
         */
        for (int i = left, j = i; i < right; j = ++i) {
            int ai = a[i + 1];
            while (ai < a[j]) {
                a[j + 1] = a[j];
                // No sentinel on the left: stop explicitly at the edge
                if (j-- == left) {
                    break;
                }
            }
            a[j + 1] = ai;
        }
    } else {
        /*
         * Skip the longest ascending sequence at the start.
         * The check must be left >= right: with left == right there is
         * nothing left to sort, and continuing would read a[right + 1],
         * which is outside the range.
         */
        do {
            if (left >= right) {
                return;
            }
        } while (a[++left] >= a[left - 1]);

        /*
         * Pair insertion sort: two elements are inserted per iteration,
         * which is faster than plain insertion sort. This branch is only
         * taken for a part that is not the leftmost one, so the inner
         * loops scan to the left without a bounds check and rely on the
         * elements before the range to stop them.
         */
        for (int k = left; ++left <= right; k = ++left) {
            int a1 = a[k], a2 = a[left];

            // Ensure a1 >= a2
            if (a1 < a2) {
                a2 = a1; a1 = a[left];
            }
            // Insert the larger element first; elements move two slots
            // to the right because room is needed for both a1 and a2
            while (a1 < a[--k]) {
                a[k + 2] = a[k];
            }
            a[++k + 1] = a1;

            // Then insert the smaller element; elements move one slot
            while (a2 < a[--k]) {
                a[k + 1] = a[k];
            }
            a[k + 1] = a2;
        }

        /*
         * Insert the last element, which is left without a partner when
         * the number of remaining elements is odd. Larger elements are
         * shifted one slot to the right (a[right + 1] = a[right]) before
         * the saved value is written into the gap.
         */
        int last = a[right];

        while (last < a[--right]) {
            a[right + 1] = a[right];
        }
        a[right + 1] = last;
    }
    return;
}
INSERTION_SORT_THRESHOLD为47,当要排序的元素个数小于47时,采用插入排序:最左侧的区间(leftmost为true)使用不带哨兵的传统插入排序,新元素与它前面的元素逐个比较;其余区间则依靠区间左侧已有的元素充当哨兵,使用成对插入排序。最左侧区间的代码如下:
// Insertion sort without a sentinel: in each iteration a[i + 1] is
// inserted into the already sorted part a[left..i].
for (int i = left, j = i; i < right; j = ++i) {
int ai = a[i + 1];
// Shift every element greater than ai one slot to the right
while (ai < a[j]) {
a[j + 1] = a[j];
// Explicit bound check: stop after the element at left has been shifted
if (j-- == left) {
break;
}
}
// j + 1 is now the free slot where ai belongs
a[j + 1] = ai;
}
至于大过INSERTION_SORT_THRESHOLD(47)的,用一种快速排序(双轴快排)的方法:
1.从数列中挑出五个等间距的元素并排序,再从中选出“基准”(pivot):五个元素互不相等时取第2、第4个作为两个基准,否则取第3个作为单个基准;
2.重新排序数列,所有元素比基准值小的摆放在基准前面,所有元素比基准值大的摆在基准的后面(相同的数可以到任一边)。在这个分区退出之后,该基准就处于数列的中间位置。这个称为分区(partition)操作;
3.递归地(recursive)把小于基准值元素的子数列和大于基准值元素的子数列排序。
// Inexpensive approximation of length / 7
// (computed with shifts as length / 8 + length / 64 + 1)
int seventh = (length >> 3) + (length >> 6) + 1;
/*
 * Sort five evenly spaced elements around (and including) the
 * center element in the range. These elements will be used for
 * pivot selection as described below. The choice for spacing
 * these elements was empirically determined to work well on
 * a wide variety of inputs.
 */
int e3 = (left + right) >>> 1; // The midpoint
int e2 = e3 - seventh;
int e1 = e2 - seventh;
int e4 = e3 + seventh;
int e5 = e4 + seventh;
// Sort these elements using insertion sort: each of the four steps below
// inserts one more sample into the already ordered ones, so afterwards
// a[e1] <= a[e2] <= a[e3] <= a[e4] <= a[e5].
if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; }
if (a[e3] < a[e2]) { int t = a[e3]; a[e3] = a[e2]; a[e2] = t;
if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; }
}
if (a[e4] < a[e3]) { int t = a[e4]; a[e4] = a[e3]; a[e3] = t;
if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t;
if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; }
}
}
if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t;
if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t;
if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t;
if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; }
}
}
}
seventh近似等于length/7,选取的五个点分别是中间点-2seventh、中间点-seventh、中间点、中间点+seventh、中间点+2seventh,然后将这五个点上的元素从小到大排序,选择点的代码如上。
当五个互不相等时,采用双轴快排
int less = left; // The index of the first element of center part
int great = right; // The index before the first element of right part
// Dual-pivot partitioning is used only when neighbouring samples among the
// five sorted ones all differ.
if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) {
/*
 * Use the second and the fourth of the five sorted elements as pivots.
 * Note that pivot1 <= pivot2.
 */
int pivot1 = a[e2];
int pivot2 = a[e4];
/*
 * The first and the last elements of the range are moved to the
 * positions the pivots were taken from. When partitioning is
 * complete, the pivots are swapped into their final positions
 * and excluded from the recursive sorting.
 */
a[e2] = a[left];
a[e4] = a[right];
/*
 * Skip leading elements that are less than pivot1 and trailing
 * elements that are greater than pivot2.
 */
while (a[++less] < pivot1);
while (a[--great] > pivot2);
/*
 * Partitioning:
 *
 *   left part           center part                   right part
 * +--------------------------------------------------------------+
 * |  < pivot1  |  pivot1 <= && <= pivot2  |    ?    |  > pivot2  |
 * +--------------------------------------------------------------+
 *               ^                          ^       ^
 *               |                          |       |
 *              less                        k     great
 *
 * Invariants:
 *
 *              all in (left, less)   < pivot1
 *    pivot1 <= all in [less, k)     <= pivot2
 *              all in (great, right) > pivot2
 *
 * Pointer k is the first index of ?-part.
 */
outer:
for (int k = less - 1; ++k <= great; ) {
int ak = a[k];
if (ak < pivot1) { // Move a[k] to left part
a[k] = a[less];
/*
 * Here and below we use "a[i] = b; i++;" instead
 * of "a[i++] = b;" due to performance issue.
 */
a[less] = ak;
++less;
} else if (ak > pivot2) { // Move a[k] to right part
while (a[great] > pivot2) {
if (great-- == k) {
break outer;
}
}
if (a[great] < pivot1) { // a[great] <= pivot2
a[k] = a[less];
a[less] = a[great];
++less;
} else { // pivot1 <= a[great] <= pivot2
a[k] = a[great];
}
/*
 * Here and below we use "a[i] = b; i--;" instead
 * of "a[i--] = b;" due to performance issue.
 */
a[great] = ak;
--great;
}
}
// Swap pivots into their final positions
a[left] = a[less - 1]; a[less - 1] = pivot1;
a[right] = a[great + 1]; a[great + 1] = pivot2;
// Sort left and right parts recursively, excluding known pivots
sort(a, left, less - 2, leftmost);
sort(a, great + 2, right, false);
/*
 * If the center part is too large (it spans more than 4/7 of the
 * range, i.e. it reaches beyond both e1 and e5), preprocess it
 * before sorting it recursively: elements equal to pivot1 are
 * gathered at its left end and elements equal to pivot2 at its
 * right end, so that they are excluded from the recursive call.
 */
if (less < e1 && e5 < great) {
/*
 * Skip elements, which are equal to pivot values.
 */
while (a[less] == pivot1) {
++less;
}
while (a[great] == pivot2) {
--great;
}
/*
 * Partitioning:
 *
 *   left part         center part                  right part
 * +----------------------------------------------------------+
 * | == pivot1 |  pivot1 < && < pivot2  |    ?    | == pivot2 |
 * +----------------------------------------------------------+
 *              ^                        ^       ^
 *              |                        |       |
 *             less                      k     great
 *
 * Invariants:
 *
 *              all in (*,  less) == pivot1
 *     pivot1 < all in [less,  k)  < pivot2
 *              all in (great, *) == pivot2
 *
 * Pointer k is the first index of ?-part.
 */
outer:
for (int k = less - 1; ++k <= great; ) {
int ak = a[k];
if (ak == pivot1) { // Move a[k] to left part
a[k] = a[less];
a[less] = ak;
++less;
} else if (ak == pivot2) { // Move a[k] to right part
while (a[great] == pivot2) {
if (great-- == k) {
break outer;
}
}
if (a[great] == pivot1) { // a[great] < pivot2
a[k] = a[less];
/*
 * Even though a[great] equals to pivot1, the
 * assignment a[less] = pivot1 may be incorrect,
 * if a[great] and pivot1 are floating-point zeros
 * of different signs. Therefore in float and
 * double sorting methods we have to use more
 * accurate assignment a[less] = a[great].
 */
a[less] = pivot1;
++less;
} else { // pivot1 < a[great] < pivot2
a[k] = a[great];
}
a[great] = ak;
--great;
}
}
}
// Sort center part recursively
sort(a, less, great, false);
}
当上述if条件不成立(排序后的五个元素中有相邻两个相等)时,采用单轴快排,即三向切分的快速排序:
// Pointers
int less = left; // The index of the first element of center part
int great = right; // The index before the first element of right part
// Partitioning with one pivot
/*
 * Use the third of the five sorted elements as pivot.
 * This value is inexpensive approximation of the median.
 */
// Take the middle sample a[e3] as the single pivot
int pivot = a[e3];
/*
 * Partitioning degenerates to the traditional 3-way
 * (or "Dutch National Flag") schema:
 *
 *   left part    center part              right part
 * +-------------------------------------------------+
 * |  < pivot  |   == pivot   |     ?    |  > pivot  |
 * +-------------------------------------------------+
 *              ^              ^        ^
 *              |              |        |
 *             less            k      great
 *
 * Invariants:
 *
 *   all in (left, less)   < pivot
 *   all in [less, k)     == pivot
 *   all in (great, right) > pivot
 *
 * Pointer k is the first index of ?-part.
 */
for (int k = less; k <= great; ++k) {
// Elements equal to the pivot stay where they are (center part)
if (a[k] == pivot) {
continue;
}
int ak = a[k];
if (ak < pivot) { // Move a[k] to left part
a[k] = a[less];
a[less] = ak;
++less;
} else { // a[k] > pivot - Move a[k] to right part
while (a[great] > pivot) {
--great;
}
if (a[great] < pivot) { // a[great] <= pivot
a[k] = a[less];
a[less] = a[great];
++less;
} else { // a[great] == pivot
/*
 * Even though a[great] equals to pivot, the
 * assignment a[k] = pivot may be incorrect,
 * if a[great] and pivot are floating-point
 * zeros of different signs. Therefore in float
 * and double sorting methods we have to use
 * more accurate assignment a[k] = a[great].
 */
a[k] = pivot;
}
a[great] = ak;
--great;
}
}
/*
 * Sort left and right parts recursively.
 * All elements from center part are equal
 * and, therefore, already sorted.
 */
sort(a, left, less - 1, leftmost);
sort(a, great + 1, right, false);
这是小于阈值QUICKSORT_THRESHOLD(286)时的两种情况。至于大于286的,它会尝试进入归并排序(Merge Sort)。归并排序前先扫描数组:count统计有序序列(run,可以是升序、降序或相等)的个数,每找到一个这样的序列就++count(降序序列会被就地反转为升序);当count达到MAX_RUN_COUNT(67)时,判定这个数组不具备结构(也就是数据时而升时而降),转而交给之前的sort方法(里面的快速排序)处理(The array is not highly structured, use Quicksort instead of merge sort.)。
// Check if the array is nearly sorted
// Each iteration finds one run starting at run[count]; the loop update
// then stores k, the index just past that run, as the start of the next.
for (int k = left; k < right; run[count] = k) {
if (a[k] < a[k + 1]) { // ascending
while (++k <= right && a[k - 1] <= a[k]);
} else if (a[k] > a[k + 1]) { // descending
while (++k <= right && a[k - 1] >= a[k]);
// Reverse the descending run in place; lo starts one before the run
// and hi one past it because both are stepped before being used
for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) {
int t = a[lo]; a[lo] = a[hi]; a[hi] = t;
}
} else { // equal
// Give up on merging once a run of equal elements has been extended
// MAX_RUN_LENGTH times, and sort the whole range with Quicksort
for (int m = MAX_RUN_LENGTH; ++k <= right && a[k - 1] == a[k]; ) {
if (--m == 0) {
sort(a, left, right, true);
return;
}
}
}
/*
 * The array is not highly structured,
 * use Quicksort instead of merge sort.
 */
if (++count == MAX_RUN_COUNT) {
sort(a, left, right, true);
return;
}
}
最后上述条件不满足的话,采用归并排序
// Determine alternation base for merge
// odd is toggled once per doubling of n below count. It selects which of
// the two arrays is the first merge source; a and b swap after each pass.
byte odd = 0;
for (int n = 1; (n <<= 1) < count; odd ^= 1);
// Use or create temporary array b for merging
int[] b; // temp array; alternates with a
int ao, bo; // array offsets from 'left'
// NOTE(review): blen is the element count only if right is already one past
// the last element here (the full method increments it earlier) - confirm.
int blen = right - left; // space needed for b
// Allocate a fresh workspace when none was supplied or it is too small
if (work == null || workLen < blen || workBase + blen > work.length) {
work = new int[blen];
workBase = 0;
}
if (odd == 0) {
// Copy the data into the workspace and merge from there back into the
// original array; ao maps range index x to work[x + ao]
System.arraycopy(a, left, work, workBase, blen);
b = a;
bo = 0;
a = work;
ao = workBase - left;
} else {
// Merge from the original array into the workspace first
b = work;
ao = 0;
bo = workBase - left;
}
// Merging
// Each pass merges adjacent pairs of runs from a into b and compacts the
// run table; passes repeat until a single run is left.
for (int last; count > 1; count = last) {
for (int k = (last = 0) + 2; k <= count; k += 2) {
int hi = run[k], mi = run[k - 1];
for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
// Take from the left run if the right run is exhausted, or if the
// left run still has elements and its head is not greater
if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
b[i + bo] = a[p++ + ao];
} else {
b[i + bo] = a[q++ + ao];
}
}
run[++last] = hi;
}
// An odd number of runs leaves the last one without a partner:
// copy it to the target array unchanged
if ((count & 1) != 0) {
for (int i = right, lo = run[count - 1]; --i >= lo;
b[i + bo] = a[i + ao]
);
run[++last] = right;
}
// Swap source and target (and their offsets) for the next pass
int[] t = a; a = b; b = t;
int o = ao; ao = bo; bo = o;
}
它的归并排序跟普通的归并排序有所差异
总结,插入排序,快速排序,归并排序三种排序的组合