归并排序(外部排序)的典型应用场景是: 当文件太大,没有办法一次性读入内存时,可以考虑使用归并排序,步骤如下:
1. 从大文件中依次读取固定长度的数据,然后使用排序算法,对这部分数据做内存排序;
2. 将排序好的部分数据存入临时文件中;
3. 重复步骤 1~2,直到大文件中的数据全部被读完,并在排序后存入临时文件中;
4. 同时读取这 N 个临时文件,每次从各文件的当前元素中选取最小的一个写入结果文件,直到所有临时文件都被读完,即完成归并排序。
归并排序是一种稳定排序方法。
程序代码如下,
参考: http://blog.csdn.net/v_JULY_v/article/details/6451990
但是原文中代码略有错误,如下代码中已经修改过了。
/**
 * Entry point of the external sort.
 *
 * Stores the job parameters on the object, then runs the two phases:
 * MemorySort() splits the input into sorted temporary files, and MergeSort()
 * merges those files into the output.
 *
 * @param file2Sort   path of the input file (opened by MemorySort)
 * @param sortedFile  path of the output file (opened by MergeSort)
 * @param number2sort maximum number of integers sorted in memory per chunk
 */
void DoSorting(char* file2Sort, char* sortedFile,
               int number2sort)
{
    this->number2sort = number2sort;
    this->sortedFile = sortedFile;
    this->file2Sort = file2Sort;

    // Phase 1 reports how many temporary files it produced; phase 2 merges them.
    MergeSort(MemorySort());
}
/**
 * Phase 1 of the external sort: cuts the input file into sorted runs.
 *
 * Reads up to number2sort integers at a time from file2Sort, sorts each chunk
 * in memory with qsort, and writes it to its own temporary file whose name
 * comes from temp_filename(index).
 *
 * Failures are reported on stderr instead of dereferencing a NULL stream:
 *  - if the input file cannot be opened, nothing is written and 0 is returned;
 *  - if a temporary file cannot be created, splitting stops and the number of
 *    files written so far is returned (the unread remainder is not processed).
 *
 * @return number of temporary files successfully written (0 if none)
 */
int MemorySort()
{
    // A non-positive chunk size can never hold data, and a negative value is
    // not a valid array length for new[].
    if (this->number2sort <= 0)
    {
        return 0;
    }

    FILE* fin = fopen(this->file2Sort, "rt");
    if (fin == NULL)
    {
        perror("MemorySort: cannot open input file");
        return 0;
    }

    int n = 0, file_count = 0;
    int* array = new int[this->number2sort];

    // Each pass reads one chunk, sorts it in memory and writes it to a temp file
    while ((n = readData(fin, array, this->number2sort)) > 0)
    {
        qsort(array, n, sizeof(int), cmp_int);

        char* fileName = temp_filename(file_count);
        FILE* tempFile = fopen(fileName, "w");
        if (tempFile == NULL)
        {
            perror("MemorySort: cannot create temporary file");
            free(fileName);
            break;
        }
        // Count the file only once it exists, so the returned count matches
        // the files that can actually be opened by the merge phase.
        file_count++;

        printf("write tempal sorted list to file: %s\n", fileName);
        // NOTE(review): temp_filename's allocator is not visible in this
        // chunk; the name is released with free() here but with delete in
        // MergeSort -- confirm which one matches the allocation.
        free(fileName);

        writeData(tempFile, array, n);
        fclose(tempFile);
    }

    delete [] array;
    fclose(fin);
    return file_count;
}
/**
 * Writes the first n integers of a to stream f as decimal text, each value
 * followed by a single space.
 *
 * @param f stream to write to
 * @param a values to write
 * @param n number of values taken from a; nothing is written when n <= 0
 */
void writeData(FILE* f, int a[], int n)
{
    const int *cur = a;
    for (int left = n; left > 0; --left)
    {
        fprintf(f, "%d ", *cur);
        ++cur;
    }
}
/**
 * Reads up to num whitespace-separated decimal integers from fin into array.
 *
 * Reading stops at the first of: num values stored, end of file, a read
 * error, or a token that is not an integer. The last case matters because
 * fscanf returns 0 (not EOF) on a matching failure and leaves the offending
 * token in the stream; treating that as a successful read would count values
 * that were never stored and would never make progress past the bad token.
 *
 * Prints the number of values read to stdout.
 *
 * @param fin   stream to read from
 * @param array destination buffer with room for at least num ints
 * @param num   maximum number of integers to read
 * @return number of integers actually stored in array (0 when nothing was read)
 */
int readData(FILE* fin, int* array, int num)
{
    int count = 0;
    // Only a return value of 1 means one integer was converted and stored
    while (count < num && fscanf(fin, "%d", &array[count]) == 1)
    {
        count++;
    }
    printf("read %d integer\n", count);
    return count;
}
/**
 * Phase 2 of the external sort: k-way merge of the temporary files.
 *
 * Opens the temporary files temp_filename(0) .. temp_filename(fileCnt - 1)
 * at the same time, keeps the current head value of each one, and repeatedly
 * writes the smallest head to sortedFile (and echoes it to stdout) until
 * every file is exhausted. On equal values the file with the lowest index is
 * taken first.
 *
 * Differences from a naive implementation, all deliberate:
 *  - the minimum is tracked by index rather than by an INT_MAX sentinel, so
 *    values equal to INT_MAX are merged instead of being dropped;
 *  - a file is considered exhausted whenever fscanf fails to convert an
 *    integer (EOF, read error, or a non-numeric token), which guarantees the
 *    loop makes progress;
 *  - files that cannot be opened are reported on stderr and skipped; if the
 *    output file cannot be opened nothing is merged.
 *
 * @param fileCnt number of temporary files to merge; nothing happens if <= 0
 */
void MergeSort(int fileCnt)
{
    if (fileCnt <= 0)
    {
        return;
    }

    FILE* fout = fopen(this->sortedFile, "wt");
    if (fout == NULL)
    {
        perror("MergeSort: cannot open output file");
        return;
    }

    FILE** fArray = new FILE*[fileCnt];
    int* data = new int[fileCnt];
    bool* hasNext = new bool[fileCnt];

    // Open every temporary file and load its first value
    for (int i = 0; i < fileCnt; i++)
    {
        char* fileName = temp_filename(i);
        fArray[i] = fopen(fileName, "rt");
        if (fArray[i] == NULL)
        {
            perror(fileName);
        }
        // NOTE(review): temp_filename's allocator is not visible in this
        // chunk; the name is released with delete here but with free() in
        // MemorySort -- confirm which one matches the allocation.
        delete fileName;

        data[i] = 0;
        hasNext[i] = (fArray[i] != NULL)
                     && (fscanf(fArray[i], "%d", &data[i]) == 1);
    }

    while (true)
    {
        // Find the file whose current head value is the smallest. Strict '<'
        // keeps the lowest-index file on ties.
        int min_index = -1;
        for (int i = 0; i < fileCnt; i++)
        {
            if (hasNext[i] && (min_index == -1 || data[i] < data[min_index]))
            {
                min_index = i;
            }
        }
        if (-1 == min_index)
        {
            break;  // every file is exhausted
        }

        int min = data[min_index];
        fprintf(fout, "%d ", min);
        printf("%d ", min);

        // Advance the file the value came from
        if (fscanf(fArray[min_index], "%d", &data[min_index]) != 1)
        {
            hasNext[min_index] = false;
        }
    }
    printf("\n");

    // Close the files and release the memory
    for (int i = 0; i < fileCnt; i++)
    {
        if (fArray[i] != NULL)
        {
            fclose(fArray[i]);
        }
    }
    delete [] hasNext;
    delete [] data;
    delete [] fArray;

    // fclose flushes buffered output, so a failure here means lost data
    if (fclose(fout) != 0)
    {
        perror("MergeSort: error closing output file");
    }
}