归并排序

最新推荐文章于 2024-12-15 17:47:59 发布

转载最新推荐文章于 2024-12-15 17:47:59 发布 · 576 阅读

Tech 同时被 2 个专栏收录

18 篇文章

订阅专栏

algorithm

6 篇文章

订阅专栏

归并排序的一个典型应用场景是：文件太大，无法一次性读入内存。这时可以考虑使用（外部）归并排序，步骤如下：

1. 先从大文件中依次读取固定长度的数据，然后使用排序算法在内存中对这些数据排序；

2. 将排序好的部分数据存入临时文件中；

3. 重复步骤 1~2，直到大文件中的数据全部被读完，并在排序后存入临时文件；

4. 同时读取这 N 个临时文件，每次从各文件的当前元素中选取最小的一个写入输出文件，完成归并。

归并排序是一种稳定排序方法。

程序代码如下，

参考： http://blog.youkuaiyun.com/v_JULY_v/article/details/6451990

但是原文中代码略有错误，如下代码中已经修改过了。

/*
 * Entry point of the external sort.
 *
 * Stores the three arguments on the object, lets MemorySort() split the
 * input into sorted temporary files, then lets MergeSort() merge them.
 *
 * file2Sort   - path of the input file to sort
 * sortedFile  - path of the output file
 * number2sort - how many integers are read and sorted in memory per chunk
 *
 * Invalid arguments (NULL path or non-positive chunk size) are reported on
 * stderr and nothing is done; the object's members are left untouched.
 */
void DoSorting(char* file2Sort, char* sortedFile, 
						int number2sort)
{
	// The paths are handed to fopen() and number2sort sizes a new[] buffer
	// further down, so reject values those calls cannot handle.
	if(NULL == file2Sort || NULL == sortedFile || number2sort <= 0)
	{
		fprintf(stderr, "DoSorting: invalid arguments\n");
		return;
	}

	this->file2Sort = file2Sort;
	this->sortedFile = sortedFile;
	this->number2sort = number2sort;

	// Sort the file chunk by chunk in memory and write each chunk to a
	// temporary file, then merge the temporary files.
	int file_count = MemorySort();
	MergeSort(file_count);

}

/*
 * Phase 1 of the external sort: cut the input file into chunks of at most
 * this->number2sort integers, sort each chunk in memory with qsort() and the
 * comparator cmp_int, and write each sorted chunk to its own temporary file
 * (named by temp_filename(0), temp_filename(1), ...).
 *
 * Returns the number of temporary files written. Returns 0 when the input
 * file cannot be opened or a temporary file cannot be created, so that the
 * caller does not merge an incomplete set of chunks.
 */
int MemorySort()
{
	FILE* fin = fopen(this->file2Sort, "rt");
	if(NULL == fin)
	{
		perror(this->file2Sort);
		return 0;
	}

	int n = 0, file_count = 0; 
	int* array = new int[this->number2sort];

	// Read up to number2sort integers at a time, sort them in memory and
	// write them to the next temporary file.
	while((n = readData(fin, array, this->number2sort) ) > 0)
	{
		qsort(array, n, sizeof(int), cmp_int); //memory sort quick

		char* fileName = temp_filename(file_count++);
		FILE* tempFile = fopen(fileName, "w");
		if(NULL == tempFile)
		{
			perror(fileName);
			free(fileName);
			// A missing chunk would make the merged result incomplete;
			// report "nothing to merge" instead.
			file_count = 0;
			break;
		}
		printf("write tempal sorted list to file: %s\n", fileName);
		// NOTE(review): MergeSort releases the same kind of buffer with
		// `delete`; confirm how temp_filename allocates and use the
		// matching deallocator in both places.
		free(fileName);
		writeData(tempFile, array, n);    
		fclose(tempFile); 
	}

	delete [] array;
	fclose(fin);
	return file_count;
}

/*
 * Write the first n integers of a[] to stream f in decimal text form.
 * Every value, including the last one, is followed by a single space.
 * Nothing is written when n is zero or negative.
 */
void writeData(FILE* f, int a[], int n)
{
    const int *cursor = a;
    int remaining = n;

    while (remaining > 0)
    {
        fprintf(f, "%d ", *cursor);
        ++cursor;
        --remaining;
    }
}

/*
 * Read up to num whitespace-separated decimal integers from fin into array.
 *
 * Reading stops when num values have been stored, at end of file, or at the
 * first token that is not an integer. The number of values actually stored
 * is printed to stdout and returned.
 */
int readData(FILE* fin, int* array, int num)
{
	int i = 0;
	// fscanf returns 1 only when a value was actually converted and stored.
	// Comparing against EOF alone would count a matching failure (return 0)
	// as a successful read: the slot would hold garbage and, because the
	// offending token is never consumed, the caller would loop forever.
	while(i < num && fscanf(fin, "%d", &array[i]) == 1)
	{
		i++;
	}
	printf("read %d integer\n", i);
	return i;
}

/*
 * Phase 2 of the external sort: k-way merge of the temporary files
 * temp_filename(0) .. temp_filename(fileCnt - 1) into this->sortedFile.
 *
 * All temporary files are opened at once. One pending value per file is
 * kept in data[]; hasNext[] records whether that value is valid. On every
 * round the smallest pending value is written to the output file (and
 * echoed to stdout), and the file it came from is advanced by one value.
 * Ties are resolved in favour of the lowest file index.
 *
 * Does nothing when fileCnt <= 0. If any temporary file or the output file
 * cannot be opened, the error is reported with perror() and no merge is
 * performed.
 */
void MergeSort(int fileCnt)
{
	if(fileCnt <= 0)
	{
		return;
	}

	// Open every temporary file up front.
	FILE* *fArray = new FILE*[fileCnt];
	bool allOpened = true;
	for(int i = 0; i < fileCnt; i++)
	{
		char* fileName = temp_filename(i);
		fArray[i] = fopen(fileName, "rt");
		if(NULL == fArray[i])
		{
			perror(fileName);
			allOpened = false;
		}
		// NOTE(review): MemorySort releases the same kind of buffer with
		// free(); confirm how temp_filename allocates and use the matching
		// deallocator (free / delete / delete[]) in both places.
		delete fileName;
	}

	// Only create the output file when every input is available, so a
	// partial merge never overwrites it.
	FILE* fout = NULL;
	if(allOpened)
	{
		fout = fopen(this->sortedFile, "wt");
		if(NULL == fout)
		{
			perror(this->sortedFile);
		}
	}

	if(NULL != fout)
	{
		int *data = new int[fileCnt];
		bool *hasNext = new bool[fileCnt];

		// Prime the merge with the first value of each file. Anything other
		// than one successful conversion (EOF or a malformed token) marks
		// the file as exhausted.
		for(int i = 0; i < fileCnt; i++)
		{
			data[i] = 0;
			hasNext[i] = (fscanf(fArray[i], "%d", &data[i]) == 1);
		}

		while(true)
		{
			// Find the smallest pending value. The minimum is seeded from
			// the first live file rather than from data[0] (file 0 may
			// already be exhausted) or from INT_MAX (a value equal to
			// INT_MAX could then never be selected and would be dropped).
			int min_index = -1;
			for(int i = 0; i < fileCnt; i++)
			{
				if(hasNext[i] && (-1 == min_index || data[i] < data[min_index]))
				{
					min_index = i;
				}
			}
			if(-1 == min_index)
			{
				break;	// every file is exhausted
			}

			int min = data[min_index];
			// Advance the file that supplied the minimum.
			if(fscanf(fArray[min_index], "%d", &data[min_index]) != 1)
			{
				hasNext[min_index] = false;
			}
			fprintf(fout, "%d ", min);
			printf("%d ", min);
		}

		printf("\n");
		delete [] data;
		delete [] hasNext;
		fclose(fout);
	}

	// Close the input files and release the handle array.
	for(int i = 0; i < fileCnt; i++)
	{
		if(NULL != fArray[i])
		{
			fclose(fArray[i]);
		}
	}
	delete [] fArray;
}