大文件排序

本文介绍了一种针对大文件的外部排序方法,通过将文件分割成多个小文件并分别排序,再利用归并思想进行合并,最终完成大文件的排序。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

大文件排序

对于很大的文件中的内容进行排序,不能和普通的排序一样将所有的数据读取到内存,然后对数据进行排序,因此需要使用外部排序进行整个文件的排序。

基本思路

首先将大文件分割成一个个小文件,对每个小文件的内容使用普通的(内存)排序方法进行排序;所有小文件排序完毕后,再对这些文件两两合并(使用归并排序的思想进行合并),并重复这一过程,直到只剩下一个文件,这个文件就是排好序的结果。

简单实现
package com.fengyangdi.sort;

import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Random;

/**
 * External merge sort for a text file that holds one integer per line.
 *
 * <p>The input is read in chunks of at most {@code splitSize} lines. Each chunk is sorted
 * in memory and written to a scratch directory ({@code tmp0}) next to the input file. The
 * chunk files are then merged pairwise, alternating between the scratch directories
 * {@code tmp0} and {@code tmp1}, until the last merge writes the sorted result back over
 * the input file.
 *
 * <p>The input file is only overwritten by the final merge step; a failure while splitting
 * (for example a line that is not an integer) leaves it untouched.
 *
 * <p>Instances keep per-run state in fields and are not safe for concurrent use.
 *
 * Created by GGM on 2016/9/22.
 */
public class BigFileSort {

    /** Default number of lines sorted in memory and written to one chunk file. */
    private static final int SPILT_SIZE = 10 * 10000;

    /** Name prefix of every chunk file this class creates in its scratch directories. */
    private static final String CHUNK_PREFIX = "tmp_";

    /** Accepts only regular files that look like chunk files written by this class. */
    private static final FileFilter CHUNK_FILTER = new FileFilter() {
        @Override
        public boolean accept(File candidate) {
            return candidate.isFile() && candidate.getName().startsWith(CHUNK_PREFIX);
        }
    };

    /** Maximum number of lines per in-memory chunk for this instance. */
    private final int splitSize;
    /** Directory that contains the file being sorted; the scratch directories live here. */
    private String parentPath;
    /** Path of the file being sorted, as passed to {@link #sort(String)}. */
    private String filename;

    /** Creates a sorter that uses the default chunk size of 100,000 lines. */
    public BigFileSort() {
        this(SPILT_SIZE);
    }

    /**
     * Creates a sorter with a custom chunk size.
     *
     * @param splitSize maximum number of lines sorted in memory at once; must be positive
     * @throws IllegalArgumentException if {@code splitSize} is not positive
     */
    public BigFileSort(int splitSize) {
        if (splitSize <= 0) {
            throw new IllegalArgumentException("splitSize must be positive: " + splitSize);
        }
        this.splitSize = splitSize;
    }

    /**
     * Sorts the integers in the given file in ascending order, in place.
     *
     * <p>I/O failures are reported on standard error and abort the sort. The scratch
     * directories are removed afterwards if they are empty.
     *
     * @param fileName path of the file to sort; a null or empty name is rejected with a message
     * @throws NumberFormatException if a line of the file is not a valid {@code int}
     */
    public void sort(String fileName) {
        if (fileName == null || fileName.length() <= 0) {
            System.out.println("The filename is invalid");
            return;
        }
        this.filename = fileName;
        try {
            createDirSaveSplitFile(fileName);
            boolean splitCompleted = false;
            try {
                splitFile(fileName);
                splitCompleted = true;
            } finally {
                if (!splitCompleted) {
                    // The input has not been modified yet, so the partial chunks are disposable.
                    clearChunks(scratchDir(0));
                }
            }
            // The input is not deleted up front: the last merge step overwrites it, so an
            // empty input (no chunks) or an earlier failure never makes the file disappear.
            mergeFiles(0);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            deleteDirs();
        }
    }

    /** Removes the two scratch directories; {@link File#delete()} only succeeds if they are empty. */
    private void deleteDirs() {
        if (parentPath == null) {
            return;
        }
        scratchDir(0).delete();
        scratchDir(1).delete();
    }

    /** Returns the scratch directory {@code tmp<index>} located next to the input file. */
    private File scratchDir(int index) {
        return new File(parentPath, "tmp" + index);
    }

    /** Lists the chunk files in {@code dir}; never returns null. */
    private static File[] listChunks(File dir) {
        File[] chunks = dir.listFiles(CHUNK_FILTER);
        return chunks == null ? new File[0] : chunks;
    }

    /** Deletes every chunk file in {@code dir}, leaving any other content alone. */
    private static void clearChunks(File dir) {
        for (File chunk : listChunks(dir)) {
            chunk.delete();
        }
    }

    /**
     * Repeatedly merges the chunk files pairwise until the result has been written to the
     * input file.
     *
     * @param level index (0 or 1) of the scratch directory that currently holds the chunks
     * @throws IOException if reading or writing a chunk fails
     */
    private void mergeFiles(int level) throws IOException {
        int mergeIndex = level;
        while (true) {
            File[] files = listChunks(scratchDir(mergeIndex));
            if (files.length == 0) {
                return;
            }
            if (files.length == 1) {
                copyFile(files[0], this.filename);
                files[0].delete();
                return;
            }
            if (files.length == 2) {
                mergeTwoFiles(this.filename, files[0], files[1]);
                files[0].delete();
                files[1].delete();
                return;
            }

            File saveDir = scratchDir(1 - mergeIndex);
            int index = 0;
            int i;
            for (i = 0; i + 1 < files.length; i += 2) {
                File merged = new File(saveDir, CHUNK_PREFIX + index++);
                mergeTwoFiles(merged.getPath(), files[i], files[i + 1]);
                files[i].delete();
                files[i + 1].delete();
            }
            if (i == files.length - 1) {
                // Odd chunk out: it is already sorted, so just move it to the next level.
                File carried = new File(saveDir, CHUNK_PREFIX + index);
                if (!files[i].renameTo(carried)) {
                    copyFile(files[i], carried.getPath());
                    files[i].delete();
                }
            }
            mergeIndex = 1 - mergeIndex;
        }
    }

    /**
     * Copies a chunk file line by line to {@code fileName}, overwriting the target.
     *
     * @param file     chunk file to read
     * @param fileName path of the file to write
     * @throws IOException if reading or writing fails
     */
    private void copyFile(File file, String fileName) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
             BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
            copyRemaining(reader, reader.readLine(), writer);
        }
    }

    /**
     * Merges two sorted chunk files into one sorted file.
     *
     * @param filenamem path of the file to write (overwritten if it exists)
     * @param file1     first sorted chunk
     * @param file2     second sorted chunk
     * @throws IOException if reading or writing fails
     */
    private void mergeTwoFiles(String filenamem, File file1, File file2) throws IOException {
        try (BufferedReader reader1 = new BufferedReader(new FileReader(file1));
             BufferedReader reader2 = new BufferedReader(new FileReader(file2));
             BufferedWriter writer = new BufferedWriter(new FileWriter(filenamem))) {
            String n1 = reader1.readLine();
            String n2 = reader2.readLine();
            while (n1 != null && n2 != null) {
                if (Integer.parseInt(n1) <= Integer.parseInt(n2)) {
                    writeLine(writer, n1);
                    n1 = reader1.readLine();
                } else {
                    writeLine(writer, n2);
                    n2 = reader2.readLine();
                }
            }
            // At most one of the two inputs still has lines; append them unchanged.
            copyRemaining(reader1, n1, writer);
            copyRemaining(reader2, n2, writer);
        }
    }

    /** Writes {@code pending} (if not null) and every further line of {@code reader} to {@code writer}. */
    private static void copyRemaining(BufferedReader reader, String pending, BufferedWriter writer)
            throws IOException {
        String line = pending;
        while (line != null) {
            writeLine(writer, line);
            line = reader.readLine();
        }
    }

    /** Writes one line followed by the platform line separator. */
    private static void writeLine(BufferedWriter writer, String line) throws IOException {
        writer.write(line);
        writer.newLine();
    }

    /**
     * Creates the scratch directories {@code tmp0} and {@code tmp1} next to the input file
     * and removes chunk files left behind there, so they cannot leak into this run.
     *
     * @param fileName path of the file being sorted
     * @throws IOException if the parent directory is unknown or a scratch directory cannot be created
     */
    private void createDirSaveSplitFile(String fileName) throws IOException {
        File parent = new File(fileName).getAbsoluteFile().getParentFile();
        if (parent == null) {
            throw new IOException("Cannot determine the parent directory of " + fileName);
        }
        parentPath = parent.getPath();
        for (int index = 0; index <= 1; index++) {
            File dir = scratchDir(index);
            if (!dir.isDirectory() && !dir.mkdir()) {
                throw new IOException("Cannot create scratch directory " + dir);
            }
            clearChunks(dir);
        }
    }

    /**
     * Splits the input into sorted chunk files {@code tmp0/tmp_0}, {@code tmp0/tmp_1}, ...
     * of at most {@code splitSize} lines each, including the final, shorter chunk.
     *
     * @param fileName path of the file to split
     * @throws IOException           if the file cannot be read or a chunk cannot be written
     * @throws NumberFormatException if a line is not a valid {@code int}
     */
    private void splitFile(String fileName) throws IOException {
        File chunkDir = scratchDir(0);
        int chunkIndex = 0;
        ArrayList<Integer> list = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            String line;
            while ((line = br.readLine()) != null) {
                list.add(Integer.parseInt(line));
                if (list.size() == splitSize) {
                    writeSortedChunk(list, new File(chunkDir, CHUNK_PREFIX + chunkIndex++));
                    list.clear();
                }
            }
        }
        // The last chunk is usually shorter than splitSize; it must be written as well.
        if (!list.isEmpty()) {
            writeSortedChunk(list, new File(chunkDir, CHUNK_PREFIX + chunkIndex));
        }
    }

    /** Sorts {@code values} in place and writes them, one per line, to {@code chunk}. */
    private static void writeSortedChunk(ArrayList<Integer> values, File chunk) throws IOException {
        Collections.sort(values);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(chunk))) {
            for (Integer value : values) {
                writeLine(bw, value.toString());
            }
        }
    }

    /**
     * Demo: writes ten million random integers to {@code D:\test\data.txt}, sorts the file
     * and prints the result, 50 numbers per output line.
     *
     * @param args unused
     * @throws IOException if the demo file cannot be written or read
     */
    public static void main(String[] args) throws IOException {
        File file = new File("D:\\test\\data.txt");
        Random random = new Random((int) (Math.random() * 100));
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
            for (int j = 0; j < 10000000; j++) {
                bw.write(random.nextInt(20000) + "");
                bw.newLine();
            }
        }
        new BigFileSort().sort("D:\\test\\data.txt");
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line;
            int count = 0;
            while ((line = br.readLine()) != null) {
                System.out.print(line + " ");
                count++;
                if (count == 50) {
                    System.out.println();
                    count = 0;
                }
            }
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值