利用DeepSeek编写支持lz4和zstd格式的压缩工具

lz4和zstd都是Yann Collet设计的压缩算法(zstd是他在Facebook任职期间开发并开源的,lz4则是他更早的个人作品)。下面是网传的压缩算法对比表:

算法	压缩率	压缩速度	解压速度	支持流式压缩	适用场景
LZ4	低	极快	极快	是	实时数据压缩、日志压缩、内存缓存等
Zstd	高	快	快	是	文件压缩、网络传输、数据库备份等

1.从源码安装zstd压缩工具

wget https://github.1git.de/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz
tar xf zstd-1.5.7.tar.gz
mv zstd-1.5.7 zstd
cd zstd
make
sudo make install

make命令既生成命令行工具又生成动态链接库,make install把它们放入搜索路径。
注意:编译中输出如下信息:

==> building zstd with .gz compression support
==> no liblzma, building zstd without .xz/.lzma support
==> no liblz4, building zstd without .lz4 support

说明zstd工具还支持gz格式,如果先安装lz4,也会编译进去lz4支持。
2.从源码安装lz4压缩工具

wget https://github.1git.de/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz
tar xf lz4-1.10.0.tar.gz
mv lz4-1.10.0 lz4
cd lz4
make
sudo make install

3.提示词
我先把网上的一个示例代码给它,让它编写main函数和测试代码。编译没问题,但多次尝试压缩都报错Error: ERROR_dstMaxSize_tooSmall。经过调试,我把max_compressed改大20:

size_t src_size = strlen(original) + 1; // +1 for null terminator
size_t max_compressed = LZ4F_compressFrameBound(src_size, NULL)+20;

重新编译执行就通过了。
DeepSeek对此分析

LZ4F_compressFrameBound 的工作原理
该函数返回的是理论最大值,计算公式为:
最大大小 = 帧头(7-15字节) + 数据块最大大小 + 帧尾(4字节) + 校验和(可选4字节)
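但实际压缩时,可能因对齐、填充或内部实现细节需要额外几个字节。

(注:上面的分析并不准确。按lz4frame.h的说明,LZ4F_compressFrameBound给出的上界只适用于一次性压缩函数LZ4F_compressFrame;使用LZ4F_compressBegin/LZ4F_compressUpdate/LZ4F_compressEnd流式接口时,输出缓冲区应按LZ4F_compressBound(srcSize, prefs)计算,并且不小于LZ4F_HEADER_SIZE_MAX,而不是凭经验多加20字节。)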

解决了正确性问题,就继续提示让它添加功能。

这一版compress_streaming正确,请增加改变压缩级别,写一个测试不同级别的测试用例,计时和比较文件大小
请为zstd写一个同样功能的CLI,包含必要的头文件,只用给出代码,不做别的
请添加命令行参数-e 指定压缩算法 ,以便扩展

得到的整合程序如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <zstd.h>
#include <lz4.h>
#include <lz4frame.h>

/* Size in bytes, including the terminating NUL, of the path buffers in main(). */
#define MAX_FILENAME 256

/*
 * Compression back-end chosen on the command line.
 * COMPRESS_INVALID marks an algorithm name that was not recognized.
 */
typedef enum {
    COMPRESS_ZSTD    = 0,
    COMPRESS_LZ4     = 1,
    COMPRESS_INVALID = 2
} compress_algorithm_t;

/* Writes the usage summary (program name, synopsis and option list) to stdout. */
void print_help() {
    static const char* const usage_lines[] = {
        "zstdcli - Advanced Compression Tool\n",
        "Usage: zstdcli [options] <input_file>\n",
        "Options:\n",
        "  -e <alg>    Compression algorithm (zstd|lz4, default: zstd)\n",
        "  -l <level>  Compression level (zstd:1-22, lz4:1-12, default:3)\n",
        "  -o <file>   Output file (default: <input>.<ext>)\n",
        "  -h          Show this help\n",
    };
    const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];

    /* None of the lines contains a '%', so fputs emits exactly what printf would. */
    for (size_t idx = 0; idx < line_count; ++idx) {
        fputs(usage_lines[idx], stdout);
    }
}

int compress_zstd(const char* input_path, const char* output_path, int level) {
    FILE* fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        return -1;
    }

    FILE* fout = fopen(output_path, "wb");
    if (!fout) {
        fclose(fin);
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        return -1;
    }

    size_t buff_in_size = ZSTD_CStreamInSize();
    size_t buff_out_size = ZSTD_CStreamOutSize();
    void* buff_in = malloc(buff_in_size);
    void* buff_out = malloc(buff_out_size);

    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);

    size_t total_read = 0, total_written = 0;
    clock_t start = clock();

    while (1) {
        size_t read = fread(buff_in, 1, buff_in_size, fin);
        if (read == 0) break;

        ZSTD_inBuffer in_buf = { buff_in, read, 0 };
        while (in_buf.pos < in_buf.size) {
            ZSTD_outBuffer out_buf = { buff_out, buff_out_size, 0 };
            size_t remaining = ZSTD_compressStream2(cctx, &out_buf, &in_buf, ZSTD_e_continue);
            if (ZSTD_isError(remaining)) {
                fprintf(stderr, "Zstd compression error: %s\n", ZSTD_getErrorName(remaining));
                goto cleanup;
            }
            fwrite(buff_out, 1, out_buf.pos, fout);
            total_written += out_buf.pos;
        }
        total_read += read;
    }

    ZSTD_inBuffer final_in = { NULL, 0, 0 };
    while (1) {
        ZSTD_outBuffer final_out = { buff_out, buff_out_size, 0 };
        size_t remaining = ZSTD_compressStream2(cctx, &final_out, &final_in, ZSTD_e_end);
        if (ZSTD_isError(remaining)) {
            fprintf(stderr, "Zstd finalization error: %s\n", ZSTD_getErrorName(remaining));
            goto cleanup;
        }
        fwrite(buff_out, 1, final_out.pos, fout);
        total_written += final_out.pos;
        if (remaining == 0) break;
    }

    clock_t end = clock();
    printf("Zstd compressed %zu bytes to %zu bytes (%.2f%%)\n",
           total_read, total_written, (total_written * 100.0) / total_read);
    printf("Time: %.2f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);

cleanup:
    ZSTD_freeCCtx(cctx);
    free(buff_in);
    free(buff_out);
    fclose(fin);
    fclose(fout);
    return 0;
}

int compress_lz4(const char* input_path, const char* output_path, int level) {
    FILE* fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        return -1;
    }

    FILE* fout = fopen(output_path, "wb");
    if (!fout) {
        fclose(fin);
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        return -1;
    }

    LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
    prefs.compressionLevel = level;
    prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;

    LZ4F_cctx* cctx;
    size_t ret = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
    if (LZ4F_isError(ret)) {
        fclose(fin);
        fclose(fout);
        fprintf(stderr, "LZ4 context error: %s\n", LZ4F_getErrorName(ret));
        return -1;
    }

    char in_buf[64 * 1024];
    char out_buf[LZ4F_compressFrameBound(sizeof(in_buf), &prefs) + 32];
    size_t in_size, out_size;
    size_t total_read = 0, total_written = 0;
    clock_t start = clock();

    // Write header
    out_size = LZ4F_compressBegin(cctx, out_buf, sizeof(out_buf), &prefs);
    if (LZ4F_isError(out_size)) goto cleanup;
    fwrite(out_buf, 1, out_size, fout);
    total_written += out_size;

    // Compress data
    while ((in_size = fread(in_buf, 1, sizeof(in_buf), fin)) > 0) {
        out_size = LZ4F_compressUpdate(cctx, out_buf, sizeof(out_buf), in_buf, in_size, NULL);
        if (LZ4F_isError(out_size)) goto cleanup;
        fwrite(out_buf, 1, out_size, fout);
        total_read += in_size;
        total_written += out_size;
    }

    // Write footer
    out_size = LZ4F_compressEnd(cctx, out_buf, sizeof(out_buf), NULL);
    if (LZ4F_isError(out_size)) goto cleanup;
    fwrite(out_buf, 1, out_size, fout);
    total_written += out_size;

    clock_t end = clock();
    printf("LZ4 compressed %zu bytes to %zu bytes (%.2f%%)\n",
           total_read, total_written, (total_written * 100.0) / total_read);
    printf("Time: %.2f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);

cleanup:
    LZ4F_freeCompressionContext(cctx);
    fclose(fin);
    fclose(fout);
    return LZ4F_isError(out_size) ? -1 : 0;
}

/* Copies src into dst (capacity cap bytes). Returns 0 on success, -1 if src does not fit. */
static int copy_path(char* dst, size_t cap, const char* src) {
    int n = snprintf(dst, cap, "%s", src);
    return (n < 0 || (size_t)n >= cap) ? -1 : 0;
}

/*
 * Command-line entry point.
 *
 *   -e <alg>    zstd (default) or lz4
 *   -l <level>  compression level; out-of-range or non-numeric values fall
 *               back to 3 with a warning
 *   -o <file>   output file; defaults to <input>.zst or <input>.lz4
 *   -h          print the help text and exit with status 0
 *
 * The last non-option argument is taken as the input file.
 *
 * Returns 0 when compression succeeds and 1 on a usage error or when the
 * selected compressor reports a failure.
 */
int main(int argc, char** argv) {
    char input_path[MAX_FILENAME] = {0};
    char output_path[MAX_FILENAME] = {0};
    compress_algorithm_t algorithm = COMPRESS_ZSTD;
    long level = 3;

    // Parse arguments
    for (int i = 1; i < argc; i++) {
        const char* arg = argv[i];

        if (strcmp(arg, "-h") == 0) {
            print_help();
            return 0;
        }

        if (strcmp(arg, "-e") == 0 || strcmp(arg, "-l") == 0 || strcmp(arg, "-o") == 0) {
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: Option '%s' requires an argument\n", arg);
                print_help();
                return 1;
            }
            const char* value = argv[++i];

            switch (arg[1]) {
            case 'e':
                if (strcmp(value, "zstd") == 0) {
                    algorithm = COMPRESS_ZSTD;
                } else if (strcmp(value, "lz4") == 0) {
                    algorithm = COMPRESS_LZ4;
                } else {
                    algorithm = COMPRESS_INVALID;
                }
                break;
            case 'l': {
                char* end = NULL;
                long parsed = strtol(value, &end, 10);
                if (end == value || *end != '\0') {
                    fprintf(stderr, "Warning: Invalid level '%s', using 3\n", value);
                    level = 3;
                } else {
                    level = parsed;
                }
                break;
            }
            default: /* 'o' */
                if (copy_path(output_path, sizeof output_path, value) != 0) {
                    fprintf(stderr, "Error: Output path too long (max %d characters)\n",
                            MAX_FILENAME - 1);
                    return 1;
                }
                break;
            }
            continue;
        }

        if (arg[0] == '-') {
            fprintf(stderr, "Error: Unknown option '%s'\n", arg);
            print_help();
            return 1;
        }

        /* A silently truncated path would name a different file, so reject it. */
        if (copy_path(input_path, sizeof input_path, arg) != 0) {
            fprintf(stderr, "Error: Input path too long (max %d characters)\n",
                    MAX_FILENAME - 1);
            return 1;
        }
    }

    // Validate arguments
    if (!input_path[0]) {
        print_help();
        return 1;
    }

    if (algorithm == COMPRESS_INVALID) {
        fprintf(stderr, "Error: Invalid compression algorithm\n");
        print_help();
        return 1;
    }

    // Set default output filename
    if (!output_path[0]) {
        const char* ext = (algorithm == COMPRESS_ZSTD) ? ".zst" : ".lz4";
        int n = snprintf(output_path, sizeof output_path, "%s%s", input_path, ext);
        /* If the extension were cut off, the output name could equal the input name. */
        if (n < 0 || (size_t)n >= sizeof output_path) {
            fprintf(stderr, "Error: Default output name for '%s' is too long; use -o\n",
                    input_path);
            return 1;
        }
    }

    /* Opening the output with "wb" would truncate the input before it is read. */
    if (strcmp(input_path, output_path) == 0) {
        fprintf(stderr, "Error: Input and output files are the same ('%s')\n", input_path);
        return 1;
    }

    // Validate level ranges
    if (algorithm == COMPRESS_ZSTD && (level < 1 || level > 22)) {
        fprintf(stderr, "Warning: Zstd level %ld out of range (1-22), using 3\n", level);
        level = 3;
    } else if (algorithm == COMPRESS_LZ4 && (level < 1 || level > 12)) {
        fprintf(stderr, "Warning: LZ4 level %ld out of range (1-12), using 3\n", level);
        level = 3;
    }

    // Execute compression (level is within 1..22 here, so the cast is safe)
    int result;
    if (algorithm == COMPRESS_ZSTD) {
        result = compress_zstd(input_path, output_path, (int)level);
    } else {
        result = compress_lz4(input_path, output_path, (int)level);
    }

    return result != 0 ? 1 : 0;
}

编译命令行如下。库文件已经在默认搜索路径中的,不用-L参数指定;为了让编译器找到头文件(lz4.h、lz4frame.h、zstd.h),需要用-I参数指定它们所在的目录。

gcc zmt.c -llz4 -lzstd -o zmt -I lz4/programs -I lz4/lib  -I zstd/programs -I zstd/lib -O3
或
gcc zmt.c -lchdb -o zmtchdb -I lz4/programs -I lz4/lib  -I zstd/programs -I zstd/lib -O3 -L ./

两者的功能和性能没有区别,只是调用不同的动态链接库,可以用ldd命令验证,后者可以在未安装lz4和zstd工具,但有libchdb.so的机器上运行。

ldd zmt
	linux-vdso.so.1 =>  (0x0000007f9679c000)
	/usr/lib/libzfh.so (0x0000007f96702000)
	liblz4.so.1 => /usr/local/lib/liblz4.so.1 (0x0000007f966c5000)
	libzstd.so.1 => /usr/local/lib/libzstd.so.1 (0x0000007f965f0000)
ldd zmtchdb
	linux-vdso.so.1 =>  (0x0000007f7cada000)
	/usr/lib/libzfh.so (0x0000007f7ca40000)
	libchdb.so => ./libchdb.so (0x0000007f6314b000)

运行时,需要用LD_LIBRARY_PATH指定动态库的非标准路径

export LD_LIBRARY_PATH=./:/par:/usr/local/lib
./zmtchdb -e zstd clickhouse -l 6
Zstd compressed 549101800 bytes to 110685174 bytes (20.16%)
Time: 8.74 seconds
./zmtchdb -e lz4 clickhouse -l 2
LZ4 compressed 549101800 bytes to 175161389 bytes (31.90%)
Time: 3.61 seconds
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值