利用DeepSeek编写一个使用lzav算法的文件压缩工具

LZAV是Aleksey Vaneev开发的一种嵌入式压缩算法,内存中压缩速度和压缩率在LZ4和ZSTD之间。我让DeepSeek编写了一个用于文件压缩的工具,并通过在文件头保存源文件大小,解决了解压缩时必须预先知道源文件大小的问题。
为了提供不同的压缩级别,引入了-l参数,1表示默认,非1表示深度压缩。
代码如下:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lzav.h"

#define MAX_FILENAME 256

/* Print the command-line usage summary to stdout. */
void print_help() {
    static const char* const usage_lines[] = {
        "lzavcli - LZAV Compression Tool (with size header)\n",
        "Usage: lzavcli [options] <input_file>\n",
        "Options:\n",
        "  -l <level>  compress level (default: 1,high: other)\n",
        "  -o <file>   Output file (default: <input>.lzav)\n",
        "  -d          Decompress mode\n",
        "  -h          Show this help\n",
    };
    const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];

    for (size_t idx = 0; idx < line_count; idx++) {
        fputs(usage_lines[idx], stdout);
    }
}

/*
 * Compress the file at input_path and write the result to output_path.
 *
 * Output layout: a 4-byte little-endian header holding the original file
 * size, followed by the LZAV-compressed stream. An empty input produces a
 * file that contains only the header.
 *
 * level: 1 selects lzav_compress_default(); any other value selects
 *        lzav_compress_hi().
 *
 * The whole input is loaded into memory. Inputs larger than INT_MAX bytes
 * are rejected: lengths are handed to LZAV as int here (the compressed
 * length is received as int) -- NOTE(review): confirm against the lzav.h
 * version in use.
 *
 * Returns 0 on success, -1 on failure (a message is printed to stderr).
 */
int compress_lzav(const char* input_path, const char* output_path, int level) {
    int rc = -1;
    FILE* fin = NULL;
    FILE* fout = NULL;
    void* src_buf = NULL;
    void* comp_buf = NULL;
    long file_size = 0;
    uint32_t src_len = 0;
    int bound = 0;
    int comp_len = 0;
    int write_ok = 0;
    double ratio = 0.0;
    uint8_t header[4];
    clock_t end;
    /* clock() measures processor time used by the process, not wall time. */
    clock_t start = clock();

    fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        goto cleanup;
    }

    /* Determine the original file size. */
    if (fseek(fin, 0, SEEK_END) != 0 ||
        (file_size = ftell(fin)) < 0 ||
        fseek(fin, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error: Cannot determine size of input file\n");
        goto cleanup;
    }
    if ((unsigned long)file_size > (unsigned long)INT_MAX) {
        fprintf(stderr, "Error: Input file is too large (limit is %d bytes)\n",
                INT_MAX);
        goto cleanup;
    }
    src_len = (uint32_t)file_size;

    /* An empty input needs no buffers and no compressor call; this also
       avoids malloc(0), which may legitimately return NULL. */
    if (src_len > 0) {
        /* Read the whole input into memory. */
        src_buf = malloc(src_len);
        if (!src_buf) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            goto cleanup;
        }
        if (fread(src_buf, 1, src_len, fin) != src_len) {
            fprintf(stderr, "Error: Failed to read input file\n");
            goto cleanup;
        }

        /* Size the destination buffer with the bound function that matches
           the compressor being used, plus the 32 bytes of slack the tool
           has always added. */
        bound = level == 1 ? lzav_compress_bound((int)src_len)
                           : lzav_compress_bound_hi((int)src_len);
        if (bound <= 0 || bound > INT_MAX - 32) {
            fprintf(stderr, "Error: Input file is too large to compress\n");
            goto cleanup;
        }
        bound += 32;

        comp_buf = malloc((size_t)bound);
        if (!comp_buf) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            goto cleanup;
        }

        /* Run the compressor. */
        comp_len = level == 1
            ? lzav_compress_default(src_buf, comp_buf, (int)src_len, bound)
            : lzav_compress_hi(src_buf, comp_buf, (int)src_len, bound);
        if (comp_len <= 0) {
            fprintf(stderr, "Error: Compression failed\n");
            goto cleanup;
        }
    }
    fclose(fin);
    fin = NULL;

    /* Write the output file: 4-byte size header, then the payload. */
    fout = fopen(output_path, "wb");
    if (!fout) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        goto cleanup;
    }

    /* Original size, 4 bytes, little-endian. */
    header[0] = (uint8_t)(src_len & 0xFF);
    header[1] = (uint8_t)((src_len >> 8) & 0xFF);
    header[2] = (uint8_t)((src_len >> 16) & 0xFF);
    header[3] = (uint8_t)((src_len >> 24) & 0xFF);

    write_ok = fwrite(header, 1, sizeof header, fout) == sizeof header;
    if (write_ok && comp_len > 0) {
        write_ok = fwrite(comp_buf, 1, (size_t)comp_len, fout) == (size_t)comp_len;
    }
    /* fclose flushes buffered data, so its result matters for writes. */
    if (fclose(fout) != 0) {
        write_ok = 0;
    }
    fout = NULL;
    if (!write_ok) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        goto cleanup;
    }

    end = clock();
    /* Avoid dividing by zero for an empty input. */
    ratio = src_len != 0 ? (comp_len * 100.0) / src_len : 0.0;
    printf("Compressed %u bytes to %d bytes (%.2f%%)\n",
           (unsigned)src_len, comp_len, ratio);
    printf("Time: %.2f ms\n", (double)(end - start) * 1000 / CLOCKS_PER_SEC);
    rc = 0;

cleanup:
    if (fin) {
        fclose(fin);
    }
    if (fout) {
        fclose(fout);
    }
    free(src_buf);
    free(comp_buf);
    return rc;
}

/*
 * Decompress a file produced by compress_lzav() and write the original data
 * to output_path.
 *
 * Expected input layout: a 4-byte little-endian header holding the original
 * size, followed by the LZAV-compressed stream. A header value of 0 with no
 * payload yields an empty output file.
 *
 * The header is untrusted input: it is range-checked before it is used as an
 * allocation size, and the decoded length must match it exactly. Sizes above
 * INT_MAX are rejected because lengths are handed to LZAV as int here --
 * NOTE(review): confirm against the lzav.h version in use.
 *
 * Returns 0 on success, -1 on failure (a message is printed to stderr).
 */
int decompress_lzav(const char* input_path, const char* output_path) {
    int rc = -1;
    FILE* fin = NULL;
    FILE* fout = NULL;
    void* comp_buf = NULL;
    void* decomp_buf = NULL;
    uint8_t header[4];
    uint32_t src_len = 0;
    long file_size = 0;
    size_t comp_len = 0;
    int result_len = 0;
    int write_ok = 1;
    clock_t end;
    /* clock() measures processor time used by the process, not wall time. */
    clock_t start = clock();

    fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        goto cleanup;
    }

    /* Read the original size from the header (4 bytes, little-endian). */
    if (fread(header, 1, sizeof header, fin) != sizeof header) {
        fprintf(stderr, "Error: Failed to read size header\n");
        goto cleanup;
    }
    src_len = (uint32_t)header[0] |
              ((uint32_t)header[1] << 8) |
              ((uint32_t)header[2] << 16) |
              ((uint32_t)header[3] << 24);
    if (src_len > (uint32_t)INT_MAX) {
        fprintf(stderr, "Error: Size header is out of range (corrupt file?)\n");
        goto cleanup;
    }

    /* Compressed payload size = file size minus the 4-byte header. */
    if (fseek(fin, 0, SEEK_END) != 0 ||
        (file_size = ftell(fin)) < (long)sizeof header ||
        fseek(fin, (long)sizeof header, SEEK_SET) != 0) {
        fprintf(stderr, "Error: Cannot determine size of input file\n");
        goto cleanup;
    }
    if ((unsigned long)file_size - sizeof header > (unsigned long)INT_MAX) {
        fprintf(stderr, "Error: Compressed data is too large (limit is %d bytes)\n",
                INT_MAX);
        goto cleanup;
    }
    comp_len = (size_t)file_size - sizeof header;

    /* Read the compressed payload (skipped when empty to avoid malloc(0)). */
    if (comp_len > 0) {
        comp_buf = malloc(comp_len);
        if (!comp_buf) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            goto cleanup;
        }
        if (fread(comp_buf, 1, comp_len, fin) != comp_len) {
            fprintf(stderr, "Error: Failed to read compressed data\n");
            goto cleanup;
        }
    }
    fclose(fin);
    fin = NULL;

    /* Header and payload must agree on whether there is any data at all. */
    if ((src_len == 0) != (comp_len == 0)) {
        fprintf(stderr, "Error: Decompression failed\n");
        goto cleanup;
    }

    if (src_len > 0) {
        /* Allocate the output buffer and decode into it. */
        decomp_buf = malloc(src_len);
        if (!decomp_buf) {
            fprintf(stderr, "Error: Memory allocation failed\n");
            goto cleanup;
        }

        result_len = lzav_decompress(comp_buf, decomp_buf, (int)comp_len, (int)src_len);
        /* A negative value is an error code; a length different from the
           header means the file is inconsistent. */
        if (result_len < 0 || (uint32_t)result_len != src_len) {
            fprintf(stderr, "Error: Decompression failed\n");
            goto cleanup;
        }
    }

    /* Write the decompressed data. */
    fout = fopen(output_path, "wb");
    if (!fout) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
        goto cleanup;
    }
    if (src_len > 0) {
        write_ok = fwrite(decomp_buf, 1, src_len, fout) == src_len;
    }
    /* fclose flushes buffered data, so its result matters for writes. */
    if (fclose(fout) != 0) {
        write_ok = 0;
    }
    fout = NULL;
    if (!write_ok) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", output_path);
        goto cleanup;
    }

    end = clock();
    printf("Decompressed %zu bytes to %u bytes\n", comp_len, (unsigned)src_len);
    printf("Time: %.2f ms\n", (double)(end - start) * 1000 / CLOCKS_PER_SEC);
    rc = 0;

cleanup:
    if (fin) {
        fclose(fin);
    }
    if (fout) {
        fclose(fout);
    }
    free(comp_buf);
    free(decomp_buf);
    return rc;
}

/* Copy src into dst (capacity cap bytes, including the terminator).
   Returns 0 on success, -1 if src does not fit. */
static int copy_path_checked(char* dst, size_t cap, const char* src) {
    int written = snprintf(dst, cap, "%s", src);
    return (written < 0 || (size_t)written >= cap) ? -1 : 0;
}

/*
 * Entry point: parse the command line and run compression or decompression.
 *
 * Options:
 *   -h          print usage and exit with status 0
 *   -l <level>  compression level (1 = default, any other integer = high)
 *   -d          decompress instead of compress
 *   -o <file>   output path (default: input path + ".lzav" or ".decomp")
 * The last non-option argument is taken as the input path.
 *
 * Returns 0 on success, 1 on usage errors or when the operation fails.
 */
int main(int argc, char** argv) {
    char input_path[MAX_FILENAME] = {0};
    char output_path[MAX_FILENAME] = {0};
    int decompress_mode = 0;
    int compress_level = 1;

    for (int i = 1; i < argc; i++) {
        const char* arg = argv[i];

        if (strcmp(arg, "-h") == 0) {
            print_help();
            return 0;
        } else if (strcmp(arg, "-d") == 0) {
            decompress_mode = 1;
        } else if (strcmp(arg, "-l") == 0 || strcmp(arg, "-o") == 0) {
            /* Both options take a value; a missing value is an error rather
               than being silently ignored. */
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: Option '%s' requires an argument\n", arg);
                return 1;
            }
            const char* value = argv[++i];

            if (arg[1] == 'l') {
                char* end = NULL;
                long parsed = strtol(value, &end, 10);
                if (end == value || *end != '\0' ||
                    parsed < INT_MIN || parsed > INT_MAX) {
                    fprintf(stderr, "Error: Invalid compression level '%s'\n", value);
                    return 1;
                }
                compress_level = (int)parsed;
            } else if (copy_path_checked(output_path, sizeof output_path, value) != 0) {
                fprintf(stderr, "Error: Output path is too long (max %d characters)\n",
                        MAX_FILENAME - 1);
                return 1;
            }
        } else if (arg[0] != '-') {
            /* A truncated path would name a different file, so reject it. */
            if (copy_path_checked(input_path, sizeof input_path, arg) != 0) {
                fprintf(stderr, "Error: Input path is too long (max %d characters)\n",
                        MAX_FILENAME - 1);
                return 1;
            }
        } else {
            /* Unknown options are still ignored, but no longer silently. */
            fprintf(stderr, "Warning: Ignoring unknown option '%s'\n", arg);
        }
    }

    if (!input_path[0]) {
        print_help();
        return 1;
    }

    /* Derive the default output name from the input name. */
    if (!output_path[0]) {
        const char* ext = decompress_mode ? ".decomp" : ".lzav";
        int written = snprintf(output_path, sizeof output_path, "%s%s", input_path, ext);
        if (written < 0 || (size_t)written >= sizeof output_path) {
            fprintf(stderr, "Error: Default output path is too long; use -o\n");
            return 1;
        }
    }

    int result = decompress_mode
        ? decompress_lzav(input_path, output_path)
        : compress_lzav(input_path, output_path, compress_level);

    return result != 0 ? 1 : 0;
}

注意:计算输出缓冲区大小的函数lzav_compress_bound(src_len)和lzav_compress_bound_hi(src_len)必须分别与压缩函数lzav_compress_default和lzav_compress_hi配套使用。深度压缩所需的缓冲区更大,而且两者都超过原始文件大小;这只是压缩时需要分配的缓冲区上限,与最终输出的压缩文件大小不是一个概念。比如源文件大小为595203472字节时,lzav_compress_bound返回599171513,lzav_compress_bound_hi返回623546512。
编译命令行和执行结果如下,附上lz4和zstd的压缩大小和时间。

gcc lzavhd.c -o lzavhd -I . -O3
time ./lzavhd clickhouse
Compressed 595203472 bytes to 188672728 bytes (31.70%)
Time: 1218.29 ms

real    0m3.581s
user    0m0.806s
sys     0m0.419s
time ./lzavhd -l 2 clickhouse
Compressed 595203472 bytes to 165780399 bytes (27.85%)
Time: 4225.82 ms

real    0m6.489s
user    0m3.802s
sys     0m0.434s
time ./lzavhd -d clickhouse.lzav
Decompressed 165780399 bytes to 595203472 bytes
Time: 522.35 ms

real    0m2.597s
user    0m0.114s
sys     0m0.416s

time zstd clickhouse
clickhouse           : 23.59%   (   568 MiB =>    134 MiB, clickhouse.zst)

real    0m3.116s
user    0m1.424s
sys     0m0.241s

time lz4 -3 clickhouse clickhous.lz4
Compressed 595203472 bytes into 186838255 bytes ==> 31.39%

real    0m2.328s
user    0m3.502s
sys     0m0.235s

内部计时和外部用time命令计时差别挺大,留到以后分析。(一个可能的原因:程序内部用clock()计时,它统计的是进程占用的CPU时间,与上面结果中的user+sys大致吻合;而real是墙钟时间,还包含等待磁盘读写的时间。)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值