lz4和zstd都出自Yann Collet之手(zstd是他在Facebook期间开发并开源的,lz4则早于他加入Facebook),下面是网传的压缩算法对比表:
算法 压缩率 压缩速度 解压速度 支持流式压缩 适用场景
LZ4 低 极快 极快 是 实时数据压缩、日志压缩、内存缓存等
Zstd 高 快 快 是 文件压缩、网络传输、数据库备份等
1.从源码安装zstd压缩工具
wget https://github.1git.de/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz
tar xf zstd-1.5.7.tar.gz
mv zstd-1.5.7 zstd
cd zstd
make
sudo make install
make命令既生成命令行工具又生成动态链接库,make install把它们放入搜索路径。
注意:编译中输出如下信息:
==> building zstd with .gz compression support
==> no liblzma, building zstd without .xz/.lzma support
==> no liblz4, building zstd without .lz4 support
说明zstd工具还支持gz格式,如果先安装lz4,也会编译进去lz4支持。
2.从源码安装lz4压缩工具
wget https://github.1git.de/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz
tar xf lz4-1.10.0.tar.gz
mv lz4-1.10.0 lz4
cd lz4
make
sudo make install
3.提示词
我先把网上的一个示例代码给它,让它编写main函数和测试代码。编译没问题,但多次尝试压缩都报错Error: ERROR_dstMaxSize_tooSmall。经过调试,
我把max_compressed改大20:
size_t src_size = strlen(original) + 1; // +1 for null terminator
size_t max_compressed = LZ4F_compressFrameBound(src_size, NULL)+20;
重新编译执行就通过了。
DeepSeek对此分析
LZ4F_compressFrameBound 的工作原理
该函数返回的是理论最大值,计算公式为:
最大大小 = 帧头(7-15字节) + 数据块最大大小 + 帧尾(4字节) + 校验和(可选4字节)
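但实际压缩时,可能因对齐、填充或内部实现细节需要额外几个字节。
(注:按lz4frame.h的说明,LZ4F_compressFrameBound给出的是一次性调用LZ4F_compressFrame时的输出上界;流式调用LZ4F_compressUpdate/LZ4F_compressEnd时,输出缓冲区应按LZ4F_compressBound计算,而不是靠额外多加若干字节。)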
解决了正确性问题,就继续用提示词让它添加功能。
这一版compress_streaming正确,请增加改变压缩级别,写一个测试不同级别的测试用例,计时和比较文件大小
请为zstd写一个同样功能的CLI,包含必要的头文件,只用给出代码,不做别的
请添加命令行参数-e 指定压缩算法 ,以便扩展
得到的整合程序如下:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <zstd.h>
#include <lz4.h>
#include <lz4frame.h>
#define MAX_FILENAME 256
typedef enum {
COMPRESS_ZSTD,
COMPRESS_LZ4,
COMPRESS_INVALID
} compress_algorithm_t;
/* Write the usage summary (program banner, synopsis and option list) to stdout. */
void print_help() {
    static const char* const usage_lines[] = {
        "zstdcli - Advanced Compression Tool\n",
        "Usage: zstdcli [options] <input_file>\n",
        "Options:\n",
        " -e <alg> Compression algorithm (zstd|lz4, default: zstd)\n",
        " -l <level> Compression level (zstd:1-22, lz4:1-12, default:3)\n",
        " -o <file> Output file (default: <input>.<ext>)\n",
        " -h Show this help\n",
    };
    size_t const line_count = sizeof usage_lines / sizeof usage_lines[0];

    /* None of the lines contains a '%', so fputs emits exactly what printf would. */
    for (size_t idx = 0; idx < line_count; ++idx) {
        fputs(usage_lines[idx], stdout);
    }
}
int compress_zstd(const char* input_path, const char* output_path, int level) {
FILE* fin = fopen(input_path, "rb");
if (!fin) {
fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
return -1;
}
FILE* fout = fopen(output_path, "wb");
if (!fout) {
fclose(fin);
fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
return -1;
}
size_t buff_in_size = ZSTD_CStreamInSize();
size_t buff_out_size = ZSTD_CStreamOutSize();
void* buff_in = malloc(buff_in_size);
void* buff_out = malloc(buff_out_size);
ZSTD_CCtx* cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
size_t total_read = 0, total_written = 0;
clock_t start = clock();
while (1) {
size_t read = fread(buff_in, 1, buff_in_size, fin);
if (read == 0) break;
ZSTD_inBuffer in_buf = { buff_in, read, 0 };
while (in_buf.pos < in_buf.size) {
ZSTD_outBuffer out_buf = { buff_out, buff_out_size, 0 };
size_t remaining = ZSTD_compressStream2(cctx, &out_buf, &in_buf, ZSTD_e_continue);
if (ZSTD_isError(remaining)) {
fprintf(stderr, "Zstd compression error: %s\n", ZSTD_getErrorName(remaining));
goto cleanup;
}
fwrite(buff_out, 1, out_buf.pos, fout);
total_written += out_buf.pos;
}
total_read += read;
}
ZSTD_inBuffer final_in = { NULL, 0, 0 };
while (1) {
ZSTD_outBuffer final_out = { buff_out, buff_out_size, 0 };
size_t remaining = ZSTD_compressStream2(cctx, &final_out, &final_in, ZSTD_e_end);
if (ZSTD_isError(remaining)) {
fprintf(stderr, "Zstd finalization error: %s\n", ZSTD_getErrorName(remaining));
goto cleanup;
}
fwrite(buff_out, 1, final_out.pos, fout);
total_written += final_out.pos;
if (remaining == 0) break;
}
clock_t end = clock();
printf("Zstd compressed %zu bytes to %zu bytes (%.2f%%)\n",
total_read, total_written, (total_written * 100.0) / total_read);
printf("Time: %.2f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);
cleanup:
ZSTD_freeCCtx(cctx);
free(buff_in);
free(buff_out);
fclose(fin);
fclose(fout);
return 0;
}
int compress_lz4(const char* input_path, const char* output_path, int level) {
FILE* fin = fopen(input_path, "rb");
if (!fin) {
fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
return -1;
}
FILE* fout = fopen(output_path, "wb");
if (!fout) {
fclose(fin);
fprintf(stderr, "Error: Cannot create output file '%s'\n", output_path);
return -1;
}
LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;
prefs.compressionLevel = level;
prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
LZ4F_cctx* cctx;
size_t ret = LZ4F_createCompressionContext(&cctx, LZ4F_VERSION);
if (LZ4F_isError(ret)) {
fclose(fin);
fclose(fout);
fprintf(stderr, "LZ4 context error: %s\n", LZ4F_getErrorName(ret));
return -1;
}
char in_buf[64 * 1024];
char out_buf[LZ4F_compressFrameBound(sizeof(in_buf), &prefs) + 32];
size_t in_size, out_size;
size_t total_read = 0, total_written = 0;
clock_t start = clock();
// Write header
out_size = LZ4F_compressBegin(cctx, out_buf, sizeof(out_buf), &prefs);
if (LZ4F_isError(out_size)) goto cleanup;
fwrite(out_buf, 1, out_size, fout);
total_written += out_size;
// Compress data
while ((in_size = fread(in_buf, 1, sizeof(in_buf), fin)) > 0) {
out_size = LZ4F_compressUpdate(cctx, out_buf, sizeof(out_buf), in_buf, in_size, NULL);
if (LZ4F_isError(out_size)) goto cleanup;
fwrite(out_buf, 1, out_size, fout);
total_read += in_size;
total_written += out_size;
}
// Write footer
out_size = LZ4F_compressEnd(cctx, out_buf, sizeof(out_buf), NULL);
if (LZ4F_isError(out_size)) goto cleanup;
fwrite(out_buf, 1, out_size, fout);
total_written += out_size;
clock_t end = clock();
printf("LZ4 compressed %zu bytes to %zu bytes (%.2f%%)\n",
total_read, total_written, (total_written * 100.0) / total_read);
printf("Time: %.2f seconds\n", (double)(end - start) / CLOCKS_PER_SEC);
cleanup:
LZ4F_freeCompressionContext(cctx);
fclose(fin);
fclose(fout);
return LZ4F_isError(out_size) ? -1 : 0;
}
/* Copy src into dst (capacity dst_size bytes). Returns 0, or -1 if src does not fit. */
static int copy_path(char* dst, size_t dst_size, const char* src) {
    int const needed = snprintf(dst, dst_size, "%s", src);
    return (needed < 0 || (size_t)needed >= dst_size) ? -1 : 0;
}

/*
 * Parse text as a base-10 integer into *level_out. Returns 0 on success and
 * -1 when text is not entirely a number. Values outside the int range are
 * saturated; the caller's range check then rejects them.
 */
static int parse_level(const char* text, int* level_out) {
    char* end = NULL;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0') {
        return -1;
    }
    if (value > INT_MAX) {
        value = INT_MAX;
    } else if (value < INT_MIN) {
        value = INT_MIN;
    }
    *level_out = (int)value;
    return 0;
}

/*
 * Entry point: parse the command line, pick defaults, and run the selected
 * compressor on one input file.
 *
 * Options: -e <zstd|lz4>, -l <level>, -o <output file>, -h. The first
 * non-option argument is the input file; if several are given the last wins.
 * Returns 0 on success (or after -h) and 1 on a usage or compression error.
 */
int main(int argc, char** argv) {
    char input_path[MAX_FILENAME] = {0};
    char output_path[MAX_FILENAME] = {0};
    compress_algorithm_t algorithm = COMPRESS_ZSTD;
    int level = 3;

    /* Parse arguments */
    for (int i = 1; i < argc; i++) {
        const char* arg = argv[i];

        if (strcmp(arg, "-h") == 0) {
            print_help();
            return 0;
        }

        int const takes_value = strcmp(arg, "-e") == 0 ||
                                strcmp(arg, "-l") == 0 ||
                                strcmp(arg, "-o") == 0;
        if (takes_value) {
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: Option %s requires an argument\n", arg);
                print_help();
                return 1;
            }
            const char* value = argv[++i];

            if (arg[1] == 'e') {
                if (strcmp(value, "zstd") == 0) {
                    algorithm = COMPRESS_ZSTD;
                } else if (strcmp(value, "lz4") == 0) {
                    algorithm = COMPRESS_LZ4;
                } else {
                    algorithm = COMPRESS_INVALID;
                }
            } else if (arg[1] == 'l') {
                if (parse_level(value, &level) != 0) {
                    fprintf(stderr, "Error: Invalid compression level '%s'\n", value);
                    return 1;
                }
            } else {
                if (copy_path(output_path, sizeof output_path, value) != 0) {
                    fprintf(stderr, "Error: Output path too long (max %d characters)\n",
                            MAX_FILENAME - 1);
                    return 1;
                }
            }
        } else if (arg[0] != '-') {
            if (copy_path(input_path, sizeof input_path, arg) != 0) {
                fprintf(stderr, "Error: Input path too long (max %d characters)\n",
                        MAX_FILENAME - 1);
                return 1;
            }
        } else {
            fprintf(stderr, "Error: Unknown option '%s'\n", arg);
            print_help();
            return 1;
        }
    }

    /* Validate arguments */
    if (!input_path[0]) {
        print_help();
        return 1;
    }
    if (algorithm == COMPRESS_INVALID) {
        fprintf(stderr, "Error: Invalid compression algorithm\n");
        print_help();
        return 1;
    }

    /* Set default output filename */
    if (!output_path[0]) {
        const char* ext = (algorithm == COMPRESS_ZSTD) ? ".zst" : ".lz4";
        int const needed = snprintf(output_path, sizeof output_path, "%s%s", input_path, ext);
        /* A truncated name could collapse onto the input path, so refuse it outright. */
        if (needed < 0 || (size_t)needed >= sizeof output_path) {
            fprintf(stderr, "Error: Output path too long (max %d characters)\n",
                    MAX_FILENAME - 1);
            return 1;
        }
    }

    /* Opening the output with "wb" truncates it, which would destroy the input. */
    if (strcmp(input_path, output_path) == 0) {
        fprintf(stderr, "Error: Output file must differ from input file '%s'\n", input_path);
        return 1;
    }

    /* Validate level ranges */
    if (algorithm == COMPRESS_ZSTD && (level < 1 || level > 22)) {
        fprintf(stderr, "Warning: Zstd level %d out of range (1-22), using 3\n", level);
        level = 3;
    } else if (algorithm == COMPRESS_LZ4 && (level < 1 || level > 12)) {
        fprintf(stderr, "Warning: LZ4 level %d out of range (1-12), using 3\n", level);
        level = 3;
    }

    /* Execute compression */
    int result;
    if (algorithm == COMPRESS_ZSTD) {
        result = compress_zstd(input_path, output_path, level);
    } else {
        result = compress_lz4(input_path, output_path, level);
    }
    return result != 0 ? 1 : 0;
}
编译命令行:库文件已经在搜索路径中的,不用-L参数指定;为了找到头文件,需要用-I参数指定头文件所在目录。
gcc zmt.c -llz4 -lzstd -o zmt -I lz4/programs -I lz4/lib -I zstd/programs -I zstd/lib -O3
或
gcc zmt.c -lchdb -o zmtchdb -I lz4/programs -I lz4/lib -I zstd/programs -I zstd/lib -O3 -L ./
两者的功能和性能没有区别,只是调用不同的动态链接库,可以用ldd命令验证,后者可以在未安装lz4和zstd工具,但有libchdb.so的机器上运行。
ldd zmt
linux-vdso.so.1 => (0x0000007f9679c000)
/usr/lib/libzfh.so (0x0000007f96702000)
liblz4.so.1 => /usr/local/lib/liblz4.so.1 (0x0000007f966c5000)
libzstd.so.1 => /usr/local/lib/libzstd.so.1 (0x0000007f965f0000)
ldd zmtchdb
linux-vdso.so.1 => (0x0000007f7cada000)
/usr/lib/libzfh.so (0x0000007f7ca40000)
libchdb.so => ./libchdb.so (0x0000007f6314b000)
运行时,需要用LD_LIBRARY_PATH指定动态库的非标准路径
export LD_LIBRARY_PATH=./:/par:/usr/local/lib
./zmtchdb -e zstd clickhouse -l 6
Zstd compressed 549101800 bytes to 110685174 bytes (20.16%)
Time: 8.74 seconds
./zmtchdb -e lz4 clickhouse -l 2
LZ4 compressed 549101800 bytes to 175161389 bytes (31.90%)
Time: 3.61 seconds