1.使用github.com/valyala/gozstd包
这是一个用go语言包装c语言zstd源代码的包,DeepSeek给出的调用此包的程序如下:
package main
import (
"flag"
"fmt"
// "io"
"os"
"path/filepath"
// "github.com/valyala/gozstd"
"gozstd"
)
// Command-line options. They are bound to flags in init and read by main
// and getOutputPath.
var (
// compressLevel holds the value of the -level flag (default 3).
compressLevel int
// decompress is true when -d is given.
decompress bool
// force is true when -f is given.
force bool
// outputFile holds the value of the -o flag; it is empty when -o is not given.
outputFile string
)
func init() {
flag.IntVar(&compressLevel, "level", 3, "compression level (1-19)")
flag.BoolVar(&decompress, "d", false, "decompress")
flag.BoolVar(&force, "f", false, "force overwrite")
flag.StringVar(&outputFile, "o", "", "output file")
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "gozstd version - Usage: %s [OPTIONS] INPUT [OUTPUT]\n", os.Args[0])
flag.PrintDefaults()
}
}
// main parses the command line, derives the output path, refuses to overwrite
// an existing file unless -f is given, and then compresses or decompresses
// the input file. Every failure is reported on stderr with exit status 1.
//
// NOTE: flag.Parse stops at the first non-flag argument, so options written
// after INPUT are not parsed, and only the first positional argument is read
// here (the [OUTPUT] shown in the usage text is not consulted).
func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		flag.Usage()
		os.Exit(1)
	}

	src := flag.Arg(0)
	dst := getOutputPath(src)

	// The existence check is skipped entirely when -f is given.
	if !force {
		if fileExists(dst) {
			fmt.Fprintf(os.Stderr, "Error: output file exists (use -f to overwrite)\n")
			os.Exit(1)
		}
	}

	var err error
	switch {
	case decompress:
		err = gozstdDecompress(src, dst)
	case compressLevel < 1 || compressLevel > 19:
		// The level is only validated on the compression path.
		fmt.Fprintln(os.Stderr, "Error: invalid compression level (1-19)")
		os.Exit(1)
	default:
		err = gozstdCompress(src, dst, compressLevel)
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
// gozstdCompress reads the whole input file into memory, compresses it with
// gozstd.CompressLevel at the given level, and writes the result to output
// with permission 0644. It returns the first read or write error.
func gozstdCompress(input, output string, level int) error {
	raw, readErr := os.ReadFile(input)
	if readErr != nil {
		return readErr
	}
	// A nil destination is passed to CompressLevel; the returned slice is written out.
	return os.WriteFile(output, gozstd.CompressLevel(nil, raw, level), 0644)
}
// gozstdDecompress reads the whole input file into memory, decompresses it
// with gozstd.Decompress, and writes the result to output with permission
// 0644. It returns the first read, decode or write error.
func gozstdDecompress(input, output string) error {
	packed, err := os.ReadFile(input)
	if err != nil {
		return err
	}
	plain, err := gozstd.Decompress(nil, packed)
	if err == nil {
		// Only write the output when decoding succeeded.
		err = os.WriteFile(output, plain, 0644)
	}
	return err
}
// Helper functions
// getOutputPath picks the output file name for input.
// An explicit -o value always wins; otherwise decompression strips a zstd
// extension via trimZstdExt and compression appends ".zst".
func getOutputPath(input string) string {
	switch {
	case outputFile != "":
		return outputFile
	case decompress:
		return trimZstdExt(input)
	default:
		return input + ".zst"
	}
}
// trimZstdExt returns path without its final extension when that extension
// is exactly ".zst" or ".zstd"; any other path is returned unchanged.
func trimZstdExt(path string) string {
	switch suffix := filepath.Ext(path); suffix {
	case ".zst", ".zstd":
		return path[:len(path)-len(suffix)]
	default:
		return path
	}
}
// fileExists reports whether path should be treated as existing.
// It returns false only when os.Stat fails with a "does not exist" error;
// any other Stat outcome, including other errors, yields true.
func fileExists(path string) bool {
	if _, statErr := os.Stat(path); os.IsNotExist(statErr) {
		return false
	}
	return true
}
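但是我直连github网站有问题,于是用如下命令从镜像网站克隆了github.com/valyala/gozstd的源代码,并把DeepSeek编写的程序gozip.go保存在克隆目录的上级目录中。直接编译报错;把导入路径改成相对路径也不行(模块模式下不支持相对导入路径),只好把整个gozstd目录复制到编译器报错信息所提示的目录(/par/go/src)下。再次编译时提示导入了未使用的包io,将其注释掉后编译通过,压缩和解压测试也正常。注意:Go的flag包遇到第一个非选项参数就停止解析,所以写在输入文件之后的-o v.txt没有生效,程序仍使用默认输出文件varchar.txt,而该文件已存在,于是报错;因此把压缩文件改名后再解压,也便于比较文件大小。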
git clone --depth=1 https://gitclone.com/github.com/valyala/gozstd
../go/bin/go build -o gozip gozip.go
gozip.go:10:2: package gozstd is not in std (/par/go/src/gozstd)
../go/bin/go build -o gozip gozip.go
gozip.go:10:2: "./gozstd" is relative, but relative import paths are not supported in module mode
cp gozstd /par/go/src/ -R
../go/bin/go build -o gozip gozip.go
# command-line-arguments
./gozip.go:6:2: "io" imported and not used
ls
gozip.go gozstd
../go/bin/go build -o gozip gozip.go
ls
gozip gozip.go gozstd
./gozip
gozstd version - Usage: ./gozip [OPTIONS] INPUT [OUTPUT]
-d decompress
-f force overwrite
-level int
compression level (1-19) (default 3)
-o string
output file
cp ../varchar.txt .
./gozip varchar.txt
./gozip -d varchar.txt.zst -o v.txt
Error: output file exists (use -f to overwrite)
mv varchar.txt.zst v.txt.zst
./gozip -d v.txt.zst
ls -l
total 158428
-rwxr-xr-x 1 root root 3825360 Aug 7 14:35 gozip
-rw-rw-r-- 1 1000 1000 2131 Aug 7 14:35 gozip.go
drwxr-xr-x 5 root root 4096 Aug 7 14:08 gozstd
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v.txt
-rw-r--r-- 1 root root 46459422 Aug 7 14:37 v.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:37 varchar.txt
2.使用github.com/klauspost/compress/包
这是一个用纯go语言编写zstd源代码的包,DeepSeek给出的调用此包的程序如下
package main
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"github.com/klauspost/compress/zstd"
// "compress2/zstd"
)
// Command-line options. They are bound to flags in init and read by main.
var (
// compressLevel holds the value of the -level flag (default 3).
compressLevel int
// decompress is true when -d is given.
decompress bool
// force is true when -f is given.
force bool
// outputFile holds the value of the -o flag; it is empty when -o is not given.
outputFile string
)
func init() {
flag.IntVar(&compressLevel, "level", 3, "compression level (1-19)")
flag.BoolVar(&decompress, "d", false, "decompress")
flag.BoolVar(&force, "f", false, "force overwrite")
flag.StringVar(&outputFile, "o", "", "output file")
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s [OPTIONS] INPUT [OUTPUT]\n", os.Args[0])
fmt.Fprintln(os.Stderr, "Options:")
flag.PrintDefaults()
fmt.Fprintln(os.Stderr, "\nExamples:")
fmt.Fprintln(os.Stderr, " Compress: zstdutil -level 5 file.txt")
fmt.Fprintln(os.Stderr, " Decompress: zstdutil -d file.txt.zst")
}
}
// main parses the command line, derives the output path, refuses to overwrite
// an existing file unless -f is given, and then compresses or decompresses
// the input file. Every failure is reported on stderr with exit status 1.
//
// NOTE: flag.Parse stops at the first non-flag argument, so options written
// after INPUT are not parsed, and only the first positional argument is read
// here (the [OUTPUT] shown in the usage text is not consulted).
func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		flag.Usage()
		os.Exit(1)
	}
	src := flag.Arg(0)

	// Decide where the result goes.
	dst := outputFile
	switch {
	case dst != "":
		// An explicit -o value is used as is.
	case decompress:
		// Without -o, decompression requires a recognisable extension to strip.
		suffix := filepath.Ext(src)
		if suffix != ".zst" && suffix != ".zstd" {
			fmt.Fprintf(os.Stderr, "Error: input file must have .zst or .zstd extension for decompression\n")
			os.Exit(1)
		}
		dst = src[:len(src)-len(suffix)]
	default:
		dst = src + ".zst"
	}

	// Only a successful Stat counts as "already exists".
	if !force {
		if _, statErr := os.Stat(dst); statErr == nil {
			fmt.Fprintf(os.Stderr, "Error: output file %s already exists (use -f to overwrite)\n", dst)
			os.Exit(1)
		}
	}

	var err error
	switch {
	case decompress:
		err = decompressFile(src, dst)
	case compressLevel < 1 || compressLevel > 19:
		// The level is only validated on the compression path.
		fmt.Fprintln(os.Stderr, "Error: compression level must be between 1 and 19")
		os.Exit(1)
	default:
		err = compressFile(src, dst, compressLevel)
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
// compressFile streams the contents of input through a zstd encoder into
// output, which is created or truncated.
//
// level is a zstd-style level; it is converted to an encoder level with
// zstd.EncoderLevelFromZstd. The caller is expected to have validated it.
//
// Closing the encoder writes out the data it still buffers, so the errors
// from encoder.Close and from closing the output file are returned rather
// than discarded: a failed final flush must not be reported as success.
// The first error encountered is the one returned.
func compressFile(input, output string, level int) (err error) {
	inFile, err := os.Open(input)
	if err != nil {
		return err
	}
	defer inFile.Close() // read-only handle; its close error carries no information

	outFile, err := os.Create(output)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close failure on the written file unless an earlier error exists.
		if closeErr := outFile.Close(); err == nil {
			err = closeErr
		}
	}()

	encoder, err := zstd.NewWriter(outFile, zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(level)))
	if err != nil {
		return err
	}

	if _, err = io.Copy(encoder, inFile); err != nil {
		// The copy error is the one worth reporting; still release the encoder.
		encoder.Close()
		return err
	}
	// Close must finish before outFile is closed by the deferred function above.
	return encoder.Close()
}
// decompressFile streams the zstd-compressed contents of input through a
// decoder into output, which is created or truncated.
//
// The error from closing the output file is returned when nothing failed
// earlier, so that a write failure surfacing at close time is not silently
// reported as success. The first error encountered is the one returned.
func decompressFile(input, output string) (err error) {
	inFile, err := os.Open(input)
	if err != nil {
		return err
	}
	defer inFile.Close() // read-only handle; its close error carries no information

	outFile, err := os.Create(output)
	if err != nil {
		return err
	}
	defer func() {
		// Surface a close failure on the written file unless an earlier error exists.
		if closeErr := outFile.Close(); err == nil {
			err = closeErr
		}
	}()

	decoder, err := zstd.NewReader(inFile)
	if err != nil {
		return err
	}
	defer decoder.Close()

	_, err = io.Copy(outFile, decoder)
	return err
}
有了前面的经验,直接克隆和复制github镜像库,
git clone --depth=1 https://gitclone.com/github.com/klauspost/compress/
cp compress/zstd /par/go/src/compress -R
注意zstd是compress下的一个子目录,而go安装包中已有/par/go/src/compress目录,所以把子目录zstd复制到/par/go/src/compress下以避免冲突。
但是其实不行,因为有依赖关系,即使把整个compress目录复制过去也不行。报错如下
../go/bin/go build -o zstdutil zstdutil.go
../go/src/compress/zstd/enc_best.go:11:2: no required module provides package github.com/klauspost/compress: go.mod file not found in current directory or any parent directory; see 'go help modules'
../go/src/compress/zstd/blockdec.go:14:2: no required module provides package github.com/klauspost/compress/huff0: go.mod file not found in current directory or any parent directory; see 'go help modules'
..
cp compress /par/go/src/compress2 -R
../go/bin/go build -o zstdutil zstdutil.go
../go/src/compress2/zstd/enc_best.go:11:2: no required module provides package github.com/klauspost/compress: go.mod file not found in current directory or any parent directory; see 'go help modules'
此路不通,只好用DeepSeek介绍的另一种方法,不修改自己程序的包含路径,而使用go.mod文件中的replace指令来替换目录,把zstd子目录下的go.mod复制到自己程序目录下,直接用报错:
../go/bin/go build -o zstdutil zstdutil.go
zstdutil.go:10:2: github.com/klauspost/compress@v1.15.15 (replaced by ../..): reading /go.mod: open /go.mod: no such file or directory
把最后一行改为:
replace github.com/klauspost/compress => ./compress,以引用当前目录下的compress目录。改后的完整内容如下(不确定其余各行是否必需):
module github.com/klauspost/compress/s2/_generate
go 1.22.0
toolchain go1.22.4
require (
github.com/klauspost/compress v1.15.15
github.com/mmcloughlin/avo v0.6.0
)
require (
golang.org/x/mod v0.21.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/tools v0.25.0 // indirect
)
replace github.com/klauspost/compress => ./compress
编译成功,测试通过,只是默认级别(-level 3)几乎没有压缩效果:压缩后的v3.txt.zst为55968197字节,反而比55966906字节的原文件略大;改为-level 9后就正常了(46459418字节)。
../go/bin/go build -o zstdutil zstdutil.go
./zstdutil
Usage: ./zstdutil [OPTIONS] INPUT [OUTPUT]
Options:
-d decompress
-f force overwrite
-level int
compression level (1-19) (default 3)
-o string
output file
Examples:
Compress: zstdutil -level 5 file.txt
Decompress: zstdutil -d file.txt.zst
./zstdutil v.txt -o v2.txt.zst
Error: output file v.txt.zst already exists (use -f to overwrite)
mv v.txt v2.txt
./zstdutil v2.txt
mv v2.txt.zst v3.txt.zst
./zstdutil d v3.txt.zst
Error: open d: no such file or directory
./zstdutil -d v3.txt.zst
ls -l
total 271120
drwxr-xr-x 18 root root 4096 Aug 7 14:49 compress
-rw-rw-r-- 1 1000 1000 351 Aug 7 15:41 go.mod
-rwxr-xr-x 1 root root 3825360 Aug 7 14:35 gozip
-rw-rw-r-- 1 1000 1000 2131 Aug 7 14:35 gozip.go
drwxr-xr-x 5 root root 4096 Aug 7 14:08 gozstd
-rw-r--r-- 1 root root 46459422 Aug 7 14:37 v.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v2.txt
-rw-r--r-- 1 root root 55966906 Aug 7 15:46 v3.txt
-rw-r--r-- 1 root root 55968197 Aug 7 15:45 v3.txt.zst
-rw-r--r-- 1 root root 55966906 Aug 7 14:37 varchar.txt
-rwxr-xr-x 1 root root 3441123 Aug 7 15:41 zstdutil
-rw-rw-r-- 1 1000 1000 2737 Aug 7 15:40 zstdutil.go
rm v3.*
./zstdutil -level 9 v2.txt
ls -l v2*
-rw-r--r-- 1 root root 55966906 Aug 7 14:40 v2.txt
-rw-r--r-- 1 root root 46459418 Aug 7 16:00 v2.txt.zst
55MB的文件测不出性能,用tpch 的2GB大小的lineitem文件测试结果如下
#使用带GLIBC低版本的环境
#zstd官方二进制文件
time zstd lineitem.csv
lineitem.csv : 30.66% ( 2.18 GiB => 685 MiB, lineitem.csv.zst)
real 0m17.287s
user 0m26.980s
sys 0m2.028s
ls -l lineitem.csv.zst
-rw-r--r-- 1 kylin kylin 718508544 7月 17 10:30 lineitem.csv.zst
rm lineitem.csv.zst
time gozstd/gozip lineitem.csv
gozstd/gozip: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.32' not found (required by gozstd/gozip)
gozstd/gozip: /lib/aarch64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by gozstd/gozip)
real 0m0.026s
user 0m0.000s
sys 0m0.000s
time gozstd/zstdutil lineitem.csv
real 0m33.128s
user 0m34.316s
sys 0m1.992s
ls -l lineitem.csv.zst
-rw-rw-r-- 1 kylin kylin 726424595 8月 7 16:42 lineitem.csv.zst
#使用带GLIBC高版本的环境
rm lineitem.csv.zst
time gozstd/gozip lineitem.csv
real 1m2.271s
user 0m34.496s
sys 0m19.860s
ls -l lineitem.csv.zst
-rw-r--r-- 1 root root 719013634 Aug 7 16:46 lineitem.csv.zst
| 工具 | zstd官方程序(C) | 纯Go实现(klauspost/compress,zstdutil) | Go调用C zstd(valyala/gozstd,gozip) |
|---|---|---|---|
| 压缩后大小(MB) | 718 | 726 | 719 |
| 压缩时间(秒,real) | 17 | 33 | 62 |
564

被折叠的 条评论
为什么被折叠?



