2014-10-01 wcdj
摘要:本文介绍使用libzip读取和修改zip文件comment的方法,并支持在comment中添加扩展字符。前提是需要修改libzip的源码使其允许添加扩展字符,否则会因为_zip_guess_encoding的编码判断而设置失败(修改代码 zip_set_archive_comment.c:65)。
0 方法
(1) 首先要安装zlib和libzip编译环境:
初始化编译libzip
./configure --prefix="/Users/gerryyang/LAMP/libzip/install/libzip-0.11.2" --with-zlib="/Users/gerryyang/LAMP/zlib/install/zlib-1.2.8"
(2) 修改libzip的源码:
去除对comment编码格式的判断,zip_set_archive_comment.c:65
/*
 * Set (or clear) the comment of the whole archive `za`.
 *
 * comment/len give the raw bytes of the new comment; len == 0 clears it.
 * Returns 0 on success and -1 on failure, in which case za->error is set
 * for the read-only and invalid-argument cases (and passed to
 * _zip_string_new for its own failures).
 *
 * Local patch: the encoding check that used to reject comments guessed as
 * CP437 is compiled out (#if 0) so that arbitrary bytes can be stored.
 */
ZIP_EXTERN int
zip_set_archive_comment(struct zip *za, const char *comment, zip_uint16_t len)
{
    struct zip_string *new_comment = NULL;
    int same_as_original;

    /* Archives opened read-only cannot be modified. */
    if (ZIP_IS_RDONLY(za)) {
        _zip_error_set(&za->error, ZIP_ER_RDONLY, 0);
        return -1;
    }

    /* A non-zero length needs an actual buffer. */
    if (comment == NULL && len > 0) {
        _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
        return -1;
    }

    if (len > 0) {
        new_comment = _zip_string_new((const zip_uint8_t *)comment, len, ZIP_FL_ENC_GUESS, &za->error);
        if (new_comment == NULL) {
            printf("_zip_string_new err\n");
            return -1;
        }
#if 0
        if (_zip_guess_encoding(new_comment, ZIP_ENCODING_UNKNOWN) == ZIP_ENCODING_CP437) {
            printf("_zip_guess_encoding err\n");
            _zip_string_free(new_comment);
            _zip_error_set(&za->error, ZIP_ER_INVAL, 0);
            return -1;
        }
#endif
    }

    /* Drop any previously pending comment change before deciding what to store. */
    _zip_string_free(za->comment_changes);
    za->comment_changes = NULL;

    /* "Unchanged" means: equal to the original comment, or both absent. */
    if (za->comment_orig != NULL)
        same_as_original = (_zip_string_equal(za->comment_orig, new_comment) != 0);
    else
        same_as_original = (new_comment == NULL);

    if (same_as_original) {
        _zip_string_free(new_comment);
        za->comment_changed = 0;
        return 0;
    }

    /* Ownership of new_comment moves to the archive. */
    za->comment_changes = new_comment;
    za->comment_changed = 1;
    return 0;
}
zip_utf-8.c:119
/*
 * Determine the encoding of `str` and compare it with `expected_encoding`.
 *
 * A NULL string is reported as ZIP_ENCODING_ASCII. If str->encoding is
 * already set (not ZIP_ENCODING_UNKNOWN) that value is reused; otherwise the
 * raw bytes are scanned and the result is stored back into str->encoding.
 *
 * Returns the detected encoding, or ZIP_ENCODING_ERROR when the caller gave
 * an expected encoding that the (non-ASCII) detected encoding does not match.
 */
enum zip_encoding_type
_zip_guess_encoding(struct zip_string *str, enum zip_encoding_type expected_encoding)
{
enum zip_encoding_type enc;
const zip_uint8_t *name;
zip_uint32_t i, j, ulen;
if (str == NULL)
return ZIP_ENCODING_ASCII;
name = str->raw;
/* Reuse a previously determined encoding instead of rescanning. */
if (str->encoding != ZIP_ENCODING_UNKNOWN)
enc = str->encoding;
else {
/* Start from ASCII and downgrade as soon as a byte says otherwise. */
enc = ZIP_ENCODING_ASCII;
for (i=0; i<str->length; i++) {
/* Bytes 32..127 plus CR, LF and TAB do not change the verdict. */
if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t')
continue;
/* Any other byte: tentatively UTF-8, to be confirmed below. */
enc = ZIP_ENCODING_UTF8_GUESSED;
/* ulen = number of follow-up bytes that must come after this byte
 * (presumably the UTF-8 lead-byte patterns for 2/3/4-byte sequences;
 * the mask macros are defined outside this excerpt). */
if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH)
ulen = 1;
else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH)
ulen = 2;
else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH)
ulen = 3;
else {
/* Matches none of the three patterns: fall back to CP437. */
enc = ZIP_ENCODING_CP437;
break;
}
/* The follow-up bytes would run past the end of the string. */
if (i + ulen >= str->length) {
enc = ZIP_ENCODING_CP437;
break;
}
/* Every follow-up byte must match the continuation pattern. */
for (j=1; j<=ulen; j++) {
if ((name[i+j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) {
enc = ZIP_ENCODING_CP437;
/* goto rather than break: must leave both loops at once. */
goto done;
}
}
/* Skip the follow-up bytes; the loop's i++ then moves past the last one. */
i += ulen;
}
}
done:
/* Cache the verdict on the string for later calls. */
str->encoding = enc;
if (expected_encoding != ZIP_ENCODING_UNKNOWN) {
/* Caller asserts UTF-8 and the scan agrees: promote guessed to known. */
if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED)
str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN;
/* ASCII is accepted for any expectation; any other mismatch is an error. */
if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII)
return ZIP_ENCODING_ERROR;
}
return enc;
}
1 测试代码
参考代码:
https://github.com/gerryyang/mac-utils/tree/master/tools/libzip/src
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <errno.h>
#include "zip.h"
using namespace std;
/*
 * Encode `len` bytes from `src` as upper-case hexadecimal text in `dst`.
 *
 * Each input byte becomes two characters (high nibble first) and the output
 * is NUL-terminated, so `dst` must have room for 2 * len + 1 bytes. The
 * capacity cannot be checked here; sizing the buffer is the caller's job.
 *
 * A non-positive `len` writes only the terminator.
 *
 * Returns 0 on success, -1 if `dst` is NULL or if `src` is NULL while
 * `len` is positive (nothing is written in that case).
 */
int encode_hex_string(const unsigned char *src, int len, unsigned char *dst)
{
    /* static const: built once instead of being copied to the stack per call. */
    static const unsigned char hex_digits[] = "0123456789ABCDEF";

    if (dst == NULL || (src == NULL && len > 0))
    {
        return -1;
    }

    for (int i = 0; i < len; ++i)
    {
        *dst++ = hex_digits[(src[i] >> 4) & 0x0f];
        *dst++ = hex_digits[src[i] & 0x0f];
    }
    *dst = '\0';
    return 0;
}
int main(int argc, char* argv[])
{
if (argc < 3)
{
printf("usage: %s zipfile zipcomment\n", argv[0]);
exit(1);
}
string zipfile = argv[1];
string zipcomment = argv[2];
int error;
struct zip * zipfd = zip_open(zipfile.c_str(), ZIP_CHECKCONS, &error);
if (zipfd == NULL)
{
switch (error)
{
case ZIP_ER_NOENT:
printf("The file specified by path does not exist and ZIP_CREATE is not set [%d]\n", error);
break;
case ZIP_ER_EXISTS:
printf("The file specified by path exists and ZIP_EXCL is set [%d]\n", error);
break;
case ZIP_ER_INVAL:
printf("The path argument is NULL [%d]\n", error);
break;
case ZIP_ER_NOZIP:
printf("The file specified by path is not a zip archive [%d]\n", error);
break;
case ZIP_ER_OPEN:
printf("The file specified by path could not be opened [%d]\n", error);
break;
case ZIP_ER_READ:
printf("A read error occurred; see errno for details [%d]\n", error);
break;
case ZIP_ER_SEEK:
printf("The file specified by path does not allow seeks [%d]\n", error);
break;
default:
printf("unknown err [%d]\n", error);
break;
}
exit(1);
}
// get the comment for the entire zip archive
int commentlen = 0;
const char * comment = zip_get_archive_comment(zipfd, &commentlen, ZIP_FL_ENC_RAW);
if (comment == NULL)
{
printf("zip_get_archive_comment get null or err[%d:%s]\n", errno, strerror(errno));
}
else
{
printf("zip_get_archive_comment[%d:%s]\n", commentlen, comment);
char copy[1024] = {0};
memcpy(copy, comment, commentlen);
unsigned char hex[1024] = {0};
encode_hex_string((unsigned char *)copy, commentlen, hex);
printf("zip_get_archive_comment hex[%d:%s]\n", commentlen, hex);
}
// Midas Header
// idx:0 bytes:2 0X96FA
// idx:2 bytes:2 comment len = strlen(channelId) + 0D0A
// idx:4 bytes:N channelId=xxx
// idx:4+N bytes:2 end:0X0D0A
char dstcomment[1024] = {0};
zip_uint16_t dstlen = 0;
memset(dstcomment + dstlen, 0XFA, 1);
dstlen += 1;
memset(dstcomment + dstlen, 0X96, 1);
dstlen += 1;
memset(dstcomment + dstlen, (zipcomment.length() + 2) % 0XFF, 1);// 0D0A
dstlen += 1;
memset(dstcomment + dstlen, (zipcomment.length() + 2) / 0XFF, 1);
dstlen += 1;
memcpy(dstcomment + dstlen, zipcomment.data(), zipcomment.length());
dstlen += zipcomment.length();
memset(dstcomment + dstlen, 0X0D, 1);
dstlen += 1;
memset(dstcomment + dstlen, 0X0A, 1);
dstlen += 1;
unsigned char hex[1024] = {0};
encode_hex_string((unsigned char *)dstcomment, dstlen, hex);
printf("zip_set_archive_comment hex[%d:%s]\n", dstlen, hex);
// sets the comment for the entire zip archive
// If comment is NULL and len is 0, the archive comment will be removed
// comment must be encoded in ASCII or UTF-8
int iret = zip_set_archive_comment(zipfd, dstcomment, dstlen);// err !!!
if (iret != 0)
{
printf("zip_set_archive_comment err[%d:%s]\n", iret, strerror(errno));
switch (iret)
{
case ZIP_ER_INVAL:
printf("zip_set_archive_comment: len is less than 0 or longer than the maximum comment length in a zip file (65535), or comment is not a valid UTF-8 encoded string\n");
break;
case ZIP_ER_MEMORY:
printf("zip_set_archive_comment: Required memory could not be allocated\n");
break;
default:
printf("zip_set_archive_comment: unknown err\n");
break;
}
}
// close, If any files within were changed, those changes are written to disk first
iret = zip_close(zipfd);
if (iret != 0)
{
printf("zip_close err[%d:%s]\n", errno, strerror(errno));
}
return 0;
}
2 总结
通过使用libzip可以方便地对zip的comment内容进行修改,但是默认限制必须使用可见的字符集;通过对libzip源码的简单修改,可以做到添加扩展的字符集。除了通过代码的方式,也可以直接使用命令行工具zip和unzip (-z add zipfile comment) 修改和读取zip的comment内容。
3 参考
[1] http://www.nih.at/libzip/zip_get_archive_comment.html
[2] http://www.nih.at/libzip/zip_set_archive_comment.html
[3] http://www.coderanch.com/t/530362/java-io/java/Zip-file-archive-comment-extended