delphi 源码:
unit Decode85;
// wiss文档协同系统
// PDF 文件中的ASCII85解密程序
// 2008-10-20 王春晨
interface
uses SysUtils, Windows, Classes, Math, Types;
type
// ASCII85 decoder. Decode() sizes an output buffer, then Decode85() walks the
// encoded bytes and emits decoded bytes through wput().
TDecode85 = class
private
// n: number of base-85 digits collected so far for the current 5-character group.
// index: number of decoded bytes written to the output so far.
n, index: Integer;
// Write cursor into the output buffer; advanced by wput for every byte stored.
outdata: PByte;
// Stores the top `num` bytes of `tuple` (most significant first) at outdata.
procedure wput(tuple: Cardinal; num: byte);
// Decodes Len encoded bytes starting at Bt into the buffer outdata points to.
procedure Decode85(Bt: PByte; Len: Integer); //Transform
public
// Decodes Len bytes at Bt into a buffer allocated with AllocMem and returned in Rs;
// the result is the number of decoded bytes.
function Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer; overload;
// Decodes the whole content of InStream into OutStream's memory;
// the result is the number of decoded bytes.
function Decode(InStream, OutStream: TMemoryStream): Integer; overload;
end;
implementation
// Place values of the five base-85 digits in a group, most significant first:
// 85^4, 85^3, 85^2, 85^1, 85^0.
const
  pow85: array[0..4] of Cardinal = (52200625, 614125, 7225, 85, 1);
// Writes the `num` most significant bytes of `tuple` (big-endian order) to the
// output buffer, advancing the outdata cursor and the index byte counter.
//   tuple - decoded 32-bit group value
//   num   - number of bytes to emit (0..4)
procedure TDecode85.Wput(tuple: Cardinal; num: byte);
var
  I: Integer;
begin
  for I := 1 to num do begin
    // Mask and cast explicitly: assigning the unmasked Cardinal to a Byte
    // raises a range-check error when the unit is compiled with {$R+}.
    outdata^ := Byte((tuple shr ((4 - I) * 8)) and $FF);
    Inc(outdata);
    Inc(index);
  end;
end;
// Decodes Len bytes of ASCII85 text starting at Bt and writes the result
// through Wput (the caller must have pointed outdata at a large enough buffer).
//   'z'            - shorthand for four zero bytes; only valid between groups
//   '~' (then '>') - end-of-data marker; a pending partial group is flushed
//   LF, CR, space, NUL, TAB - skipped
//   '!'..'u'       - base-85 digits, five per group giving four output bytes
// Any other byte stops decoding silently.
procedure TDecode85.Decode85(Bt: PByte; Len: Integer);
var
  I: Integer;
  b1: Byte;
  tuple: Cardinal;
begin
  n := 0;
  tuple := 0;
  for I := 0 to Len - 1 do begin
    b1 := Bt^;
    Inc(Bt); // Bt now points at the byte following b1
    case b1 of
      122: begin // 'z'
        if n <> 0 then
          Break; // 'z' inside a group is invalid
        Wput(0, 4);
      end;
      126: begin // '~'
        // Look at the following byte only if it is still inside the input;
        // the original read Bt^ unconditionally, which ran one byte past the
        // buffer when '~' was the last input byte.
        if (I < Len - 1) and (Bt^ = Byte('>')) then begin
          if n > 0 then begin
            // n digits encode n-1 bytes; adding pow85[n] rounds up so the
            // truncated low digits do not lower the emitted high bytes.
            Dec(n);
            tuple := tuple + pow85[n];
            Wput(tuple, n);
          end;
        end;
        Break;
      end;
      10, 13, 32, 0, 9: Continue;
    else
      if (b1 < 33) or (b1 > 117) then // outside '!'..'u'
        Break;
      tuple := tuple + pow85[n] * (b1 - 33);
      Inc(n);
      if n = 5 then begin
        Wput(tuple, 4);
        tuple := 0;
        n := 0;
      end;
    end;
  end;
end;
// Decodes Len bytes of ASCII85 data at Bt into a freshly allocated buffer.
//   Rs     - receives the buffer obtained from AllocMem
//   Result - number of decoded bytes written to Rs
function TDecode85.Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer;
var
  Remaining, ZCount: Integer;
  Scan: PByte;
begin
  // Count the 'z' characters: each single 'z' byte expands to four zero bytes,
  // so the output buffer needs four extra bytes per occurrence.
  ZCount := 0;
  Scan := Bt;
  Remaining := Len;
  while Remaining > 0 do begin
    if Scan^ = 122 then
      Inc(ZCount);
    Inc(Scan);
    Dec(Remaining);
  end;
  Rs := AllocMem(Len + 4 * ZCount);
  outdata := Rs;
  index := 0;
  Decode85(Bt, Len);
  Result := index;
end;
// Decodes the complete ASCII85 content of InStream into OutStream.
//   Result - number of decoded bytes; OutStream.Size is set to the same value.
function TDecode85.Decode(InStream, OutStream: TMemoryStream): Integer;
var
  I, Ik, Len: Integer;
  Tp: PByte;
begin
  Len := InStream.Size;
  // Count the 'z' characters: each one expands to four zero bytes.
  Tp := InStream.Memory;
  Ik := 0;
  for I := 0 to Len - 1 do begin
    if Tp^ = 122 then
      Inc(Ik);
    Inc(Tp);
  end;
  index := 0;
  // Worst-case output size; the real size is known only after decoding.
  OutStream.Size := Len + 4 * Ik;
  outdata := OutStream.Memory;
  Decode85(InStream.Memory, Len);
  // Trim to the bytes actually produced. The original left the stream at the
  // worst-case size, so it ended with bytes that were never written.
  OutStream.Size := index;
  Result := index;
end;
end.
C源码:
/* decode85 -- convert from ascii85 format */
#include <stdio.h>
#include <stdlib.h>

/* Place values of the five base-85 digits of a group, most significant first. */
static unsigned long pow85[] = { 85*85*85*85, 85*85*85, 85*85, 85, 1 };
/*
 * Write the `bytes` most significant bytes of the 32-bit group `tuple` to
 * stdout, highest byte first. Counts outside 1..4 produce no output.
 */
void wput(unsigned long tuple, int bytes)
{
    int shift = 24;

    if (bytes < 1 || bytes > 4)
        return;
    while (bytes-- > 0)
    {
        putchar(tuple >> shift);
        shift -= 8;
    }
}
/*
 * Decode one ASCII85 section from fp (positioned just after the opening "<~")
 * and write the decoded bytes to stdout. Returns after the closing "~>" and
 * the one character that follows it have been consumed.
 *
 * fp   - input stream
 * file - name used as prefix in error messages
 *
 * Exits with status 1 on a character outside '!'..'u', on 'z' inside a
 * 5-tuple, on '~' not followed by '>', and on EOF inside the section.
 */
void decode85(FILE *fp, const char *file)
{
    unsigned long tuple = 0;
    int c, count = 0;

    for (;;)
        switch (c = getc(fp))
        {
        default:
            if (c < '!' || c > 'u')
            {
                fprintf(stderr, "%s: bad character in ascii85 region: %#o\n", file, c);
                exit(1);
            }
            tuple += (c - '!') * pow85[count++];
            if (count == 5)
            {
                wput(tuple, 4);
                count = 0;
                tuple = 0;
            }
            break;
        case 'z':
            /* 'z' abbreviates four zero bytes and may only appear between groups. */
            if (count != 0)
            {
                fprintf(stderr, "%s: z inside ascii85 5-tuple\n", file);
                exit(1);
            }
            putchar(0);
            putchar(0);
            putchar(0);
            putchar(0);
            break;
        case '~':
            if (getc(fp) == '>')
            {
                if (count > 0)
                {
                    /* count digits encode count-1 bytes; adding pow85[count]
                     * rounds up so the missing low digits do not lower the
                     * bytes that are emitted. */
                    count--;
                    tuple += pow85[count];
                    wput(tuple, count);
                }
                c = getc(fp); /* discard the character after "~>" */
                return;
            }
            fprintf(stderr, "%s: ~ without > in ascii85 section\n", file);
            exit(1);
        /* Ignored characters. These must be backslash escapes: written with a
         * forward slash they are multi-character constants that never match,
         * so every newline would be rejected as a bad character. */
        case '\n':
        case '\r':
        case '\t':
        case ' ':
        case '\0':
        case '\f':
        case '\b':
        case 0177:
            break;
        case EOF:
            fprintf(stderr, "%s: EOF inside ascii85 section\n", file);
            exit(1);
        }
}
/*
 * Scan fp for ASCII85 sections introduced by "<~" and decode each one to
 * stdout. When `preserve` is non-zero, every character outside such sections
 * is copied to stdout unchanged; otherwise it is dropped.
 */
void decode(FILE *fp, const char *file, int preserve)
{
    int ch;

    for (;;)
    {
        ch = getc(fp);
        if (ch == EOF)
            return;

        if (ch != '<')
        {
            if (preserve)
                putchar(ch);
            continue;
        }

        /* Saw '<': it opens an ASCII85 section only when followed by '~'. */
        ch = getc(fp);
        if (ch == '~')
        {
            decode85(fp, file);
            continue;
        }

        if (preserve)
            putchar('<');
        if (ch == EOF)
            return;
        if (preserve)
            putchar(ch);
    }
}
/* Print the command-line synopsis to stderr and exit with status 1. */
void usage(void)
{
    /* "\n" (backslash), so the message ends with a newline instead of the
     * literal characters "/n". */
    fprintf(stderr, "usage: decode85 [-p] file ...\n");
    exit(1);
}
extern int getopt(int, char *[], const char *);
extern int optind;
extern char *optarg;

/*
 * Entry point: decode85 [-p] file ...
 *   -p  copy text outside ASCII85 sections to stdout as well
 * With no file arguments, stdin is decoded. Returns 1 if a file cannot be
 * opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    int i, preserve = 0;

    /* getopt signals the end of the options with -1 (POSIX), not EOF. */
    while ((i = getopt(argc, argv, "p?")) != -1)
        switch (i)
        {
        case 'p':
            preserve = 1;
            break;
        case '?':
            usage();
        }

    if (optind == argc)
        decode(stdin, "decode85", preserve);
    else
        for (i = optind; i < argc; i++)
        {
            FILE *fp = fopen(argv[i], "r");
            if (fp == NULL)
            {
                perror(argv[i]);
                return 1;
            }
            decode(fp, argv[i], preserve);
            fclose(fp);
        }
    return 0;
}
Java源码:
public class ASCII85InputStream extends FilterInputStream {
static private final long CONST_85 = 85L;
static private final long HIGH_BYTE = 0xFFL;
private byte[] ascii;
private byte[] b;
private boolean eof;
private int index;
private int n;
/**
* Constructor
*
* @param is
* The input stream to actually read from.
*/
public ASCII85InputStream(InputStream is) {
super (is);
index = 0;
n = 0;
eof = false;
ascii = new byte[5];
b = new byte[4];
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#read()
*/
public final int read() throws IOException {
if (index >= n) {
if (eof) {
return -1;
}
index = 0;
int k;
byte z;
do {
int zz = (byte) in.read();
if (zz == -1) {
eof = true;
return -1;
}
z = (byte) zz;
} while ((z == '/n') || (z == '/r') || (z == ' '));
if ((z == '~') | (z == 'x')) {
eof = true;
ascii = null;
b = null;
n = 0;
return -1;
} else if (z == 'z') {
b[0] = 0;
b[1] = 0;
b[2] = 0;
b[3] = 0;
n = 4;
} else {
ascii[0] = z; // may be EOF here....
for (k = 1; k < 5; ++k) {
do {
int zz = (byte) in.read();
if (zz == -1) {
eof = true;
return -1;
}
z = (byte) zz;
} while ((z == '/n') || (z == '/r') || (z == ' '));
ascii[k] = z;
if ((z == '~') | (z == 'x')) {
break;
}
}
n = k - 1;
if (n == 0) {
eof = true;
ascii = null;
b = null;
return -1;
}
if (k < 5) {
for (++k; k < 5; ++k) {
ascii[k] = 0x21;
}
eof = true;
}
// decode stream
long t = 0;
for (k = 0; k < 5; ++k) {
z = (byte) (ascii[k] - 0x21);
if ((z < 0) || (z > 93)) {
n = 0;
eof = true;
ascii = null;
b = null;
throw new IOException(
"Invalid data in Ascii85 stream");
}
t = (t * CONST_85) + z;
}
for (k = 3; k >= 0; --k) {
b[k] = (byte) (t & HIGH_BYTE);
t >>>= 8;
}
}
}
return b[index++] & 0xFF;
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#read(byte[], int, int)
*/
public final int read(byte[] data, int offset, int len)
throws IOException {
if (eof && (index >= n)) {
return -1;
}
for (int i = 0; i < len; i++) {
if (index < n) {
data[i + offset] = b[index++];
} else {
int t = read();
if (t == -1) {
return i;
}
data[i + offset] = (byte) t;
}
}
return len;
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#available()
*/
public int available() throws IOException {
throw new IOException("method not supported");
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#close()
*/
public void close() throws IOException {
ascii = null;
eof = true;
b = null;
super .close();
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#markSupported()
*/
public boolean markSupported() {
return false;
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#reset()
*/
public synchronized void reset() throws IOException {
throw new IOException("method not supported");
}
/*
* (non-Javadoc)
*
* @see java.io.InputStream#skip(long)
*/
public long skip(long bytes) throws IOException {
throw new IOException("method not supported");
}
}
原理:
ASCII85 编码使用的字符由从字符 ! 到 u 的字符、字符 z 以及编码结束标记 ~> 组成。
PDF 的 ASCII85Decode 过滤器忽略所有的空白字符。如果编码后的数据中含有所要求的字符之外的字符,解码时将出错。
ASCII85 编码的基本思想:用 5 个 ASCII 码字符来替代字节数据中的 4 个字节的二进制数。其基本原理就是数据进制的转换,以 85 进制的数据表示 256 进制的数据。5 位 85 进制数据最大可以表示 $85^5 - 1 = 4437053124$,而 4 位 256 进制可以表示的最大数值为 $256^4 - 1 = 4294967295$,5 位 85 进制可以完全表示 4 位 256 进制的数,所以 ASCII85 编码方式可行。
(哈尔滨工程大学硕士学位论文)
设每组 4 字节二进制数据为 $(b_1, b_2, b_3, b_4)$,转换后的一组 5 字节输出为 $(c_1, c_2, c_3, c_4, c_5)$,此算法的转换关系如下:
$$b_1 \times 256^3 + b_2 \times 256^2 + b_3 \times 256 + b_4 = c_1 \times 85^4 + c_2 \times 85^3 + c_3 \times 85^2 + c_4 \times 85 + c_5 \tag{3}$$
其中 $b_1 \sim b_4$ 的取值范围为 0~255,$c_1 \sim c_5$ 的取值范围为 0~84。
然后将 $c_1 \sim c_5$ 分别加上 33 得到 $e_1 \sim e_5$,取值范围为 33~117。这期间的字符都是可见的,最后,以字符(~>)表示数据结束。
需要注意的特殊情况是:当 $c_1 \sim c_5$ 都为 0 时,以编码 122(z)而不是 5 个 33(!)表示 $b_1 \sim b_4$。当原数据不是 4 字节的倍数时,最后剩余的 n(1、2、3)字节二进制数据,在低位补 0,凑成 4 字节,仍按转换关系处理(但不使用 z),得到 5 位 ASCII 数据,取其中高 n+1 位为编码结果。PDF 编码方法中 1 个字节数据的取值范围为 0~255,而不是 Java 中 byte 的取值范围 -128~127。所以程序中要进行转换:0~127 取值不变,-128~-1 取值加 256 变为 128~255。
为了便于理解,下面提供几个例子。
例 2:没有特殊情况的数据编码
原二进制数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
b4 = source[3] = 11100111b = 0e7h = 231
85 进制数据:c1 = 2,c2 = 20,c3 = 14,c4 = 26,c5 = 65
经过 ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 59 = ';'
encoded[4] = 98 = 'b'
(哈尔滨工程大学硕士学位论文)
例 3:原数据不足 4 字节
原二进制数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
85 进制数据:c1 = 2,c2 = 20,c3 = 14,c4 = 24
ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 57 = '9'
例 4:编码后为 0 的特殊处理
原二进制数据:source[0] = source[1] = source[2] = source[3] = 0
ASCII85 编码后:encoded[0] = 122 = 'z'。