pdf文件中ASCII85的解码程序

delphi 源码:

 

unit Decode85;

// wiss文档协同系统
// PDF 文件中的ASCII85解密程序
// 2008-10-20 王春晨

interface

uses SysUtils, Windows, Classes, Math, Types;

type
    // ASCII85 decoder. Decodes either a raw memory buffer or a TMemoryStream.
    TDecode85 = class
    private
        // n: number of base-85 digits collected so far for the current group.
        // index: number of decoded bytes written to the output so far.
        n, index: Integer;
        // Write cursor into the output buffer; advanced by wput.
        outdata: PByte;

        // Writes the `num` most significant bytes of `tuple` to the output.
        procedure wput(tuple: Cardinal; num: byte);
        // Core decoding loop over Len input bytes starting at Bt.
        procedure Decode85(Bt: PByte; Len: Integer); //Transform
    public
     // Decodes Len bytes at Bt into a buffer allocated here and returned in Rs;
     // the result is the number of decoded bytes.
     function Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer; overload;
        // Decodes the whole of InStream into OutStream; the result is the
        // number of decoded bytes.
        function Decode(InStream, OutStream: TMemoryStream): Integer; overload;
    end;

implementation

// Weights of the five base-85 digits of a group, most significant digit first
// (85^4, 85^3, 85^2, 85^1, 85^0).
const pow85: array[0..4] of Cardinal = (85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1);

// Appends the `num` most significant bytes of `tuple` to the output buffer,
// highest byte first, advancing the write cursor `outdata` and the running
// output byte count `index` once per byte written.
procedure TDecode85.Wput(tuple: Cardinal; num: byte);
var I: Integer;
begin
    for I := 1 to Num do begin
        // Mask down to one byte explicitly: assigning the unmasked Cardinal
        // to a Byte raises ERangeError when range checking ({$R+}) is on.
        outdata^ := Byte((tuple shr ((4 - I) * 8)) and $FF);
        Inc(outdata);
        Inc(index);
    end;
end;

// Decodes Len bytes of ASCII85 text starting at Bt, writing the result
// through Wput (which advances `outdata` and `index`).
//
// Handling per input byte:
//   'z'                 - shorthand for four zero bytes; only valid between
//                         groups, otherwise decoding stops.
//   '~'                 - end marker; when followed by '>' a pending partial
//                         group of n digits is flushed as n - 1 bytes.
//                         Decoding stops in either case.
//   LF, CR, space, NUL, TAB - skipped.
//   '!'..'u'            - one base-85 digit; every fifth digit emits 4 bytes.
//   anything else       - decoding stops.
procedure TDecode85.Decode85(Bt: PByte; Len: Integer);
var I: Integer;
    b1: Byte;
    tuple: Cardinal;
begin
    n := 0;
    tuple := 0;

    for I := 0 to Len - 1 do begin
        b1 := Bt^;
        Inc(Bt);

        case b1 of
            122: begin  // 'z'
                    if n <> 0 then
                        Break;
                    Wput(0, 4);
                end;

            126: begin  // '~'
                    // Only peek at the following byte when one exists;
                    // otherwise a trailing '~' would read past the buffer.
                    if (I < Len - 1) and (Bt^ = Byte('>')) then begin
                        if n > 0 then begin
                            // n digits encode n - 1 bytes. Adding the weight
                            // of the last digit read rounds the truncated
                            // value up so the kept high bytes are correct.
                            Dec(n);
                            tuple := tuple + pow85[n];
                            Wput(tuple, n);
                        end;
                    end;
                    Break;
                end;

            10, 13, 32, 0, 9: Continue;
        else
            if (b1 < 33) or (b1 > 117) then  // outside '!'..'u'
                Break;

            tuple := tuple + pow85[n] * (b1 - 33);
            Inc(n);

            if n = 5 then begin
                Wput(tuple, 4);
                tuple := 0;
                n := 0;
            end;
        end;
    end;
end;

// Decodes Len bytes of ASCII85 text at Bt into a zero-filled buffer that is
// allocated here with AllocMem and handed back through Rs.
// Returns the number of decoded bytes written to Rs.
function TDecode85.Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer;
var
    Remaining, ZCount: Integer;
    Scan: PByte;
begin
    // Count the 'z' characters first: each one is a single input byte that
    // expands to four output bytes, so the buffer must allow for that.
    ZCount := 0;
    Scan := Bt;
    Remaining := Len;
    while Remaining > 0 do begin
        if Scan^ = Ord('z') then
            Inc(ZCount);
        Inc(Scan);
        Dec(Remaining);
    end;

    Rs := AllocMem(Len + 4 * ZCount);
    outdata := Rs;
    index := 0;
    Decode85(Bt, Len);
    Result := index;
end;

// Decodes the entire contents of InStream (ASCII85 text) into OutStream.
// Returns the number of decoded bytes; on return OutStream.Size equals that
// count, so the stream contains exactly the decoded data.
function TDecode85.Decode(InStream, OutStream: TMemoryStream): Integer;
var I, Ik, InLen: Integer;
    Tp: PByte;
begin
    InLen := InStream.Size;

    // Count the 'z' characters: each is one input byte that expands to four
    // output bytes, so the output buffer must allow for that.
    Tp := InStream.Memory;
    Ik := 0;
    for I := 0 to InLen - 1 do begin
        if Tp^ = 122 then
            Inc(Ik);
        Inc(Tp);
    end;

    index := 0;
    // Upper bound for the decoded size. Memory is read only after Size has
    // been set, because resizing may move the stream's buffer.
    OutStream.Size := InLen + 4 * Ik;
    outdata := OutStream.Memory;
    Decode85(InStream.Memory, InLen);

    // Drop the unused tail of the over-sized buffer; without this the stream
    // ends with bytes that were never written by the decoder.
    OutStream.Size := index;
    Result := index;
end;

end.

 

C源码:

/* decode85 -- convert from ascii85 format */

#include <stdio.h>
#include <stdlib.h>

/* Weights of the five base-85 digits of a group, most significant first. */
static unsigned long pow85[] = { 85*85*85*85, 85*85*85, 85*85, 85, 1 };
/*
 * Write the `bytes` most significant bytes of the 32-bit value in `tuple`
 * to stdout, highest byte first. Counts outside 1..4 write nothing.
 */
void wput(unsigned long tuple, int bytes)
{
 int shift = 24;

 if (bytes < 1 || bytes > 4)
  return;

 while (bytes-- > 0)
 {
  putchar(tuple >> shift);
  shift -= 8;
 }
}

/*
 * Decode one ASCII85 section from `fp` to stdout. The caller has already
 * consumed the opening "<~"; decoding runs until the closing "~>".
 * `file` is used only to label error messages.
 *
 * Any malformed input (character outside '!'..'u', 'z' inside a group,
 * '~' not followed by '>', or EOF before "~>") prints a diagnostic to
 * stderr and terminates the process with exit status 1.
 */
void decode85(FILE *fp, const char *file)
{
 unsigned long tuple = 0;
 int c, count = 0;

 for (;;)
 switch (c = getc(fp))
 {
  default: if (c < '!' || c > 'u')
   {
    fprintf(stderr, "%s: bad character in ascii85 region: %#o\n", file, c);
    exit(1);
   }

   /* accumulate one base-85 digit; every fifth digit emits 4 bytes */
   tuple += (c - '!') * pow85[count++];
   if (count == 5)
   {
    wput(tuple, 4);
    count = 0;
    tuple = 0;
   }
   break;

  case 'z':
   /* 'z' is shorthand for four zero bytes, valid only between groups */
   if (count != 0)
   {
    fprintf(stderr, "%s: z inside ascii85 5-tuple\n", file);
    exit(1);
   }
   putchar(0);
   putchar(0);
   putchar(0);
   putchar(0);
   break;

  case '~':
   if (getc(fp) == '>')
   {
    if (count > 0)
    {
     /*
      * A partial group of `count` digits encodes count - 1 bytes.
      * Adding the weight of the last digit read rounds the
      * truncated value up so the kept high bytes are correct.
      */
     count--;
     tuple += pow85[count];
     wput(tuple, count);
    }
    /* consume (and discard) the character following "~>" */
    c = getc(fp);
    return;
   }
   fprintf(stderr, "%s: ~ without > in ascii85 section\n", file);
   exit(1);
  /* whitespace and control characters that are silently skipped */
  case '\n':
  case '\r':
  case '\t':
  case ' ':
  case '\0':
  case '\f':
  case '\b':
  case 0177:
   break;
  case EOF:
   fprintf(stderr, "%s: EOF inside ascii85 section\n", file);
   exit(1);
 }
}

/*
 * Scan `fp` for ASCII85 sections introduced by "<~" and decode each one to
 * stdout via decode85(). When `preserve` is non-zero, every character that
 * is not part of an ASCII85 section is copied to stdout unchanged; when it
 * is zero such characters are dropped. `file` labels error messages.
 */
void decode(FILE *fp, const char *file, int preserve)
{
 int c;

 for (;;)
 {
  c = getc(fp);
  if (c == EOF)
   break;

  if (c != '<')
  {
   if (preserve)
    putchar(c);
   continue;
  }

  /* saw '<': it opens an ASCII85 section only when followed by '~' */
  c = getc(fp);
  if (c == '~')
  {
   decode85(fp, file);
   continue;
  }

  if (preserve)
   putchar('<');
  if (c == EOF)
   break;
  if (preserve)
   putchar(c);
 }
}

/* Print the command-line synopsis to stderr and exit with status 1. */
void usage(void)
{
 fprintf(stderr, "usage: decode85 [-p] file ...\n");
 exit(1);
}

extern int getopt(int, char *[], const char *);
extern int optind;
extern char *optarg;

/*
 * Entry point: decode85 [-p] file ...
 *
 * With -p, text outside ASCII85 sections is passed through to stdout.
 * With no file arguments, stdin is decoded. Returns 1 if a file cannot be
 * opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
 int i, preserve;

 preserve = 0;
 /* getopt() signals the end of the options with -1 (POSIX), not EOF */
 while ((i = getopt(argc, argv, "p?")) != -1)
  switch (i)
  {
   case 'p':
    preserve = 1;
    break;
   case '?':
    usage();
  }

 if (optind == argc)
  decode(stdin, "decode85", preserve);
 else
  for (i = optind; i < argc; i++)
  {
   FILE *fp = fopen(argv[i], "r");
   if (fp == NULL)
   {
    perror(argv[i]);
    return 1;
   }
   decode(fp, argv[i], preserve);
   fclose(fp);
  }
 return 0;
}

 

Java源码:

public class ASCII85InputStream extends FilterInputStream {
    static private final long CONST_85 = 85L;

    static private final long HIGH_BYTE = 0xFFL;

    private byte[] ascii;

    private byte[] b;

    private boolean eof;

    private int index;

    private int n;

    /**
     * Constructor
     *
     * @param is
     *            The input stream to actually read from.
     */
    public ASCII85InputStream(InputStream is) {
        super (is);
        index = 0;
        n = 0;
        eof = false;
        ascii = new byte[5];
        b = new byte[4];
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#read()
     */
    public final int read() throws IOException {
        if (index >= n) {
            if (eof) {
                return -1;
            }
            index = 0;

            int k;
            byte z;
            do {
                int zz = (byte) in.read();
                if (zz == -1) {
                    eof = true;
                    return -1;
                }
                z = (byte) zz;
            } while ((z == '/n') || (z == '/r') || (z == ' '));

            if ((z == '~') | (z == 'x')) {
                eof = true;
                ascii = null;
                b = null;
                n = 0;
                return -1;
            } else if (z == 'z') {
                b[0] = 0;
                b[1] = 0;
                b[2] = 0;
                b[3] = 0;
                n = 4;
            } else {
                ascii[0] = z; // may be EOF here....
                for (k = 1; k < 5; ++k) {
                    do {
                        int zz = (byte) in.read();
                        if (zz == -1) {
                            eof = true;
                            return -1;
                        }
                        z = (byte) zz;
                    } while ((z == '/n') || (z == '/r') || (z == ' '));
                    ascii[k] = z;
                    if ((z == '~') | (z == 'x')) {
                        break;
                    }
                }
                n = k - 1;
                if (n == 0) {
                    eof = true;
                    ascii = null;
                    b = null;
                    return -1;
                }
                if (k < 5) {
                    for (++k; k < 5; ++k) {
                        ascii[k] = 0x21;
                    }
                    eof = true;
                }

                // decode stream
                long t = 0;
                for (k = 0; k < 5; ++k) {
                    z = (byte) (ascii[k] - 0x21);
                    if ((z < 0) || (z > 93)) {
                        n = 0;
                        eof = true;
                        ascii = null;
                        b = null;
                        throw new IOException(
                                "Invalid data in Ascii85 stream");
                    }
                    t = (t * CONST_85) + z;
                }
                for (k = 3; k >= 0; --k) {
                    b[k] = (byte) (t & HIGH_BYTE);
                    t >>>= 8;
                }
            }
        }
        return b[index++] & 0xFF;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#read(byte[], int, int)
     */
    public final int read(byte[] data, int offset, int len)
            throws IOException {
        if (eof && (index >= n)) {
            return -1;
        }
        for (int i = 0; i < len; i++) {
            if (index < n) {
                data[i + offset] = b[index++];
            } else {
                int t = read();
                if (t == -1) {
                    return i;
                }
                data[i + offset] = (byte) t;
            }
        }
        return len;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#available()
     */
    public int available() throws IOException {
        throw new IOException("method not supported");
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#close()
     */
    public void close() throws IOException {
        ascii = null;
        eof = true;
        b = null;
        super .close();
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#markSupported()
     */
    public boolean markSupported() {
        return false;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#reset()
     */
    public synchronized void reset() throws IOException {
        throw new IOException("method not supported");
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#skip(long)
     */
    public long skip(long bytes) throws IOException {
        throw new IOException("method not supported");
    }
}

 

 

原理:

ASCII85 编码使用的是从字符 ! 到 u、字符 z 以及编码结束标记 ~> 组成的字符集。
PDF 的 ASCII85Decode 过滤器忽略所有的空白字符。如果编码后的数据中含
有所要求的字符之外的字符,解码时将出错。
ASCII85 编码的基本思想:用 5 个 ASCII 码字符来替代字节数据中的 4
个字节的二进制数。其基本原理就是数据进制的转换,以 85 进制的数据表示
256 进制的数据。5 位 85 进制数据最大可以表示 $85^5 - 1 = 4437053124$,而 4 位
256 进制可以表示的最大数值为 $256^4 - 1 = 4294967295$,5 位 85 进制可以完全表
[页眉:哈尔滨工程大学硕士学位论文]
示 4 位 256 进制的数,所以 ASCII85 编码方式可行。
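设每组 4 字节二进制数据为 $(b_1, b_2, b_3, b_4)$,转换后的一组 5 字节输出为
$(c_1, c_2, c_3, c_4, c_5)$,此算法的转换关系如下:
$$(b_1 \times 256^3) + (b_2 \times 256^2) + (b_3 \times 256) + b_4 =
(c_1 \times 85^4) + (c_2 \times 85^3) + (c_3 \times 85^2) + (c_4 \times 85) + c_5$$
(公式编号为"×-3",章号因识别错误已无法辨认)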
其中 $b_1$~$b_4$ 的取值范围为 0~255,$c_1$~$c_5$ 的取值范围为 0~84。
然后将 $c_1$~$c_5$ 分别加上 33 得到 $e_1$~$e_5$,取值范围为 33~117。这期间的
字符都是可见的,最后,以字符(~>)表示数据结束。
需要注意的特殊情况是:当 $c_1$~$c_5$ 都为 0 时,以编码 122(z)而不是 5 个
33(!)表示 $b_1$~$b_4$。当原数据不是 4 字节的倍数时,最后剩余的 n(1、2、3)字
节二进制数据,在低位补 0,凑成 4 字节,仍按转换关系处理(但不使用 z),
得到 5 位 ASCII 数据,取其中高 n+1 位为编码结果。PDF 编码方法中 1 个字
节数据的取值范围为 0~255,而不是 Java 中 byte 的取值范围 −128~127,
所以程序中要进行转换:0~127 取值不变,−128~−1 取值加 256 变为 128~
255。
为了便于理解,下面提供几个例子。
例 2:没有特殊情况的数据编码
原二进制数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
b4 = source[3] = 11100111b = 0e7h = 231
85 进制数据:c1 = 2,c2 = 20,c3 = 14,c4 = 26,c5 = 65
经过 ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 59 = ';'
encoded[4] = 98 = 'b'
[页眉:哈尔滨工程大学硕士学位论文]
例 3:原数据不足 4 位
原数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
85 进制数据:c1 = 2,c2 = 20,c3 = 14,c4 = 24
ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 57 = '9'
例 4:编码后为 0 的特殊处理
原二进制数据:source[0] = source[1] = source[2] = source[3] = 0
ASCII85 编码后:encoded[0] = 122 = 'z'。

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值