pdf文件中ASCII85的解码程序

最新推荐文章于 2024-10-27 11:23:06 发布

wiss66

最新推荐文章于 2024-10-27 11:23:06 发布

阅读量2.4k

点赞数

文章标签： integer byte file fp bt null

本文链接：https://blog.youkuaiyun.com/wiss66/article/details/3117973

版权

delphi 源码：

unit Decode85;

// wiss文档协同系统
// PDF 文件中的ASCII85解密程序
// 2008-10-20 王春晨

interface

uses SysUtils, Windows, Classes, Math, Types;

type
    // ASCII85 decoder for PDF streams.
    // Decoded bytes are written through a moving output cursor (outdata);
    // index counts how many bytes have been written so far.
    TDecode85 = class
    private
        n: Integer;       // number of base-85 digits collected for the current group
        index: Integer;   // number of bytes written to the output so far
        outdata: PByte;   // current write position in the output buffer

        // Writes the num most significant bytes of tuple to the output.
        procedure wput(tuple: Cardinal; num: byte);
        // Core transform: decodes Len bytes starting at Bt into outdata.
        procedure Decode85(Bt: PByte; Len: Integer);
    public
        // Decodes a raw buffer; Rs receives a newly allocated output buffer,
        // the return value is the number of decoded bytes.
        function Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer; overload;
        // Decodes InStream into OutStream; returns the number of decoded bytes.
        function Decode(InStream, OutStream: TMemoryStream): Integer; overload;
    end;

implementation

// Place values of the five base-85 digits, most significant first:
// 85^4, 85^3, 85^2, 85^1, 85^0.
const
    pow85: array[0..4] of Cardinal = (
        52200625,  // 85 * 85 * 85 * 85
        614125,    // 85 * 85 * 85
        7225,      // 85 * 85
        85,
        1
    );

// Writes the num most significant bytes of the 32-bit value tuple
// (big-endian order) to the output cursor, advancing outdata and index
// by one for every byte written.
//   tuple - decoded 32-bit group value
//   num   - how many bytes to emit (the loop uses shifts for 1..4)
procedure TDecode85.Wput(tuple: Cardinal; num: byte);
var I: Integer;
begin
    for I := 1 to Num do begin
        // Mask to the low 8 bits before storing: assigning the unmasked
        // Cardinal to a Byte raises a range-check error under {$R+}.
        outdata^ := Byte((tuple shr ((4 - I) * 8)) and $FF);
        Inc(outdata);
        Inc(index);
    end;
end;

// Decodes Len bytes of ASCII85 text starting at Bt and writes the result
// through Wput (which advances outdata and index).
//   - 'z' outside a group produces four zero bytes; inside a group it
//     stops decoding.
//   - '~' stops decoding; when it is followed by '>' any pending partial
//     group of n digits is flushed as n-1 bytes.
//   - LF, CR, space, NUL and TAB are skipped.
//   - Any other byte outside '!'..'u' stops decoding.
procedure TDecode85.Decode85(Bt: PByte; Len: Integer);
var I: Integer;
    b1: Byte;
    tuple: Cardinal;
begin
    n := 0;
    tuple := 0;

    for I := 0 to Len - 1 do begin
        b1 := Bt^;
        Inc(Bt);

        case b1 of
            122: begin // 'z'
                    if n <> 0 then
                        Break;
                    Wput(0, 4)
                end;

            126: begin // '~'
                    // Bt already points at the byte after '~'. Only look at it
                    // when it is still inside the input buffer; otherwise this
                    // would read one byte past the end.
                    if (I < Len - 1) and (Bt^ = Byte('>')) then begin
                        if n > 0 then begin
                            // Partial final group: n digits carry n-1 bytes.
                            // Adding pow85[n] rounds the truncated value up so
                            // the high n bytes come out right.
                            Dec(n);
                            tuple := tuple + pow85[n];
                            Wput(tuple, n);
                        end;
                    end;
                    Break;
                end;

            10, 13, 32, 0, 9: Continue; // whitespace is ignored
        else
            if (b1 < 33) or (b1 > 117) then // outside '!'..'u'
                Break;

            tuple := tuple + pow85[n] * (b1 - 33);
            Inc(n);

            if n = 5 then begin
                // A full group of five digits yields four bytes.
                Wput(tuple, 4);
                tuple := 0;
                n := 0;
            end;
        end;
    end;
end;

// Decodes the ASCII85 text in Bt[0..Len-1] into a freshly allocated buffer.
//   Rs     - receives the buffer obtained from AllocMem; this function does
//            not release it.
//   Result - number of decoded bytes written to Rs.
function TDecode85.Decode(Bt: PByte; Len: Integer; var Rs: PByte): Integer;
var
    Remaining, ZCount: Integer;
    Scan: PByte;
begin
    // Count the 'z' characters in the input: each single 'z' byte expands
    // to a full 32-bit zero, so it needs extra room in the output.
    ZCount := 0;
    Scan := Bt;
    Remaining := Len;
    while Remaining > 0 do begin
        if Scan^ = 122 then
            Inc(ZCount);
        Inc(Scan);
        Dec(Remaining);
    end;

    Rs := AllocMem(Len + 4 * ZCount);
    outdata := Rs;
    index := 0;
    Decode85(Bt, Len);
    Result := index;
end;

// Decodes the ASCII85 text held in InStream into OutStream.
//   InStream  - source data; its whole memory block is decoded.
//   OutStream - resized to hold the decoded data; on return its Size equals
//               the number of decoded bytes.
//   Result    - number of decoded bytes.
function TDecode85.Decode(InStream, OutStream: TMemoryStream): Integer;
var I, Ik, Len: Integer;
    Tp: PByte;
begin
    Len := Integer(InStream.Size);

    // Count the 'z' characters: each single 'z' byte expands to four zero
    // bytes, so it needs extra room in the output.
    Tp := InStream.Memory;
    Ik := 0;
    for I := 0 to Len - 1 do begin
        if Tp^ = 122 then
            Inc(Ik);
        Inc(Tp);
    end;

    index := 0;
    // Reserve a worst-case buffer first; Memory must be read after the
    // resize because setting Size may reallocate the block.
    OutStream.Size := Len + 4 * Ik;
    outdata := OutStream.Memory;
    Decode85(InStream.Memory, Len);

    // Trim the stream to what was actually decoded; otherwise the unused
    // tail of the worst-case buffer stays in the stream as trailing bytes.
    OutStream.Size := index;
    Result := index;
end;

end.

C源码：

/* decode85 -- convert from ascii85 format */

#include <stdio.h>
#include <stdlib.h>

/* Place values of the five base-85 digits, most significant first. */
static unsigned long pow85[] = { 85*85*85*85, 85*85*85, 85*85, 85, 1 };
/*
 * Write the `bytes` most significant bytes of the 32-bit value `tuple`
 * to stdout, highest byte first.  Counts outside 1..4 write nothing.
 */
void wput(unsigned long tuple, int bytes)
{
	int shift;

	if (bytes < 1 || bytes > 4)
		return;

	/* Emit from bits 31..24 downwards, one byte per remaining count. */
	for (shift = 24; bytes > 0; bytes--, shift -= 8)
		putchar(tuple >> shift);
}

/*
 * Decode one ASCII85 section from fp to stdout.  The caller has already
 * consumed the opening "<~"; decoding runs until the closing "~>".
 *
 *   fp   - input stream positioned just after "<~"
 *   file - name used as the prefix of error messages
 *
 * On malformed input (character outside '!'..'u', 'z' inside a group,
 * '~' not followed by '>', or EOF before "~>") a message is printed to
 * stderr and the program exits with status 1.
 */
void decode85(FILE *fp, const char *file)
{
	unsigned long tuple = 0;
	int c, count = 0;

	for (;;)
		switch (c = getc(fp))
		{
		default:
			if (c < '!' || c > 'u')
			{
				fprintf(stderr, "%s: bad character in ascii85 region: %#o\n", file, c);
				exit(1);
			}

			/* Accumulate the next base-85 digit, most significant first. */
			tuple += (c - '!') * pow85[count++];
			if (count == 5)
			{
				wput(tuple, 4);
				count = 0;
				tuple = 0;
			}
			break;

		case 'z':
			/* 'z' is shorthand for a whole group of four zero bytes. */
			if (count != 0)
			{
				fprintf(stderr, "%s: z inside ascii85 5-tuple\n", file);
				exit(1);
			}
			putchar(0);
			putchar(0);
			putchar(0);
			putchar(0);
			break;

		case '~':
			if (getc(fp) == '>')
			{
				if (count > 0)
				{
					/*
					 * Partial final group: count digits carry count-1
					 * bytes.  Adding pow85[count] rounds the truncated
					 * value up so the high bytes come out right.
					 */
					count--;
					tuple += pow85[count];
					wput(tuple, count);
				}
				/* One character after "~>" is read and discarded, as before. */
				c = getc(fp);
				return;
			}
			fprintf(stderr, "%s: ~ without > in ascii85 section\n", file);
			exit(1);

		/* Whitespace and other ignorable control characters. */
		case '\n':
		case '\r':
		case '\t':
		case ' ':
		case '\0':
		case '\f':
		case '\b':
		case 0177:
			break;

		case EOF:
			fprintf(stderr, "%s: EOF inside ascii85 section\n", file);
			exit(1);
		}
}

/*
 * Scan fp for ASCII85 sections ("<~" ... "~>") and decode each one to
 * stdout via decode85().
 *
 *   fp       - input stream
 *   file     - name passed on to decode85() for error messages
 *   preserve - when non-zero, text outside the ASCII85 sections is copied
 *              to stdout unchanged; when zero it is dropped
 */
void decode(FILE *fp, const char *file, int preserve)
{
	int c;

	while ((c = getc(fp)) != EOF)
	{
		if (c != '<')
		{
			if (preserve)
				putchar(c);
			continue;
		}

		c = getc(fp);
		if (c == '~')
		{
			decode85(fp, file);
			continue;
		}

		/* A lone '<' that does not open an ASCII85 section. */
		if (preserve)
			putchar('<');
		if (c == EOF)
			break;

		/*
		 * Push the look-ahead character back instead of consuming it, so
		 * that a '<' directly following another '<' (as in "<<~") is still
		 * examined as a possible section start.  Any other character is
		 * handled by the next loop iteration exactly as before.
		 */
		ungetc(c, fp);
	}
}

/* Print the command-line synopsis to stderr and exit with status 1. */
void usage(void)
{
	/* The message must end in a real newline escape, not the text "/n". */
	fprintf(stderr, "usage: decode85 [-p] file ...\n");
	exit(1);
}

extern int getopt(int, char *[], const char *);
extern int optind;
extern char *optarg;

/*
 * Entry point: decode85 [-p] file ...
 *
 *   -p  preserve (copy through) the text outside ASCII85 sections
 *   -?  print usage and exit
 *
 * With no file arguments stdin is decoded.  Returns 0 on success and 1
 * when a file cannot be opened.
 */
int main(int argc, char *argv[])
{
	int i, preserve = 0;

	/* getopt() signals the end of the options with -1, not EOF. */
	while ((i = getopt(argc, argv, "p?")) != -1)
		switch (i)
		{
		case 'p':
			preserve = 1;
			break;
		case '?':
			usage();	/* does not return */
			break;
		}

	if (optind == argc)
		decode(stdin, "decode85", preserve);
	else
		for (i = optind; i < argc; i++)
		{
			FILE *fp = fopen(argv[i], "r");
			if (fp == NULL)
			{
				perror(argv[i]);
				return 1;
			}
			decode(fp, argv[i], preserve);
			fclose(fp);
		}
	return 0;
}

Java源码：

public class ASCII85InputStream extends FilterInputStream {
static private final long CONST_85 = 85L;

static private final long HIGH_BYTE = 0xFFL;

private byte[] ascii;

private byte[] b;

private boolean eof;

private int index;

private int n;

    /**
     * Constructor
     *
     * @param is
     *            The input stream to actually read from.
     */
    public ASCII85InputStream(InputStream is) {
        super (is);
        index = 0;
        n = 0;
        eof = false;
        ascii = new byte[5];
        b = new byte[4];
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#read()
     */
    public final int read() throws IOException {
        if (index >= n) {
            if (eof) {
                return -1;
            }
            index = 0;

            int k;
            byte z;
            do {
                int zz = (byte) in.read();
                if (zz == -1) {
                    eof = true;
                    return -1;
                }
                z = (byte) zz;
            } while ((z == '/n') || (z == '/r') || (z == ' '));

            if ((z == '~') | (z == 'x')) {
                eof = true;
                ascii = null;
                b = null;
                n = 0;
                return -1;
            } else if (z == 'z') {
                b[0] = 0;
                b[1] = 0;
                b[2] = 0;
                b[3] = 0;
                n = 4;
            } else {
                ascii[0] = z; // may be EOF here....
                for (k = 1; k < 5; ++k) {
                    do {
                        int zz = (byte) in.read();
                        if (zz == -1) {
                            eof = true;
                            return -1;
                        }
                        z = (byte) zz;
                    } while ((z == '/n') || (z == '/r') || (z == ' '));
                    ascii[k] = z;
                    if ((z == '~') | (z == 'x')) {
                        break;
                    }
                }
                n = k - 1;
                if (n == 0) {
                    eof = true;
                    ascii = null;
                    b = null;
                    return -1;
                }
                if (k < 5) {
                    for (++k; k < 5; ++k) {
                        ascii[k] = 0x21;
                    }
                    eof = true;
                }

                // decode stream
                long t = 0;
                for (k = 0; k < 5; ++k) {
                    z = (byte) (ascii[k] - 0x21);
                    if ((z < 0) || (z > 93)) {
                        n = 0;
                        eof = true;
                        ascii = null;
                        b = null;
                        throw new IOException(
                                "Invalid data in Ascii85 stream");
                    }
                    t = (t * CONST_85) + z;
                }
                for (k = 3; k >= 0; --k) {
                    b[k] = (byte) (t & HIGH_BYTE);
                    t >>>= 8;
                }
            }
        }
        return b[index++] & 0xFF;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#read(byte[], int, int)
     */
    public final int read(byte[] data, int offset, int len)
            throws IOException {
        if (eof && (index >= n)) {
            return -1;
        }
        for (int i = 0; i < len; i++) {
            if (index < n) {
                data[i + offset] = b[index++];
            } else {
                int t = read();
                if (t == -1) {
                    return i;
                }
                data[i + offset] = (byte) t;
            }
        }
        return len;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#available()
     */
    public int available() throws IOException {
        throw new IOException("method not supported");
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#close()
     */
    public void close() throws IOException {
        ascii = null;
        eof = true;
        b = null;
        super .close();
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#markSupported()
     */
    public boolean markSupported() {
        return false;
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#reset()
     */
    public synchronized void reset() throws IOException {
        throw new IOException("method not supported");
    }

    /*
     * (non-Javadoc)
     *
     * @see java.io.InputStream#skip(long)
     */
    public long skip(long bytes) throws IOException {
        throw new IOException("method not supported");
    }
}

原理：

ASCII85 编码使用的字符集由从字符 ! 到字符 u 的可见字符、字符 z 以及编码结束标记 ~> 组成。
PDF的ASCII85Decode过滤器忽略所有的空白字符。如果编码后的数据中含
有所要求的字符之外的字符，解码时将出错。
ASCII85 编码的基本思想：用 5 个 ASCII 码字符来替代字节数据中的 4
个字节的二进制数。其基本原理就是数据进制的转换，以 85 进制的数据表示
256 进制的数据。5 位 85 进制数据最大可以表示 $85^5 - 1 = 4437053124$，而 4 位
256 进制可以表示的最大数值为 $256^4 - 1 = 4294967295$，5 位 85 进制可以完全表
示 4 位 256 进制的数，所以 ASCII85 编码方式可行。
（页眉：哈尔滨工程大学硕士学位论文）
设每组 4 字节二进制数据为 $(b_1, b_2, b_3, b_4)$，转换后的一组 5 字节输出为
$(c_1, c_2, c_3, c_4, c_5)$，此算法的转换关系如下：
$$b_1 \times 256^3 + b_2 \times 256^2 + b_3 \times 256 + b_4 = c_1 \times 85^4 + c_2 \times 85^3 + c_3 \times 85^2 + c_4 \times 85 + c_5 \tag{4-3}$$
其中 $b_1 \sim b_4$ 的取值范围为 0～255，$c_1 \sim c_5$ 的取值范围为 0～84。
然后将 $c_1 \sim c_5$ 分别加上 33 得到 $e_1 \sim e_5$，取值范围为 33～117。这期间的
字符都是可见的，最后，以字符 (~>) 表示数据结束。
需要注意的特殊情况是：当 $c_1 \sim c_5$ 都为 0 时，以编码 122 (z) 而不是 5 个
33 (!) 表示 $b_1 \sim b_4$。当原数据不是 4 字节的倍数时，最后剩余的 n (1, 2, 3) 字
节二进制数据，在低位补 0，凑成 4 位，仍按转换关系处理（但不使用 z），
得到 5 位 ASCII 数据，取其中高 n+1 位为编码结果。PDF 编码方法中 1 个字
节数据的取值范围为 0～255，而不是 Java 中 byte 的取值范围 -128～127，
所以程序中要进行转换：0～127 取值不变，-128～-1 取值加 256 变为 128～
255。
为了便于理解，下面提供几个例子。
例 2: 没有特殊情况的数据编码
原二进制数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
b4 = source[3] = 11100111b = 0e7h = 231
85 进制数据: c1 = 2, c2 = 20, c3 = 14, c4 = 26, c5 = 65
经过 ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 59 = ';'
encoded[4] = 98 = 'b'
（页眉：哈尔滨工程大学硕士学位论文）
例 3: 原数据不足 4 位
原二进制数据:
b1 = source[0] = 00000110b = 06h = 6
b2 = source[1] = 11110110b = 0f6h = 246
b3 = source[2] = 00000111b = 07h = 7
85 进制数据: c1 = 2, c2 = 20, c3 = 14, c4 = 24
ASCII85 编码后:
encoded[0] = 35 = '#'
encoded[1] = 53 = '5'
encoded[2] = 47 = '/'
encoded[3] = 57 = '9'
例 4: 编码后为 0 的特殊处理
原二进制数据: source[0] = source[1] = source[2] = source[3] = 0
ASCII85 编码后: encoded[0] = 122 = 'z'。