简介
格式说明
Base64是网络上最常见的用于传输8Bit字节代码的编码方式之一,参见规范 RFC2045 - 6.8 Base64 Content-Transfer-Encoding.
Base64 是编码格式 , 不是加密
Base64 的设计初衷是把任意字节序列表示成一种不要求人类可读的可打印字符形式, 以便在邮件 (MIME) 、HTTP 等基于文本的协议中传输。
所用的 64 + 1 个字符是: 64 个可打印的、在各种协议中都没有什么特殊含义的字符, 外加一个 = , 用来作为编码输出的终止符和补全符。
编码规范简而言之就是 : 顺序从字节流中取出 6 bits 数据 , 查表找到对应的字符。
这样每3个char (24 bits) 的原始数据就会产生 4个编码字符 (32 bits) 。 数据大小大约膨胀 33 % 。
当编码进行到字节流尾部的时候, 如果取到的数据不足 6 bit , 剩余的位补 0 。
输出字节不足 4 的整数倍的话, 用=补齐。
一个简单的例子 (来自百度百科):
Table 1: The Base64 Alphabet
Value Encoding Value Encoding Value Encoding Value Encoding
0 A 17 R 34 i 51 z
1 B 18 S 35 j 52 0
2 C 19 T 36 k 53 1
3 D 20 U 37 l 54 2
4 E 21 V 38 m 55 3
5 F 22 W 39 n 56 4
6 G 23 X 40 o 57 5
7 H 24 Y 41 p 58 6
8 I 25 Z 42 q 59 7
9 J 26 a 43 r 60 8
10 K 27 b 44 s 61 9
11 L 28 c 45 t 62 +
12 M 29 d 46 u 63 /
13 N 30 e 47 v
14 O 31 f 48 w (pad) =
15 P 32 g 49 x
16 Q 33 h 50 y
#转前:
s 1 3
#先转成ascii:
115 49 51
#转成二进制:
01110011 00110001 00110011
#6个一组(4组)
011100 110011 000100 110011
#得到
28 51 4 51
查对照表得到 c z E z
C 实现代码
#include <assert.h>
#include <stdlib.h>
/* Base64 alphabet: index is the 6-bit value, element is the output character. */
static const char encode_map[64] = {
    /*  0-12 */ 'A','B','C','D','E','F','G','H','I','J','K','L','M',
    /* 13-25 */ 'N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
    /* 26-38 */ 'a','b','c','d','e','f','g','h','i','j','k','l','m',
    /* 39-51 */ 'n','o','p','q','r','s','t','u','v','w','x','y','z',
    /* 52-61 */ '0','1','2','3','4','5','6','7','8','9',
    /* 62-63 */ '+','/'
};
/*
 * Reverse Base64 table: index is a byte value (ASCII), element is the 6-bit
 * value that byte stands for. Every byte outside the Base64 alphabet,
 * including the '=' pad, maps to 0, so the table alone cannot tell 'A'
 * apart from an invalid byte.
 *
 * The table has one entry for each of the 256 possible byte values so that
 * a lookup with any unsigned char index stays inside the array; entries not
 * listed below (0x80-0xff) are zero-initialised.
 */
static const char decode_map[256] = {
    /* 0x00-0x0f */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x10-0x1f */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20-0x2f */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63, /* '+' '/' */
    /* 0x30-0x3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0, /* '0'-'9' */
    /* 0x40-0x4f */  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 'A'-'O' */
    /* 0x50-0x5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0, /* 'P'-'Z' */
    /* 0x60-0x6f */  0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 'a'-'o' */
    /* 0x70-0x7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0, /* 'p'-'z' */
};
/**
 * Encode a byte buffer as Base64 text using encode_map, padding the final
 * group with '='.
 *
 * @param buf      bytes to encode; not dereferenced when len is 0
 * @param len      number of bytes available at buf
 * @param ret_len  receives the size of the returned buffer INCLUDING the
 *                 trailing NUL byte (encoded length + 1); set to 0 on failure
 * @return a malloc()ed, NUL-terminated string that the caller must free(),
 *         or NULL when the encoded size does not fit in an unsigned int or
 *         (in NDEBUG builds) when the allocation fails.
 */
void * CBase64_Encode(const void * buf,unsigned int len, unsigned int * ret_len)
{
    /* Read input as unsigned bytes so shifts never see a negative value. */
    const unsigned char *src = (const unsigned char *)buf;

    /* Every started group of 3 input bytes yields 4 output characters. */
    const unsigned int groups = len / 3 + ((len % 3 != 0) ? 1u : 0u);

    /* groups * 4 + 1 must fit in unsigned int; otherwise the size would wrap
     * and the buffer would be far too small for what the loop writes. */
    const unsigned int max_groups = ((unsigned int)-1 - 1u) / 4u;
    if (groups > max_groups) {
        *ret_len = 0;
        return NULL;
    }
    const unsigned int total = groups * 4u + 1u;

    char *ret_buf = (char *)malloc(total);
    assert(ret_buf);            /* debug builds still abort on out-of-memory */
    if (ret_buf == NULL) {      /* NDEBUG builds: report failure, do not dereference NULL */
        *ret_len = 0;
        return NULL;
    }

    char *dst = ret_buf;
    unsigned int left = len;

    /* Full 3-byte groups: 24 input bits split into four 6-bit indices. */
    for (; left >= 3; left -= 3, src += 3) {
        *dst++ = encode_map[src[0] >> 2];                           /* high 6 of byte 0 */
        *dst++ = encode_map[((src[0] & 0x03) << 4) | (src[1] >> 4)]; /* low 2 + high 4 */
        *dst++ = encode_map[((src[1] & 0x0f) << 2) | (src[2] >> 6)]; /* low 4 + high 2 */
        *dst++ = encode_map[src[2] & 0x3f];                          /* low 6 of byte 2 */
    }

    /* Tail: missing bits are zero-filled and the group is padded with '='. */
    if (left == 1) {
        *dst++ = encode_map[src[0] >> 2];
        *dst++ = encode_map[(src[0] & 0x03) << 4];
        *dst++ = '=';
        *dst++ = '=';
    } else if (left == 2) {
        *dst++ = encode_map[src[0] >> 2];
        *dst++ = encode_map[((src[0] & 0x03) << 4) | (src[1] >> 4)];
        *dst++ = encode_map[(src[1] & 0x0f) << 2];
        *dst++ = '=';
    }

    *dst = '\0';
    *ret_len = total;
    return (void *)ret_buf;
}
/**
 * Decode Base64 text into a newly allocated, NUL-terminated byte buffer.
 *
 * Input is consumed in groups of 4 characters; trailing characters that do
 * not form a complete group are ignored. Decoding stops at the first group
 * that carries '=' in its third or fourth position. Bytes that are not part
 * of the Base64 alphabet are not rejected: they decode as the value 0.
 *
 * @param buf      Base64 characters to decode; not dereferenced when len < 4
 * @param len      number of characters available at buf
 * @param ret_len  receives the number of decoded bytes PLUS ONE for the
 *                 trailing NUL byte; set to 0 on allocation failure
 * @return a calloc()ed buffer that the caller must free(), or NULL when the
 *         allocation fails.
 */
void * CBase64_Decode(const void * buf,unsigned int len, unsigned int * ret_len){
    /* Unsigned bytes: a plain char may be negative and must never be used
     * as an array index. */
    const unsigned char *in = (const unsigned char *)buf;
    const unsigned int groups = len / 4;
    unsigned int written = 0;

    /* At most 3 bytes per group, plus the terminating NUL. */
    unsigned char *out = (unsigned char *)calloc((size_t)groups * 3 + 1, sizeof *out);
    if (out == NULL) {
        *ret_len = 0;
        return NULL;
    }

    for (unsigned int g = 0; g < groups; g++, in += 4) {
        unsigned int v[4];
        for (int i = 0; i < 4; i++) {
            /* decode_map is a char array, so sizeof is its element count;
             * bytes beyond the table are treated like any other
             * non-alphabet byte (value 0) instead of reading past it. */
            const size_t idx = in[i];
            v[i] = (idx < sizeof decode_map)
                       ? ((unsigned int)decode_map[idx] & 0x3fu)
                       : 0u;
        }

        /* 6 bits of char 0 + high 2 bits of char 1 */
        out[written++] = (unsigned char)((v[0] << 2) | (v[1] >> 4));
        if (in[2] == '=') {
            break;              /* "xx==": one byte in the final group */
        }

        /* low 4 bits of char 1 + high 4 bits of char 2 */
        out[written++] = (unsigned char)(((v[1] & 0x0fu) << 4) | (v[2] >> 2));
        if (in[3] == '=') {
            break;              /* "xxx=": two bytes in the final group */
        }

        /* low 2 bits of char 2 + 6 bits of char 3 */
        out[written++] = (unsigned char)(((v[2] & 0x03u) << 6) | v[3]);
    }

    out[written] = 0;
    /* Length is derived from what was actually written, so padding that
     * shows up before the last group no longer inflates the result. */
    *ret_len = written + 1;
    return out;
}