/*
 * Byte ranges of a double-byte character (DBC) as tested by the macros below:
 *   1st byte:  _DF1S.._DF1E
 *   2nd byte:  _DS1S.._DS1E  or  _DS2S.._DS2E
 */
#define _DF1S 0x81 /* 1st byte range, start */
#define _DF1E 0xFE /* 1st byte range, end */
#define _DS1S 0x40 /* 2nd byte range 1, start */
#define _DS1E 0x7E /* 2nd byte range 1, end */
#define _DS2S 0x80 /* 2nd byte range 2, start */
#define _DS2E 0xFE /* 2nd byte range 2, end */

/* Non-zero when c (truncated to BYTE) lies in the 1st-byte range. */
#define IsDBCS1(c) (_DF1S <= (BYTE)(c) && (BYTE)(c) <= _DF1E)

/* Non-zero when c (truncated to BYTE) lies in either 2nd-byte range. */
#define IsDBCS2(c)                                   \
    ((_DS1S <= (BYTE)(c) && (BYTE)(c) <= _DS1E) ||   \
     (_DS2S <= (BYTE)(c) && (BYTE)(c) <= _DS2E))
/**
 * Convert a NUL-terminated GBK/GB2312 string into a NUL-terminated string of
 * Unicode code units.
 *
 * Each input byte is masked to 8 bits. A byte accepted by IsDBCS1() is
 * combined with the following byte into a 16-bit double-byte code; the pair is
 * rejected when the second byte fails IsDBCS2(). Every code is then mapped
 * through ff_convert(code, 1).
 *
 * @param gbk2312  Input string (not modified).
 * @param unicode  Output buffer. No bounds checking is done here: the caller
 *                 must provide room for one WCHAR per input byte plus the
 *                 terminator (worst case: input made only of single-byte
 *                 characters).
 * @return Number of WCHARs stored, not counting the terminator, or -1 when an
 *         argument is NULL, a double-byte sequence is invalid, or ff_convert()
 *         returns 0 for a code. On -1 the output buffer is left unterminated.
 */
int GBKToUnicode(TCHAR* gbk2312, WCHAR *unicode)
{
    const TCHAR *p = gbk2312;
    int si = 0; /* read index into the input */
    int di = 0; /* write index into the output */

    if (!gbk2312 || !unicode) {
        return -1;
    }

    for (;;) {
        WCHAR w = p[si++]; /* Get a character */
        if (w == 0) { /* End of input: terminate the output and report its length */
            unicode[di] = 0;
            return di;
        }
        w &= 0xFF; /* Drop any sign extension from a signed character type */
        if (IsDBCS1(w)) { /* 1st byte of a double-byte character */
            BYTE b = (BYTE)p[si++]; /* Get 2nd byte */
            if (!IsDBCS2(b)) {
                /* Also catches a string that ends right after a 1st byte (b == 0) */
                return -1;
            }
            w = (w << 8) + b; /* Create a DBC */
        }
        w = ff_convert(w, 1); /* Convert ANSI/OEM to Unicode */
        if (!w) {
            return -1; /* Reject invalid code */
        }
        unicode[di++] = w; /* Store the Unicode character */
    }
}
// Convert a NUL-terminated GBK string to a NUL-terminated UTF-8 string.
// gbk_str:   input GBK string
// utf8_str:  output buffer for the UTF-8 string
// utf8_size: size of utf8_str in bytes (including room for the terminator)
// Returns:   number of UTF-8 bytes written (terminator not counted), or -1 on
//            NULL/invalid arguments, on input of MAX_LINE_LENGTH bytes or more,
//            or when GBKToUnicode rejects the input.
// If the output buffer is too small, the result is truncated at a character
// boundary and is still NUL-terminated.
int GBKToUtf8(const char *gbk_str, char *utf8_str, int utf8_size)
{
    if (!gbk_str || !utf8_str || utf8_size <= 0)
    {
        return -1;
    }

    // GBKToUnicode does no bounds checking and stores at most one WCHAR per
    // input byte plus a terminator, so reject any input that could overrun
    // the temporary buffer below.
    for (int in_len = 0; gbk_str[in_len] != '\0'; in_len++)
    {
        if (in_len + 1 >= MAX_LINE_LENGTH)
        {
            return -1;
        }
    }

    // Temporary Unicode buffer
    WCHAR unicode_buf[MAX_LINE_LENGTH];

    // Convert to Unicode first
    int unicode_len = GBKToUnicode((TCHAR *)gbk_str, unicode_buf);
    if (unicode_len < 0)
    {
        return -1;
    }

    // Encode each Unicode value as UTF-8
    int utf8_pos = 0;
    for (int i = 0; i < unicode_len; i++)
    {
        uint32_t unicode_char = unicode_buf[i];
        int needed = (unicode_char < 0x80) ? 1 : (unicode_char < 0x800) ? 2 : 3;

        // Stop at the first character that does not fit, always keeping one
        // byte free for the terminator.
        if (needed > utf8_size - 1 - utf8_pos)
        {
            break;
        }

        if (needed == 1)
        {
            // ASCII (0-127): one byte
            utf8_str[utf8_pos++] = (char)unicode_char;
        }
        else if (needed == 2)
        {
            // Two-byte sequence
            utf8_str[utf8_pos++] = (char)(0xC0 | (unicode_char >> 6));
            utf8_str[utf8_pos++] = (char)(0x80 | (unicode_char & 0x3F));
        }
        else
        {
            // Three-byte sequence
            utf8_str[utf8_pos++] = (char)(0xE0 | (unicode_char >> 12));
            utf8_str[utf8_pos++] = (char)(0x80 | ((unicode_char >> 6) & 0x3F));
            utf8_str[utf8_pos++] = (char)(0x80 | (unicode_char & 0x3F));
        }
    }
    utf8_str[utf8_pos] = '\0'; // Terminate the string
    return utf8_pos;
}