(ZT)string is utf-8

UTF-8编码检测
本文介绍了一种简单的方法来判断给定的字符串是否为UTF-8编码,并提供了一个C++实现示例。通过位操作检查每个字符的有效性,确保整个字符串符合UTF-8编码规则。

一般工程中, 文本不是utf-8就是gbk.
那先判断文本是否为utf-8, 如果不是就按照gbk走.
同事在网上找了一段代码, 我整理一下,也备一份.
准备了一段utf-8的字符串缓冲区来测试, 判断得还挺准的.

实验

// prj.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <stdlib.h>
#include <stdio.h>
#include "class_is_utf8.h"

int _tmain(int argc, _TCHAR* argv[])
{
    // Sample input: the text "abc中文测试" encoded as UTF-8.
    // No byte-order mark (0xef,0xbb,0xbf) is prepended, because this buffer
    // stands for an in-memory string rather than file content.
    unsigned char utf8_sample[] = {
        0x61, 0x62, 0x63,       // "abc"
        0xe4, 0xb8, 0xad,       // 3-byte sequence
        0xe6, 0x96, 0x87,       // 3-byte sequence
        0xe6, 0xb5, 0x8b,       // 3-byte sequence
        0xe8, 0xaf, 0x95,       // 3-byte sequence
        0x00, 0x00              // terminator
    };

    // Alternative input: the same text saved as ANSI, which the check
    // reports as not UTF-8:
    //     0x61,0x62,0x63,0xd6,0xd0,0xce,0xc4,0xb2,0xe2,0xca,0xd4,0x00,0x00
    //
    // Alternative input made of plain ASCII only:
    //     {'a', 'b', 'c', '\0', '\0'}

    const char* text = reinterpret_cast<const char*>(utf8_sample);
    const bool is_valid = class_is_utf8::is_utf8(text);
    const char* verdict = is_valid ? "true" : "false";
    printf("class_is_utf8::is_utf8 = %s\n", verdict);

    // Output for the UTF-8 sample above:
    //     class_is_utf8::is_utf8 = true

    // Keep the console window open until a key is pressed.
    system("pause");
    return 0;
}

// @file class_is_utf8.h

#ifndef __CLASS_IS_UTF8_H__
#define __CLASS_IS_UTF8_H__

// Holder for the UTF-8 detection routine. The class declares no data
// members; the check itself is exposed as a static member function.
class class_is_utf8
{
public:
    // Reports whether the given string is UTF-8.
    static bool is_utf8(const char* str);

    class_is_utf8();
    virtual ~class_is_utf8();
};

#endif // #ifndef __CLASS_IS_UTF8_H__
// @file class_is_utf8.cpp

#include "StdAfx.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "class_is_utf8.h"

// Constructor: there is nothing to initialize.
class_is_utf8::class_is_utf8() {}

// Destructor: there is nothing to release.
class_is_utf8::~class_is_utf8() {}

// Reference: http://www.fileformat.info/info/unicode/utf8.htm
//
// Checks whether a NUL-terminated byte string is well-formed UTF-8.
//
// Accepted sequences (per RFC 3629):
//   0xxxxxxx                             U+0000  .. U+007F
//   110xxxxx 10xxxxxx                    U+0080  .. U+07FF
//   1110xxxx 10xxxxxx 10xxxxxx           U+0800  .. U+FFFF (minus surrogates)
//   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  U+10000 .. U+10FFFF
//
// Rejected: stray continuation bytes, truncated sequences, overlong
// encodings, UTF-16 surrogates (U+D800..U+DFFF) and values above U+10FFFF.
//
// @param str  NUL-terminated string to inspect; may be NULL.
// @return     true if every byte up to the terminator belongs to a
//             well-formed sequence (an empty string and pure ASCII both
//             qualify); false otherwise, including when str is NULL.
bool class_is_utf8::is_utf8(const char* str)
{
    if (str == NULL)
    {
        return false;
    }

    // Work on unsigned bytes so comparisons do not depend on whether plain
    // char is signed on the target platform.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str);
    const size_t size = strlen(str);
    size_t i = 0;

    while (i < size)
    {
        const unsigned char lead = bytes[i];

        // Single-byte (ASCII) character.
        if (lead < 0x80)
        {
            ++i;
            continue;
        }

        size_t tail = 0;                  // number of continuation bytes expected
        unsigned long code_point = 0;     // payload bits decoded so far
        unsigned long min_code_point = 0; // smallest value this length may encode

        if ((lead & 0xe0) == 0xc0)
        {
            tail = 1;
            code_point = lead & 0x1f;
            min_code_point = 0x80;
        }
        else if ((lead & 0xf0) == 0xe0)
        {
            tail = 2;
            code_point = lead & 0x0f;
            min_code_point = 0x800;
        }
        else if ((lead & 0xf8) == 0xf0)
        {
            tail = 3;
            code_point = lead & 0x07;
            min_code_point = 0x10000;
        }
        else
        {
            // Stray continuation byte (10xxxxxx) or an invalid lead (0xF8..0xFF).
            return false;
        }

        // The sequence must fit entirely before the terminator.
        // (size - i) is at least 1 here, so the subtraction cannot wrap.
        if (tail >= size - i)
        {
            return false;
        }

        for (size_t k = 1; k <= tail; ++k)
        {
            const unsigned char next = bytes[i + k];
            if ((next & 0xc0) != 0x80)
            {
                return false;
            }
            code_point = (code_point << 6) | (next & 0x3f);
        }

        // Overlong encoding: the value would have fit in a shorter sequence.
        if (code_point < min_code_point)
        {
            return false;
        }

        // UTF-16 surrogate halves are not valid scalar values in UTF-8.
        if (code_point >= 0xd800 && code_point <= 0xdfff)
        {
            return false;
        }

        // Beyond the last Unicode code point.
        if (code_point > 0x10ffff)
        {
            return false;
        }

        i += tail + 1;
    }

    return true;
}
POST /zt_rec/stationif/receive HTTP/1.1 Host: 10.41.0.85 Remote_Addr: 10.141.1.181 X-REAL-IP: 10.141.1.181 X-Forwarded-For: 10.241.100.89, 10.41.0.85, 10.141.1.181 Content-Length: 1659 binfile-md5: mm binfile-gzip: false binfile-auth: myTicket Content-Type: multipart/form-data; boundary=ypm4nr-tfJntYDcZKjYbQoyJMsGzRR8Sk7z; charset=UTF-8 User-Agent: Apache-HttpClient/4.5.14 (Java/1.8.0_102) Accept-Encoding: gzip,deflate --ypm4nr-tfJntYDcZKjYbQoyJMsGzRR8Sk7z Content-Disposition: form-data; name="binFile"; filename="TRC_EXETCPU_REQ_S0085410010010_20250224160012808.json" Content-Type: application/json; charset=UTF-8 Content-Transfer-Encoding: binary {"discountFee":2265,"feeMileage":50316,"exTollStationName":"郑州西南","splitProvince":[{"serProvinceId":"410201","id":"S008541001001020103202025022415453508000","tollFee":0}],"fee":0,"enTollLaneHex":"4101FDEC1F","etcCardType":1,"preBalance":45933,"obuTotalDiscountAmount":0,"totalCount":6,"feeRate":1.0,"spcRateVersion":"20250221001","identification":2,"payFee":2265,"cardTotalAmount":1980,"provTransCount":6,"mediaNo":"4101174103618496","vehicleClass":0,"enTime":"2025-02-24T15:24:04","id":"S008541001001020103202025022415453508","vehicleId":"豫A03898_0","vehicleType":1,"vehicleSign":"0xff","obuTotalAmount":2085,"terminalNo":"214100030AAA","exitFeeType":1,"transFee":0,"passId":"014101164922010207194220250224152404","mediaType":1,"shortFeeMileage":50316,"postBalance":45933,"enTollLaneId":"S00854100100501010310","transType":"09","provinceCount":1,"cardId":"41011649220102071942","TAC":"BEB33A0A","transPayType":1,"obuSign":2,"enTollStationName":"河南唐庄站","noCardCount":0,"exTime":"2025-02-24T15:58:28","exTollLaneId":"S00854100100102010320","terminalTransNo":"007FBB03","identifyVehicleId":"豫A03898_0","shortFee":2300,"obuProvinceFee":1980} --ypm4nr-tfJntYDcZKjYbQoyJMsGzRR8Sk7z Content-Disposition: form-data; name="filename" Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
TRC_EXETCPU_REQ_S0085410010010_20250224160012808.json --ypm4nr-tfJntYDcZKjYbQoyJMsGzRR8Sk7z-- 使用springboot框架接收上面的文件,每秒大概6m,几百个文件,要求能够接收并转发到其他服务器上,每天新建一个文件夹,发送失败时存到每天新建的文件夹后重发,7天删除一次文件夹,避免内存泄露、代码写注释和log日志
03-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值