修改 TinyXML,使其支持解析特殊字符

本文介绍了 TinyXML 中实体解析功能的实现方式,并展示了如何通过修改 tinyxmlparser.cpp 和 tinyxml.h 两个文件来支持更多特殊字符实体的解析。这些修改有助于提高 TinyXML 在处理含有特殊字符的 XML 文档时的灵活性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

修改tinyxmlparser.cpp文件中的部分代码如下:

// Lookup table of entities that GetEntity() recognises by literal prefix match.
// Each row is { entity text as written in the document, length of that text
// in chars, replacement text }. The length field must equal strlen() of the
// entity text; GetEntity() asserts this.
//
// The first five rows are the XML predefined entities. The last three map
// specific decimal character references to Greek letters.
// NOTE(review): the byte length of the Greek literals depends on the encoding
// this source file is saved in; each must fit in Entity::chr including the
// terminating NUL -- confirm for your build.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
    { "&amp;",  5, "&" },
    { "&lt;",   4, "<" },
    { "&gt;",   4, ">" },
    { "&quot;", 6, "\"" },
    { "&apos;", 6, "\'" },
    { "&#955;", 6, "λ" },
    { "&#934;", 6, "Φ" },
    { "&#951;", 6, "η" }
};

    /*
     * Decode the entity or character reference starting at p.
     *
     * p        - input text; the visible code treats p[0] as the start of the
     *            candidate entity.
     * value    - output buffer that receives the decoded bytes. It is NOT
     *            NUL-terminated by this function.
     * length   - receives the number of bytes written to value that the caller
     *            should use (0 when the input was not recognised).
     * encoding - when TIXML_ENCODING_UTF8, numeric references are emitted as
     *            UTF-8; otherwise the code point is truncated to one char.
     *
     * Returns a pointer just past the consumed input, or 0 if a numeric
     * reference is malformed (missing ';' or a non-digit character).
     */
    const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
    {
        *length = 0;

        // Table-driven entities are tried first, so a table row takes
        // precedence over the generic numeric decoding below.
        for( int i=0; i<NUM_ENTITY; ++i )
        {
            if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
            {
                assert( strlen( entity[i].str ) == entity[i].strLength );

                // Copy to the START of value. Appending with strncat would
                // only be correct if the caller had zero-filled the buffer,
                // because *length counts just the replacement bytes.
                const size_t chrLength = strlen( entity[i].chr );
                memcpy( value, entity[i].chr, chrLength );
                *length = static_cast<int>( chrLength );
                return ( p + entity[i].strLength );
            }
        }

        // Numeric character reference: "&#ddd;" or "&#xhhh;".
        if ( *(p+1) && *(p+1) == '#' && *(p+2) )
        {
            unsigned long ucs = 0;
            ptrdiff_t delta = 0;
            unsigned mult = 1;

            if ( *(p+2) == 'x' )
            {
                // Hexadecimal.
                if ( !*(p+3) ) return 0;

                const char* q = p+3;
                q = strchr( q, ';' );

                if ( !q || !*q ) return 0;

                delta = q-p;
                --q;

                // Accumulate digits right-to-left, from the one before ';'
                // back to the 'x' marker.
                while ( *q != 'x' )
                {
                    if ( *q >= '0' && *q <= '9' )
                        ucs += mult * (*q - '0');
                    else if ( *q >= 'a' && *q <= 'f' )
                        ucs += mult * (*q - 'a' + 10);
                    else if ( *q >= 'A' && *q <= 'F' )
                        ucs += mult * (*q - 'A' + 10 );
                    else
                        return 0;
                    mult *= 16;
                    --q;
                }
            }
            else
            {
                // Decimal.
                if ( !*(p+2) ) return 0;

                const char* q = p+2;
                q = strchr( q, ';' );

                if ( !q || !*q ) return 0;

                delta = q-p;
                --q;

                // Accumulate digits right-to-left, back to the '#' marker.
                while ( *q != '#' )
                {
                    if ( *q >= '0' && *q <= '9' )
                        ucs += mult * (*q - '0');
                    else
                        return 0;
                    mult *= 10;
                    --q;
                }
            }
            if ( encoding == TIXML_ENCODING_UTF8 )
            {
                // convert the UCS to UTF-8
                ConvertUTF32ToUTF8( ucs, value, length );
            }
            else
            {
                // Non-UTF-8 mode keeps only the low byte of the code point.
                *value = static_cast<char>( ucs );
                *length = 1;
            }
            // delta is the offset of ';' from p; step past it.
            return p + delta + 1;
        }

        // So it wasn't an entity, its unrecognized, or something like that.
        *value = *p;    // Don't put back the last one, since we return it!
        //*length = 1;  // Leave unrecognized entities - this doesn't really work.
                        // Just writes strange XML.
        return p+1;
    }




修改tinyxml.h文件中的部分代码如下:

    // One row of the entity lookup table used by GetEntity().
    struct Entity  
    {  
        const char*     str;        // Entity text as written in the document; compared against the input with strncmp.
        unsigned int    strLength;  // Number of chars of str to compare; GetEntity() asserts it equals strlen(str).
        char            chr[3];// Replacement text. Per the original author, a special character is usually at most two bytes long, so this is a 3-char string (two bytes plus the terminating NUL).
    };  
    // Sizing constants for the entity table.
    enum  
    {  
        NUM_ENTITY = 8,// Number of special symbols (rows in the entity[] array); can be changed, but must match the number of rows in the table.
        MAX_ENTITY_LENGTH = 6  
      
    }; 



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值