自己用 expat 做的一个 RSS 解析器,大体成功,记录一下心得,然后有时间再继续进行。
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include "./expat_mine/expat.h"

/* Non-zero while character data of a tracked element should be captured;
 * set by startElement(), cleared by endElement(). */
int elementShowFlag = 0;
/* Not referenced anywhere in the visible code. */
int elementTitleNum = 0;

/* Text of the first <title> seen (the feed title). Starts out empty. */
char rssMainTitle[256] = "";

/* Text of every later <title>, one row per call that stored one. */
char rssSubTitle[30][256] = {""};

/* Accumulated <description>/<summary> text, one row per element. */
char rssContent[30][5120] = {""};

/* Raw bytes of the XML document as read from disk. */
char RssStoreBuf[256 * 1024];

/*
 * Which kind of element the current character data belongs to:
 *   0 = nothing seen yet, 1 = main title, 2 = sub title, 3 = content.
 */
int CharacterDataStat = 0;

/* Number of rows used in rssSubTitle / rssContent. */
int rssSubTitleNum = 0;
int rssContentNum = 0;

/* Number of valid bytes in RssStoreBuf. */
size_t rssLen;

void
SetElementShowFlag(
int
value)

...
{
elementShowFlag = value;
}
int
GetElementShowFlag()

...
{
return elementShowFlag;
}

int
GetCharacterDataStat()
/**/
/*1main title 2sub titles 3cdata*/

...
{
return CharacterDataStat;
}
/*
 * Remove every occurrence of each given substring from `sor`, in place.
 *
 * num - number of variadic arguments that follow `sor`
 * sor - NUL-terminated, writable string to edit
 * ... - `num` arguments of type char *, the substrings to delete
 *
 * Patterns are processed in the order given. After each deletion the search
 * restarts from the beginning of `sor`, so a match that only appears once
 * its middle has been deleted is removed as well.
 */
void rssDelSubString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int remaining;

    if (sor == NULL)
        return;

    va_start(arg_ptr, sor);
    for (remaining = num; remaining > 0; remaining--) {
        const char *pattern = va_arg(arg_ptr, char *);
        size_t patternLen;
        char *hit;

        /* An empty pattern matches everywhere and would never terminate. */
        if (pattern == NULL || pattern[0] == '\0')
            continue;

        patternLen = strlen(pattern);
        while ((hit = strstr(sor, pattern)) != NULL) {
            /* Source and destination overlap, so strcpy is not allowed here. */
            memmove(hit, hit + patternLen, strlen(hit + patternLen) + 1);
        }
    }
    va_end(arg_ptr);
}
/*
 * Remove markup tags from `sor`, in place.
 *
 * num - number of variadic arguments that follow `sor`
 * sor - NUL-terminated, writable string to edit
 * ... - `num` arguments of type char *, each the opening text of a tag
 *       (for example "<A "); everything from that text up to and including
 *       the next '>' is deleted
 *
 * If a tag start has no closing '>', the text is left untouched and
 * processing moves on to the next pattern.
 */
void rssDelElementString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int remaining;

    if (sor == NULL)
        return;

    va_start(arg_ptr, sor);
    for (remaining = num; remaining > 0; remaining--) {
        const char *prefix = va_arg(arg_ptr, char *);
        char *open;

        if (prefix == NULL || prefix[0] == '\0')
            continue;

        while ((open = strstr(sor, prefix)) != NULL) {
            char *close = strchr(open, '>');

            /* Unterminated tag: stop instead of scanning past the NUL. */
            if (close == NULL)
                break;

            /* Regions overlap, so memmove rather than strcpy. */
            memmove(open, close + 1, strlen(close + 1) + 1);
        }
    }
    va_end(arg_ptr);
}

//
tag的开头处理函数
void
startElement(
void
*
userData,
const
char
*
name,
const
char
**
atts)

...
{
int i;
int *depthPtr = userData;
if((0 == strcmp(name,"title"))||(0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
if(0==strcmp(name,"title") )

...{
CharacterDataStat = (0==GetCharacterDataStat()) ? 1:2;
}
else if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
CharacterDataStat = 3;
}
for (i = 0; i < *depthPtr; i++)
putchar(' ');
printf ("XML: <%s", name);
i = 0;
while (atts[i] && *atts[i] && atts[i][0] != 0)

...{
if (i % 2)

...{
printf ("=");
printf (""%s"", atts[i]);
}
else

...{
printf (" ");
printf ("%s", atts[i]);
}
i++;
}
printf ("> ");
SetElementShowFlag(1);
}
*depthPtr += 1;
}
//
tag的结束处理函数
void
endElement(
void
*
userData,
const
char
*
name)

...
{
int *depthPtr = userData;
if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
rssDelElementString(2,rssContent[rssContentNum],"<A ","<IMG ");
rssContentNum++;
}
*depthPtr -= 1;
SetElementShowFlag(0);
}

//
字符处理函数
void
CharacterDataHandler(
void
*
userData,
const
XML_Char
*
s,
int
len)

...
{
char strtmp[10240] = "o";
int i;
int tempstat;
if(GetElementShowFlag())

...{
for(i=0;i<len;i++)

...{
if(s[i] == ' ')
continue;
strtmp[i]=s[i];
}
strtmp[len]='o';
tempstat = GetCharacterDataStat();
// printf("tempstat:%d",tempstat);
switch(tempstat)

...{
case 1:

...{
// printf("main title*%s* ",strtmp);
strcpy(rssMainTitle,strtmp);
break;
}
case 2:

...{
// printf("Sub title*%s* ",strtmp);
strcpy(rssSubTitle[rssSubTitleNum],strtmp);
rssSubTitleNum++;
break;
}
case 3:

...{
if(0!=strlen(strtmp))

...{
// printf("Content~%s~ ",strtmp);

rssDelSubString(8,strtmp," ","<br/>","</FONT>","<P>","</P>","<DIV>","</DIV>","</A>");/**//*del 7 substr*/
// printf("Content_%s_ ",strtmp);
if(0!=strlen(strtmp))

...{
strcat(rssContent[rssContentNum],strtmp);
}
}
break;
}
}
//strcat(Cdatastring,strtmp);
//printf("%s ",strtmp);
}
}
/*
 * Default handler: prints the given data with spaces removed, followed by a
 * single space.
 *
 * userData - unused
 * s        - data to print; only the first `len` characters are valid
 * len      - number of characters in `s`
 *
 * Input longer than the 1024-byte scratch buffer is truncated.
 */
void DefaultHandler(void *userData, const XML_Char *s, int len)
{
    char strtmp[1024];
    size_t out = 0;
    int i;

    (void)userData;

    /* Compact while copying and leave room for the terminator. */
    for (i = 0; i < len && out < sizeof strtmp - 1; i++) {
        if (s[i] == ' ')
            continue;
        strtmp[out++] = s[i];
    }
    strtmp[out] = '\0';

    printf("%s ", strtmp);
}

int
main()

...
{
openDoc();
rsstest(RssStoreBuf);
}
int
rsstest(
char
*
buf)

...
{
int done = 0;
int depth = 0;
int i = 0;
XML_Parser parser = XML_ParserCreate(NULL);
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser,CharacterDataHandler);
// XML_SetDefaultHandlerExpand(parser,DefaultHandler);

if (XML_Parse(parser, buf, rssLen, done) == XML_STATUS_ERROR) return 1;
XML_ParserFree(parser);

/**//* printf("EXPACT Finished! ");
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s",Cdatastring);
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s ",rssMainTitle);
printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssSubTitleNum ;i++)
printf("%s ",rssSubTitle[i]);
*/ printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssContentNum ; i++)
printf("%s ",rssContent[i]);

return 0;
}

int
openDoc()

...
{
FILE* fp;
fp = fopen("./xml/bf1111.xml","rb");
if(!fp)

...{
printf("ERROR:XML NOT FOUND ");
return 1;
}
rssLen = fread(RssStoreBuf, 1, 256*1024, fp);
fclose(fp);
}
FT,还是老规矩,贴出来的代码里 \0 全部用 o 代替了(例如 '\0' 显示为 'o')~木有办法~