自己用expat做的一个rss解析器,大体成功,记录一下心得,然后有时间继续进行。
#include <stdio.h>
#include <string.h>
#include<stdarg.h>
#include "./expat_mine/expat.h"

/* Set to 1 by startElement for title/description/summary, reset to 0 by endElement. */
int elementShowFlag = 0;
/* Not referenced by any code visible in this file. */
int elementTitleNum = 0;
/* Text stored while CharacterDataStat is 1 (main title). */
char rssMainTitle[256] = "";

/* Text chunks stored while CharacterDataStat is 2 (sub titles). */
char rssSubTitle[30][256] = {""};

/* Text accumulated while CharacterDataStat is 3 (description/summary). */
char rssContent[30][5120] = {""};
/* Raw bytes of the input document, filled by openDoc(). */
char RssStoreBuf[256 * 1024];

/* Destination of character data: 1 = main title, 2 = sub titles, 3 = content. */
int CharacterDataStat = 0;
/* Number of rssSubTitle slots already written. */
int rssSubTitleNum = 0;
/* Index of the rssContent slot currently being filled. */
int rssContentNum = 0;
/* Number of bytes stored in RssStoreBuf. */
size_t rssLen;

void SetElementShowFlag(int value)

...{
elementShowFlag = value;
}
int GetElementShowFlag()

...{
return elementShowFlag;
}

int GetCharacterDataStat()/**//*1main title 2sub titles 3cdata*/

...{
return CharacterDataStat;
}
/*
 * Remove every occurrence of each given substring from `sor`, in place.
 *
 * num - number of variadic pattern arguments following `sor`
 * sor - writable NUL-terminated string to edit; NULL is ignored
 * ... - `num` arguments of type char *, the substrings to delete
 *
 * Patterns are applied one after another in argument order. NULL and empty
 * patterns are skipped (an empty pattern would otherwise match forever).
 */
void rssDelSubString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int remaining;

    if (sor == NULL)
        return;

    va_start(arg_ptr, sor);
    for (remaining = num; remaining > 0; remaining--) {
        const char *pattern = va_arg(arg_ptr, char *);
        size_t pattern_len;
        char *hit;

        if (pattern == NULL || *pattern == '\0')
            continue;

        pattern_len = strlen(pattern);
        /* Restart from the beginning each time: a deletion can join text
         * on both sides into a new occurrence of the pattern. */
        while ((hit = strstr(sor, pattern)) != NULL) {
            /* Source and destination overlap, so memmove (not strcpy) is required. */
            memmove(hit, hit + pattern_len, strlen(hit + pattern_len) + 1);
        }
    }
    va_end(arg_ptr);
}
/*
 * Remove markup tags from `sor`, in place.
 *
 * For each given prefix (e.g. "<A "), every span that starts at an occurrence
 * of the prefix and ends at the next '>' (inclusive) is deleted.
 *
 * num - number of variadic prefix arguments following `sor`
 * sor - writable NUL-terminated string to edit; NULL is ignored
 * ... - `num` arguments of type char *, the tag prefixes to look for
 *
 * An occurrence with no closing '>' is left untouched and ends the search for
 * that prefix. NULL and empty prefixes are skipped.
 */
void rssDelElementString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int remaining;

    if (sor == NULL)
        return;

    va_start(arg_ptr, sor);
    for (remaining = num; remaining > 0; remaining--) {
        const char *prefix = va_arg(arg_ptr, char *);
        char *tag_start;

        if (prefix == NULL || *prefix == '\0')
            continue;

        while ((tag_start = strstr(sor, prefix)) != NULL) {
            char *tag_end = strchr(tag_start, '>');

            /* Unterminated tag: no later occurrence can be closed either. */
            if (tag_end == NULL)
                break;

            /* Overlapping regions, so memmove rather than strcpy. */
            memmove(tag_start, tag_end + 1, strlen(tag_end + 1) + 1);
        }
    }
    va_end(arg_ptr);
}

// tag的开头处理函数
void startElement(void *userData, const char *name, const char **atts)

...{
int i;
int *depthPtr = userData;
if((0 == strcmp(name,"title"))||(0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
if(0==strcmp(name,"title") )

...{
CharacterDataStat = (0==GetCharacterDataStat()) ? 1:2;
}
else if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
CharacterDataStat = 3;
}
for (i = 0; i < *depthPtr; i++)
putchar(' ');
printf ("XML: <%s", name);
i = 0;
while (atts[i] && *atts[i] && atts[i][0] != 0)

...{
if (i % 2)

...{
printf ("=");
printf (""%s"", atts[i]);
}
else

...{
printf (" ");
printf ("%s", atts[i]);
}
i++;
}
printf ("> ");
SetElementShowFlag(1);
}
*depthPtr += 1;
}
// tag的结束处理函数
void endElement(void *userData, const char *name)

...{
int *depthPtr = userData;
if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))

...{
rssDelElementString(2,rssContent[rssContentNum],"<A ","<IMG ");
rssContentNum++;
}
*depthPtr -= 1;
SetElementShowFlag(0);
}

//字符处理函数
void CharacterDataHandler(void *userData, const XML_Char *s, int len)

...{
char strtmp[10240] = "o";
int i;
int tempstat;
if(GetElementShowFlag())

...{
for(i=0;i<len;i++)

...{
if(s[i] == ' ')
continue;
strtmp[i]=s[i];
}
strtmp[len]='o';
tempstat = GetCharacterDataStat();
// printf("tempstat:%d",tempstat);
switch(tempstat)

...{
case 1:

...{
// printf("main title*%s* ",strtmp);
strcpy(rssMainTitle,strtmp);
break;
}
case 2:

...{
// printf("Sub title*%s* ",strtmp);
strcpy(rssSubTitle[rssSubTitleNum],strtmp);
rssSubTitleNum++;
break;
}
case 3:

...{
if(0!=strlen(strtmp))

...{
// printf("Content~%s~ ",strtmp);

rssDelSubString(8,strtmp," ","<br/>","</FONT>","<P>","</P>","<DIV>","</DIV>","</A>");/**//*del 7 substr*/
// printf("Content_%s_ ",strtmp);
if(0!=strlen(strtmp))

...{
strcat(rssContent[rssContentNum],strtmp);
}
}
break;
}
}
//strcat(Cdatastring,strtmp);
//printf("%s ",strtmp);
}
}
/*
 * Default handler: prints the received chunk, minus filtered characters,
 * followed by a space. Its registration in rsstest() is commented out.
 *
 * userData - unused
 * s        - text chunk, NOT NUL-terminated
 * len      - number of characters in `s`
 *
 * At most sizeof(strtmp) - 1 characters are printed per call.
 */
void DefaultHandler(void *userData, const XML_Char *s, int len)
{
    char strtmp[1024];
    size_t used = 0;
    int i;

    (void)userData;

    /* Compact while copying so a dropped character does not cut the string
     * short, and stay inside the scratch buffer. */
    for (i = 0; i < len && used < sizeof strtmp - 1; i++) {
        /* NOTE(review): filtered character is a space as written; it may
         * originally have been '\n' (escapes look lost in this file) - confirm. */
        if (s[i] == ' ')
            continue;
        strtmp[used++] = s[i];
    }
    strtmp[used] = '\0';
    printf("%s ", strtmp);
}

int main()

...{
openDoc();
rsstest(RssStoreBuf);
}
int rsstest(char *buf)

...{
int done = 0;
int depth = 0;
int i = 0;
XML_Parser parser = XML_ParserCreate(NULL);
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser,CharacterDataHandler);
// XML_SetDefaultHandlerExpand(parser,DefaultHandler);

if (XML_Parse(parser, buf, rssLen, done) == XML_STATUS_ERROR) return 1;
XML_ParserFree(parser);

/**//* printf("EXPACT Finished! ");
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s",Cdatastring);
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s ",rssMainTitle);
printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssSubTitleNum ;i++)
printf("%s ",rssSubTitle[i]);
*/ printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssContentNum ; i++)
printf("%s ",rssContent[i]);

return 0;
}

int openDoc()

...{
FILE* fp;
fp = fopen("./xml/bf1111.xml","rb");
if(!fp)

...{
printf("ERROR:XML NOT FOUND ");
return 1;
}
rssLen = fread(RssStoreBuf, 1, 256*1024, fp);
fclose(fp);
}
FT,还是老规矩,/0全部用/o代替了~木有办法~