#ifndef _PARSEIMAGE_H_
#define _PARSEIMAGE_H_
//<img src=[ ' "]
/// Incremental scanner that walks a character buffer and hands back the
/// `src` attribute value of each <img> / <embed> tag it encounters.
///
/// The parser keeps only a cursor into the caller's buffer; the buffer is
/// never copied, so it must stay alive for as long as the parser is used.
class CImageTagParser
{
public:
    /// Tag kinds reported through the `type` out-parameter of ParseNext().
    enum
    {
        IMAGE_TYPE = 0, ///< value came from an <img> tag
        MEDIA_TYPE = 1  ///< value came from an <embed> tag
    };

    /// Starts scanning at pData. The pointer is stored as-is (may be NULL,
    /// in which case ParseNext() returns NULL immediately).
    CImageTagParser(const char* pData)
        : m_pNextPos(pData)
    {
    }

    ~CImageTagParser()
    {
    }

    /// Advances to the next tag and returns a pointer to its src value.
    /// @param nLen  receives the number of characters in the value
    /// @param type  receives the tag kind: 0 = image, 1 = multimedia
    /// @return pointer into the original buffer, or NULL when nothing is found
    const char* ParseNext(int &nLen, int &type);

private:
    /// Locates the `src=` attribute name starting from the cursor.
    const char* FindSRC(int &nLen);
    /// Extracts the attribute value that follows `src=`.
    const char* FindSRCLink(int &nLen);

    const char* m_pNextPos; ///< current scan position inside the caller's buffer
};
#endif
#include <stdio.h>
#include "ParseImage.h"
//<img src=[ ' "]
// Returns true when the characters at p spell lowerName, ignoring ASCII case.
// lowerName must consist of lowercase ASCII letters only. The comparison stops
// at the first mismatch, and a NUL in p never matches a letter, so p is never
// read past its terminator.
static bool MatchTagNameNoCase(const char* p, const char* lowerName)
{
    for( ; *lowerName != '\0'; ++p, ++lowerName )
    {
        const char chUpper = static_cast<char>(*lowerName - ('a' - 'A'));
        if( *p != *lowerName && *p != chUpper )
            return false;
    }
    return true;
}

// Scans forward from the current position for the next "<img" or "<embed"
// tag (case-insensitive) and returns the value of its src attribute.
//
// nLen : receives the number of characters in the returned value
//        (the value is NOT NUL-terminated; it points into the input buffer).
// type : set to IMAGE_TYPE for <img> and MEDIA_TYPE for <embed>. Only
//        meaningful when the return value is non-NULL.
//
// Returns a pointer to the first character of the src value, or NULL when the
// parser was constructed with NULL or the end of the data is reached without
// finding another usable src.
const char* CImageTagParser::ParseNext(int &nLen,int &type)
{
    nLen = 0;
    if( m_pNextPos == NULL )
        return NULL;

    while( *m_pNextPos != '\0' )
    {
        if( *m_pNextPos != '<' )
        {
            ++m_pNextPos;
            continue;
        }

        // Number of characters to consume for "<" plus the tag name.
        int nTagChars = 0;
        if( MatchTagNameNoCase(m_pNextPos + 1, "img") )
        {
            type = IMAGE_TYPE;
            nTagChars = 4;
        }
        else if( MatchTagNameNoCase(m_pNextPos + 1, "embed") )
        {
            type = MEDIA_TYPE;
            nTagChars = 6;
        }
        else
        {
            // Not a tag we care about: step over this '<' only, so that a
            // real tag starting a few characters later is still seen.
            ++m_pNextPos;
            continue;
        }

        m_pNextPos += nTagChars;
        nLen = 0;
        const char* pSrc = FindSRC(nLen);
        if( pSrc != NULL )
            return pSrc;
        // No usable src for this tag (for example an empty value). The helper
        // has already moved the cursor forward, so keep scanning from there
        // instead of ending the iteration while input remains.
    }
    return NULL;
}
// Looks for an attribute named "src" (case-insensitive) from the current
// position onward and, once found, delegates to FindSRCLink() to extract its
// value.
//
// An attribute is recognised as: one or more spaces, the letters s-r-c with
// nothing in between, optional spaces, then '='. Only the space character is
// treated as a separator (tabs and newlines are not).
//
// nLen : forwarded to FindSRCLink(), which accumulates the value length in it.
//
// Returns whatever FindSRCLink() returns on a match, or NULL when the end of
// the data is reached without a match. The search is not limited to the
// current tag; it continues until the terminating NUL.
const char* CImageTagParser::FindSRC(int &nLen)
{
    static const char kLower[] = "src";
    static const char kUpper[] = "SRC";

    while( *m_pNextPos != '\0' )
    {
        if( *m_pNextPos != ' ' )
        {
            ++m_pNextPos;
            continue;
        }

        // Consume the whole run of spaces in front of the candidate name.
        while( *m_pNextPos == ' ' )
            ++m_pNextPos;

        // Probe with a separate pointer so a failed match consumes nothing:
        // the cursor never jumps over the terminating NUL or over a space
        // that introduces the real attribute.
        const char* pProbe = m_pNextPos;
        int nMatched = 0;
        while( nMatched < 3
            && ( *pProbe == kLower[nMatched] || *pProbe == kUpper[nMatched] ) )
        {
            ++pProbe;
            ++nMatched;
        }
        if( nMatched != 3 )
            continue;

        // Spaces are allowed between the name and '='.
        while( *pProbe == ' ' )
            ++pProbe;
        if( *pProbe != '=' )
            continue;

        // Found "src=": hand over with the cursor just past the '='.
        m_pNextPos = pProbe + 1;
        return FindSRCLink(nLen);
    }
    return NULL;
}
// Extracts the attribute value that follows "src=". The cursor is expected to
// be positioned just after the '='.
//
// Accepted forms: src=value, src='value', src="value", each optionally
// preceded by spaces. The value ends at:
//   - a space that follows at least one value character,
//   - the closing quote (the same character that opened the value; for an
//     unquoted value any quote character ends it),
//   - '>'.
// A quote of the other kind inside a quoted value is kept as part of the value.
//
// nLen : incremented once for every character that belongs to the value.
//
// Returns a pointer to the first value character (not NUL-terminated; use
// nLen), or NULL when the value is empty or the data ends before the value is
// terminated. On return via a terminator the cursor is one past it.
const char* CImageTagParser::FindSRCLink(int &nLen)
{
    const char* pSrc = NULL;  // first character of the value, once seen
    char chQuote = '\0';      // opening quote character, '\0' while none seen

    for( ; *m_pNextPos != '\0'; ++m_pNextPos )
    {
        const char ch = *m_pNextPos;
        switch( ch )
        {
        case ' ':
            if( pSrc != NULL )
            {
                // A space after value characters ends the value.
                ++m_pNextPos;
                return pSrc;
            }
            // Leading spaces (before or just after the opening quote) are skipped.
            break;
        case '\'':
        case '"':
            if( pSrc == NULL )
            {
                if( chQuote != '\0' )
                {
                    // Second quote with nothing in between: empty value.
                    ++m_pNextPos;
                    return NULL;
                }
                chQuote = ch;
            }
            else if( chQuote == '\0' || ch == chQuote )
            {
                // Closing quote, or a stray quote after an unquoted value.
                ++m_pNextPos;
                return pSrc;
            }
            else
            {
                // The other quote kind inside a quoted value is ordinary content.
                ++nLen;
            }
            break;
        case '>':
            // End of tag always ends the value (pSrc is NULL if it was empty).
            ++m_pNextPos;
            return pSrc;
        default:
            if( pSrc == NULL )
                pSrc = m_pNextPos;
            ++nLen;
            break;
        }
    }
    return NULL;
}
/*
int _tmain(int argc, _TCHAR* argv[])
{
char * buf = new char[1024*1024];
if( buf == NULL )
return 0;
WIN32_FIND_DATAA wd;
HANDLE hFind =FindFirstFileA("biturldata//*.*", &wd);
if( hFind == INVALID_HANDLE_VALUE )
{
delete[] buf;
printf("failed to open file/n");
return 0;
}
do
{
if( wd.dwFileAttributes &FILE_ATTRIBUTE_DIRECTORY )
continue;
char szFilePath[256] = {0 };
strcpy(szFilePath, "biturldata//");
strcat(szFilePath, wd.cFileName);
printf("----------------------------%s----------------------------/n", wd.cFileName);
FILE* fp = fopen(szFilePath, "r");
if( fp != NULL )
{
memset(buf, 0, 1024 * 1024);
fread(buf, 1, 1024*1024, fp);
fclose(fp);
CImageTagParser p(buf);
int n = 0;
const char* t = NULL;
while( (t = p.ParseNext(n) ) != NULL )
{
char szTest[1024] = { 0 };
memcpy(szTest, t, n);
printf("%s/n", szTest);
}
}
system("pause");
}while( FindNextFileA(hFind,&wd) );
FindClose(hFind);
return 0;
}
*/
//if( nTagMatchedStep < 4 )
// { //没找到<IMG之前
// switch( *m_pNextPos )
// {
// case '<':
// nTagMatchedStep = nTagMatchedStep == 0 ? nTagMatchedStep + 1 : 0;
// break;
// case 'i':
// case 'I':
// nTagMatchedStep = nTagMatchedStep == 1 ? nTagMatchedStep + 1 : 0;
// break;
// case 'm':
// case 'M':
// nTagMatchedStep = nTagMatchedStep == 2 ? nTagMatchedStep + 1 : 0;
// break;
// case 'g':
// case 'G':
// nTagMatchedStep = nTagMatchedStep == 3 ? nTagMatchedStep + 1 : 0;
// break;
//
// }
// }
// else
// {
// //找到了img
//
// }