int ConstructMap(map<string,vector<pair<int,int>>>&mymap,int beginindex,int endindex)
{
// vector<string> mySplit(string s);
set<string>MakeStopSet();
vector<string>goodWordsinPieceArticle(string rawtext,set<string>stopwords);
CoInitialize(NULL);
_ConnectionPtr pConn(__uuidof(Connection));
_RecordsetPtr pRst(__uuidof(Recordset));
char * select =new char[5000];
memset(select,0,5000);
char *firstpart="select CKeyWord,ArticleId,CAbstract from Article where ArticleId between ";
char *lastpart=" order by ArticleId";
char middlepart1[100];
char middlepart2[100];
sprintf_s(middlepart1,sizeof(middlepart1),"%d",beginindex);
sprintf_s(middlepart2,sizeof(middlepart2),"%d",endindex);
strcat(select,firstpart);
strcat(select,middlepart1);
strcat(select," and ");
strcat(select,middlepart2);
strcat(select,lastpart);
pConn->ConnectionString="Provider=SQLOLEDB.1;Password=xxxxxx;Persist Security Info=True; User ID=sa;Initial Catalog=ArticleCollection";
pConn->Open("","","",adConnectUnspecified);
pRst=pConn->Execute(select,NULL,adCmdText);
set<string>stopwords=MakeStopSet();
while(!pRst->rsEOF)
{ vector<string>wordcollection;
//string keywordstr=(_bstr_t)pRst->GetCollect("CKeyWord");
string rawtext=(_bstr_t)pRst->GetCollect("CAbstract");
if(rawtext!="")
{
wordcollection=goodWordsinPieceArticle(rawtext,stopwords);
string tempid=(_bstr_t)pRst->GetCollect("ArticleId");
int articleid=atoi(tempid.c_str());
for(vector<string>::iterator strit=wordcollection.begin();strit!=wordcollection.end();strit++)
{
vector<pair<int,int>>::iterator it;
if(mymap[*strit].empty())
{
pair<int,int>mytemppair=make_pair(articleid,1);
mymap[*strit].push_back(mytemppair);
}
else
{
for(it=mymap[*strit].begin();it!=mymap[*strit].end();it++)
{
if(it->first==articleid)
{
it->second=++(it->second);
break;
}
}
if(it==mymap[*strit].end())
{
pair<int,int>mytemppair=make_pair(articleid,1);
mymap[*strit].push_back(mytemppair);
}
}
}
}
pRst->MoveNext();
wordcollection.clear();
}
pRst->Close();
pConn->Close();
pRst.Release();
pConn.Release();
CoUninitialize();
delete[] select;
return 0;
}