多线程下载文件这个话题已经是老生常谈了。
在HTTP协议1.1中,允许通过增加一个HTTP Header“Range”来指定下载文件的区间。
所以一般的做法都是:
- 首先获取到文件大小(通过Content-Length)
- 开一个线程池再进行分块下载。
而在具体怎么实现的流程上,还是有差别的。
1. 标准的做法是:首先用一个线程发送HTTP GET指令,服务器会返回Content-Length,并能够根据协议判断出服务器是否支持Range。如果支持Range,则调配其它线程对后续部分分块下载。第一个线程继续下载第1块。
2. 还有一种做法:首先发送HTTP HEAD指令,通过返回的Content-Length进行分块,再调配线程进行下载。
这里提供一个类,属于第2种。
为了提高IO性能,类中可以使用内存映射文件方式进行操作。
- #ifndefCHTTPFILEDOWNLOADER_H_
- #defineCHTTPFILEDOWNLOADER_H_
- #include"Generic.h"
- classCHttpFileDownloader{
- public:
- CHttpFileDownloader();
- virtual~CHttpFileDownloader();
- booldownloadUrlToFile(constchar*lpszUrl,constchar*lpszFile);
- boolwaitForCompletion(void);
- private:
- stringm_strLocalFile;
- pthread_tm_lLeaderThread;
- structsockaddr_inm_stServerAddr;
- charm_szResourceURI[1024];
- charm_szDomain[1024];
- charm_szHost[1024];
- charm_szUrl[1024];
- sem_tm_stDownSem;
- pthread_mutex_tm_stDownloadThreadMutex;
- intm_nDownloadThreadCnt;
- boolm_bFailed;
- sem_tm_stCompleteSem;
- boolm_bSuccess;
- staticvoid*leaderThread(void*param);
- staticvoid*downloadThread(void*param);
- booldownloadProcess(void);
- voiddownloadBlock(unsignedchar*pMemory,intnRangeStart,intnRangeSize);
- boolsendBuffer(intnSocket,char*pBuf,intnSize);
- boolsendStringStream(intnSocket,stringstream&oStream);
- intrecvStringStream(intnSocket,stringstream&oStream);
- std::vector<string>parseResponse(stringstrResponse);
- boolisHttpStatusSuccess(string&strHttpResponse);
- stringgetHeaderValueByName(constchar*lpszHeader,std::vector<string>&vItems);
- };
- #endif/*CHTTPFILEDOWNLOADER_H_*/
- /*
- *CHttpFileDownloader.cpp
- *
- *Createdon:2008-12-15
- *Author:root
- */
- #include"Generic.h"
- #include"CMainApp.h"
- #include"CHttpFileDownloader.h"
// Size of one ranged request: 512 KiB. Parenthesized so the macro stays a
// single operand inside larger expressions (e.g. x % DOWNLOAD_BLOCK_SIZE).
#define DOWNLOAD_BLOCK_SIZE (1024 * 512)
// Maximum number of download threads running at the same time.
#define MAX_DOWNLOAD_THREAD 5
- typedefstruct_tagDownloadTask
- {
- CHttpFileDownloader*pThis;
- unsignedchar*pStart;
- intnSize;
- intnRangeStart;
- }tagDownloadTask,*LPDownloadTask;
- CHttpFileDownloader::CHttpFileDownloader(){
- sem_init(&m_stCompleteSem,0,0);
- }
- CHttpFileDownloader::~CHttpFileDownloader(){
- sem_destroy(&m_stCompleteSem);
- }
- boolCHttpFileDownloader::sendStringStream(intnSocket,stringstream&oStream)
- {
- intnSize=oStream.str().length()*sizeof(char);
- char*pBuffer=newchar[nSize];
- memcpy(pBuffer,oStream.str().c_str(),nSize);
- intnSent=0;
- while(nSent<nSize)
- {
- intnRet=send(nSocket,(char*)(pBuffer+nSent),nSize-nSent,0);
- if(nRet==-1)
- break;
- nSent+=nRet;
- }
- delete[]pBuffer;
- return(nSent==nSize);
- }
- boolCHttpFileDownloader::sendBuffer(intnSocket,char*pBuf,intnSize)
- {
- intnSent=0;
- while(nSent<nSize)
- {
- intnRet=send(nSocket,(char*)(pBuf+nSent),nSize-nSent,0);
- if(nRet==-1)
- break;
- nSent+=nRet;
- }
- return(nSent==nSize);
- }
- intCHttpFileDownloader::recvStringStream(intnSocket,stringstream&oStream)
- {
- intnReceived=0;
- while(true)
- {
- charszBuf[1025]={0};
- intnRet=recv(nSocket,szBuf,1024,0);
- if(nRet==0)
- break;
- if(nRet<0)
- break;
- oStream<<szBuf;
- nReceived+=nRet;
- if(oStream.str().find("\r\n\r\n")!=string::npos)
- break;
- }
- returnnReceived;
- }
- std::vector<string>CHttpFileDownloader::parseResponse(stringstrResponse)
- {
- std::vector<string>vItems;
- size_tnLast=strResponse.find("\r\n\r\n");
- if(nLast>=strResponse.length())
- returnvItems;
- size_tnPos=0;
- while(nPos<nLast)
- {
- size_tnFind=strResponse.find("\r\n",nPos);
- if(nFind>nLast)
- break;
- vItems.push_back(strResponse.substr(nPos,nFind-nPos));
- nPos=nFind+2;
- }
- returnvItems;
- }
- boolCHttpFileDownloader::isHttpStatusSuccess(string&strHttpResponse)
- {
- size_tnBegin=strHttpResponse.find('');
- if(nBegin>strHttpResponse.length())
- returnfalse;
- size_tnEnd=strHttpResponse.find_last_of('');
- if(nEnd>strHttpResponse.length())
- returnfalse;
- stringstrStatusCode=strHttpResponse.substr(nBegin+1,nEnd-nBegin-1);
- intnStatusCode=atoi(strStatusCode.c_str());
- return(nStatusCode>=200&&nStatusCode<300);
- }
- stringCHttpFileDownloader::getHeaderValueByName(constchar*lpszHeader,std::vector<string>&vItems)
- {
- stringstrHeader=lpszHeader;
- std::transform(strHeader.begin(),strHeader.end(),strHeader.begin(),(int(*)(int))std::tolower);
- strHeader.append(":");
- stringstrValue="";
- std::vector<string>::iteratoriter;
- for(iter=vItems.begin();iter!=vItems.end();iter++)
- {
- stringstrItem=*iter;
- std::transform(strItem.begin(),strItem.end(),strItem.begin(),(int(*)(int))std::tolower);
- if(strItem.find(strHeader)!=0)
- continue;
- strValue=strItem.substr(strHeader.length());
- break;
- }
- returnstrValue.erase(0,strValue.find_first_not_of(''));
- }
- boolCHttpFileDownloader::downloadUrlToFile(constchar*lpszUrl,constchar*lpszFile)
- {
- memset(m_szUrl,0,1024);
- memcpy(m_szUrl,lpszUrl,strlen(lpszUrl));
- m_strLocalFile=lpszFile;
- //createthread
- intnErr=pthread_create(&m_lLeaderThread
- ,NULL
- ,&leaderThread
- ,this
- );
- if(nErr!=0)
- {
- CMainApp::getSingleton()->log("Error:pthread_createdownloadleaderthreadfailed.Return=%d,Message=%s"
- ,nErr
- ,strerror(nErr)
- );
- returnfalse;
- }
- returntrue;
- }
- boolCHttpFileDownloader::waitForCompletion(void)
- {
- sem_wait(&m_stCompleteSem);
- returnm_bSuccess;
- }
- void*CHttpFileDownloader::leaderThread(void*param)
- {
- CHttpFileDownloader*pThis=static_cast<CHttpFileDownloader*>(param);
- CMainApp::getSingleton()->log("Info:downloadfile\"%s\"start..."
- ,pThis->m_szUrl
- );
- pThis->m_bSuccess=pThis->downloadProcess();
- sem_post(&pThis->m_stCompleteSem);
- CMainApp::getSingleton()->log("Info:downloadfile\"%s\"%s..."
- ,pThis->m_szUrl
- ,pThis->m_bSuccess?"success":"failed"
- );
- returnNULL;
- }
- boolCHttpFileDownloader::downloadProcess(void)
- {
- //parsetheurlandport
- stringstrUrl=m_szUrl;
- std::transform(strUrl.begin(),strUrl.end(),strUrl.begin(),(int(*)(int))std::tolower);
- size_tuFind=strUrl.find("http://");
- if(uFind!=0)
- {
- CMainApp::getSingleton()->log("Error:InvalidURL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- intnLen=string("http://").length();
- uFind=strUrl.find('/',nLen);
- if(uFind>strUrl.length())
- {
- CMainApp::getSingleton()->log("Error:InvalidURL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- strUrl=m_szUrl;
- stringstrHost=strUrl.substr(nLen,uFind-nLen);
- stringstrResourceURI=strUrl.substr(uFind);
- stringstrDomain=strHost;
- uintuPort=80;
- uFind=strHost.find(':');
- if(uFind<strHost.length())
- {
- strDomain=strHost.substr(0,uFind);
- uPort=atoi(strHost.substr(uFind+1).c_str());
- }
- structhostent*pHostent=gethostbyname(strDomain.c_str());
- if(pHostent==NULL)
- {
- CMainApp::getSingleton()->log("Error:failedtoresolvetheIPaddressfortheURL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- memset(&m_stServerAddr,0,sizeof(m_stServerAddr));
- m_stServerAddr.sin_family=AF_INET;
- m_stServerAddr.sin_port=htons((short)uPort);
- memcpy((char*)&m_stServerAddr.sin_addr.s_addr,pHostent->h_addr_list[0],pHostent->h_length);
- intsock=socket(AF_INET,SOCK_STREAM,0);
- if(sock==-1)
- {
- CMainApp::getSingleton()->log("Error:socketfailed.error=%s"
- ,strerror(errno)
- );
- returnfalse;
- }
- memset(m_szResourceURI,0,1024);
- memcpy(m_szResourceURI,strResourceURI.c_str(),strlen(strResourceURI.c_str()));
- memset(m_szHost,0,1024);
- memcpy(m_szHost,strHost.c_str(),strlen(strHost.c_str()));
- memset(m_szDomain,0,1024);
- memcpy(m_szDomain,strDomain.c_str(),strlen(strDomain.c_str()));
- //populatetheHTTPHEADrequest
- stringstreamstrHttp;
- strHttp<<"HEAD"<<m_szResourceURI<<"HTTP/1.1\r\n";
- strHttp<<"User-Agent:Mozilla/4.0(compatible;MSIE5.00;Windows98)\r\n";
- strHttp<<"Host:"<<m_szHost<<"\r\n";
- strHttp<<"Cache-Control:no-cache\r\n";
- strHttp<<"Pragma:no-cache\r\n";
- strHttp<<"Connection:Keep-Alive\r\n";
- strHttp<<"Accept:*/*\r\n";
- strHttp<<"\r\n";
- intnRet=connect(sock
- ,(structsockaddr*)&m_stServerAddr
- ,sizeof(structsockaddr)
- );
- if(nRet==-1)
- {
- CMainApp::getSingleton()->log("Error:failedtoconnecttoURL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- structtimevaltv={0};
- tv.tv_sec=15;
- if(setsockopt(sock,SOL_SOCKET,SO_RCVTIMEO,(char*)&tv,sizeof(tv)))
- {
- CMainApp::getSingleton()->log("Error:setsockoptfailed(1).error=%s"
- ,strerror(errno)
- );
- returnfalse;
- }
- if(!sendStringStream(sock,strHttp))
- {
- CMainApp::getSingleton()->log("Error:failedtosendtheHTTPHEADrequesttoURL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- stringstreamstrResponse;
- recvStringStream(sock,strResponse);
- shutdown(sock,SHUT_RDWR);
- close(sock);
- //parsetheresponse
- std::vector<string>vItems=parseResponse(strResponse.str());
- if(vItems.size()==0)
- {
- CMainApp::getSingleton()->log("Error:theHTTPHEADresponsecontainsnothing.URL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- if(!isHttpStatusSuccess(vItems[0]))
- {
- CMainApp::getSingleton()->log("Error:%s.URL:%s"
- ,vItems[0].c_str()
- ,m_szUrl
- );
- returnfalse;
- }
- stringstrContentLen=getHeaderValueByName("Content-Length",vItems);
- if(strContentLen.length()==0)
- {
- CMainApp::getSingleton()->log("Error:InvalidContent-LengthinHTTPHEADresponse.URL:%s"
- ,m_szUrl
- );
- returnfalse;
- }
- intnContentLength=atoi(strContentLen.c_str());
- //createfile
- std::ofstreamoutStream;
- outStream.open(m_strLocalFile.c_str(),ios_base::out|ios_base::binary|ios_base::trunc);
- outStream.seekp(nContentLength-1);
- outStream.put('\0');
- outStream.close();
- intfd=open(m_strLocalFile.c_str(),O_RDWR);
- if(fd==-1)
- {
- CMainApp::getSingleton()->log("Error:cannotcreatefile\"%s\".%s"
- ,m_strLocalFile.c_str()
- ,strerror(errno)
- );
- returnfalse;
- }
- unsignedchar*pMemory=(unsignedchar*)mmap(NULL,nContentLength,PROT_WRITE,MAP_SHARED|MAP_POPULATE|MAP_NONBLOCK,fd,0);
- close(fd);
- if(pMemory==MAP_FAILED)
- {
- CMainApp::getSingleton()->log("Error:failedtomapthefile\"%s\"intomemory;size=%d;error=%s"
- ,m_strLocalFile.c_str()
- ,nContentLength
- ,strerror(errno)
- );
- returnfalse;
- }
- mlock(pMemory,nContentLength);
- //createthread
- sem_init(&m_stDownSem,0,MAX_DOWNLOAD_THREAD);
- pthread_mutex_init(&m_stDownloadThreadMutex,NULL);
- m_bFailed=false;
- intnDownloadLength=0;
- m_nDownloadThreadCnt=0;
- while(true)
- {
- sem_wait(&m_stDownSem);
- if(nDownloadLength>=nContentLength||
- m_bFailed)
- {
- if(m_nDownloadThreadCnt==0)
- break;
- else
- continue;
- }
- LPDownloadTaskpTask=(LPDownloadTask)malloc(sizeof(tagDownloadTask));
- pTask->pStart=(unsignedchar*)(pMemory+nDownloadLength);
- pTask->nSize=((DOWNLOAD_BLOCK_SIZE+nDownloadLength)>nContentLength)
- ?(nContentLength-nDownloadLength):DOWNLOAD_BLOCK_SIZE;
- pTask->nRangeStart=nDownloadLength;
- pTask->pThis=this;
- nDownloadLength+=pTask->nSize;
- //createthread
- pthread_tlThread;
- intnErr=pthread_create(&lThread
- ,NULL
- ,&downloadThread
- ,pTask
- );
- if(nErr!=0)
- {
- CMainApp::getSingleton()->log("Error:pthread_createdownloadthreadfailed.Error=%d,Message=%s"
- ,nErr
- ,strerror(nErr)
- );
- m_bFailed=true;
- }
- else
- {
- pthread_mutex_lock(&m_stDownloadThreadMutex);
- m_nDownloadThreadCnt++;
- pthread_mutex_unlock(&m_stDownloadThreadMutex);
- }
- }
- pthread_mutex_destroy(&m_stDownloadThreadMutex);
- sem_destroy(&m_stDownSem);
- if(msync(pMemory,nContentLength,MS_SYNC)==-1)
- {
- CMainApp::getSingleton()->log("Error:failedtomsyncthefile\"%s\"frommemory;size=%d;error=%s"
- ,m_strLocalFile.c_str()
- ,nContentLength
- ,strerror(errno)
- );
- m_bFailed=true;
- }
- munlock(pMemory,nContentLength);
- munmap(pMemory,nContentLength);
- return!m_bFailed;
- }
- void*CHttpFileDownloader::downloadThread(void*param)
- {
- LPDownloadTaskpTask=static_cast<LPDownloadTask>(param);
- pTask->pThis->downloadBlock(pTask->pStart
- ,pTask->nRangeStart
- ,pTask->nSize
- );
- pthread_mutex_lock(&(pTask->pThis->m_stDownloadThreadMutex));
- pTask->pThis->m_nDownloadThreadCnt--;
- pthread_mutex_unlock(&(pTask->pThis->m_stDownloadThreadMutex));
- sem_post(&(pTask->pThis->m_stDownSem));
- free(pTask);
- returnNULL;
- }
- voidCHttpFileDownloader::downloadBlock(unsignedchar*pMemory,intnRangeStart,intnRangeSize)
- {
- CMainApp::getSingleton()->log("Info:downloadblock\"%s\"[%08d-%08d]start..."
- ,m_szUrl
- ,nRangeStart
- ,nRangeStart+nRangeSize-1
- );
- intnReceived=0;
- intnErrorTimes=0;
- while(nReceived<nRangeSize&&nErrorTimes<5&&!m_bFailed)
- {
- intnSocket=socket(AF_INET,SOCK_STREAM,0);
- if(nSocket==-1)
- {
- nErrorTimes++;
- continue;
- }
- intnRet=connect(nSocket
- ,(structsockaddr*)&m_stServerAddr
- ,sizeof(structsockaddr)
- );
- if(nRet==-1)
- {
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error:failedtoconnecttoURL:%s"
- ,m_szUrl
- );
- continue;
- }
- {
- structtimevaltv={0};
- tv.tv_sec=15;
- if(setsockopt(nSocket,SOL_SOCKET,SO_RCVTIMEO,(char*)&tv,sizeof(tv)))
- {
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error:setsockoptfailed(2).error=%s"
- ,strerror(errno)
- );
- continue;
- }
- }
- {
- charszRequest[4096]={0};
- sprintf(szRequest,"GET%sHTTP/1.1\r\n"
- "User-Agent:Mozilla/4.0(compatible;MSIE5.00;Windows98)\r\n"
- "Host:%s\r\n"
- "Cache-Control:no-cache\r\n"
- "Pragma:no-cache\r\n"
- "Connection:Keep-Alive\r\n"
- "Accept:*/*\r\n"
- "Range:bytes=%d-%d\r\n"
- "\r\n"
- ,m_szResourceURI
- ,m_szHost
- ,nRangeStart+nReceived
- ,nRangeStart+nRangeSize-1
- );
- if(!sendBuffer(nSocket,szRequest,strlen(szRequest)))
- {
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- CMainApp::getSingleton()->log("Error:failedtosendtheHTTPGETrequesttoURL:%s"
- ,m_szUrl
- );
- nErrorTimes++;
- continue;
- }
- }
- charszBuf[1024]={0};
- nRet=recv(nSocket,szBuf,1024,0);
- if(nRet<=0)
- {
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- CMainApp::getSingleton()->log("Error:recvfailed(1).returncode=%d,error=%s,URL=%s"
- ,nRet
- ,strerror(errno)
- ,m_szUrl
- );
- nErrorTimes++;
- continue;
- }
- stringstrHttpResponse;
- intnRemain=0;
- intnIndex=0;
- for(nIndex=0;nIndex<nRet;nIndex++)
- {
- if(szBuf[nIndex]=='\r'&&
- szBuf[nIndex+1]=='\n'&&
- szBuf[nIndex+2]=='\r'&&
- szBuf[nIndex+3]=='\n')
- {
- charszTemp[1025]={0};
- memcpy(szTemp,szBuf,nIndex+4);
- strHttpResponse=szTemp;
- nRemain=nRet-(nIndex+4);
- break;
- }
- }
- if(strHttpResponse.length()==0)
- {
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error:theresponsedoesnotcontainaHTTPheader(1),URL:%s"
- ,m_szUrl
- );
- continue;
- }
- std::vector<string>vItems=parseResponse(strHttpResponse);
- if(vItems.size()==0)
- {
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error:theresponsedoesnotcontainaHTTPheader(2).URL:%s"
- ,m_szUrl
- );
- continue;
- }
- if(!isHttpStatusSuccess(vItems[0]))
- {
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- nErrorTimes++;
- CMainApp::getSingleton()->log("Error:%s.URL:%s"
- ,vItems[0].c_str()
- ,m_szUrl
- );
- continue;
- }
- if(nRemain>0)
- {
- memcpy((unsignedchar*)(pMemory+nReceived),&(szBuf[nIndex+4]),nRemain);
- nReceived+=nRemain;
- }
- while((nReceived<nRangeSize)&&!m_bFailed)
- {
- nRet=recv(nSocket,(unsignedchar*)(pMemory+nReceived),nRangeSize-nReceived,0);
- if(nRet<=0)
- {
- CMainApp::getSingleton()->log("Error:recvfalied(2).returncode=%d,error=%s,URL=%s"
- ,nRet
- ,strerror(errno)
- ,m_szUrl
- );
- nErrorTimes++;
- break;
- }
- nReceived+=nRet;
- }
- shutdown(nSocket,SHUT_RDWR);
- close(nSocket);
- }//while
- m_bFailed=m_bFailed?m_bFailed:(nReceived!=nRangeSize);
- CMainApp::getSingleton()->log("Info:downloadblock\"%s\"[%08d-%08d]%s."
- ,m_szUrl
- ,nRangeStart
- ,nRangeStart+nRangeSize-1
- ,(nReceived!=nRangeSize)?"Failed":"Success"
- );
- }