using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

namespace EFPlatform.Common.Helper
{
    /// <summary>
    /// Helpers for fetching remote resources that are referenced from HTML.
    /// </summary>
    public class WebRequestHelper
    {
        #region RequestFileFromHtml

        /// <summary>
        /// Matches absolute http/https/ftp URLs that end in .gif, .jpg or .png and whose host
        /// ends in one of the listed top-level domains or in a numeric label (IP address).
        /// Whitespace, quotes and angle brackets terminate a URL so a match cannot run past
        /// the end of an HTML attribute value.
        /// </summary>
        private static readonly Regex ImageUrlRegex = new Regex(
            @"(http|https|ftp)://([A-Za-z0-9_-]+[.])+(net|com|cn|org|cc|tv|[0-9]{1,3})[^\s""'<>]*/[^\s""'<>]+[.](gif|jpg|png)",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads every remote image referenced in <paramref name="html"/> and rewrites the
        /// references so that they point at the downloaded copies.
        /// </summary>
        /// <param name="html">HTML text to scan. Null or empty input is returned unchanged.</param>
        /// <param name="fileUrl">
        /// Prefix that is concatenated with the generated file name to form the replacement URL.
        /// </param>
        /// <param name="filePath">
        /// Prefix that is concatenated (not path-combined) with the generated file name to form
        /// the download destination, so it normally has to end with a directory separator.
        /// </param>
        /// <param name="reservedHost">
        /// Host whose images are left untouched (compared case-insensitively). Null reserves no host.
        /// </param>
        /// <returns>
        /// The HTML with each successfully downloaded image URL replaced by
        /// <paramref name="fileUrl"/> + generated file name. URLs whose download failed are kept as-is.
        /// </returns>
        public static string RequestFileFromHtml(string html, string fileUrl, string filePath, string reservedHost)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            List<Uri> urlList = CollectRemoteImageUrls(html, reservedHost);
            if (urlList.Count == 0)
            {
                return html;
            }

            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < urlList.Count; i++)
                {
                    Uri url = urlList[i];
                    string fileExt = url.AbsoluteUri.Substring(url.AbsoluteUri.LastIndexOf('.'));

                    // Timestamp plus list index keeps names distinct within a single call.
                    string fileName = string.Format("{0:yyMMddHHmmssff}{1}{2}", DateTime.Now, i, fileExt);

                    try
                    {
                        client.DownloadFile(url, filePath + fileName);
                    }
                    catch (WebException)
                    {
                        // Best effort: an image that cannot be fetched or saved keeps its original URL.
                        continue;
                    }

                    string localUrl = fileUrl + fileName;

                    // Replace the text exactly as it appeared in the HTML; Uri may have normalised
                    // it (for example lower-cased the host), in which case AbsoluteUri alone
                    // would not be found in the document.
                    html = html.Replace(url.OriginalString, localUrl);
                    if (!string.Equals(url.AbsoluteUri, url.OriginalString, StringComparison.Ordinal))
                    {
                        html = html.Replace(url.AbsoluteUri, localUrl);
                    }
                }
            }

            return html;
        }

        /// <summary>
        /// Returns the distinct image URLs found in the HTML, in order of first appearance,
        /// excluding malformed matches and URLs hosted on the reserved host.
        /// </summary>
        private static List<Uri> CollectRemoteImageUrls(string html, string reservedHost)
        {
            List<Uri> urlList = new List<Uri>();

            foreach (Match match in ImageUrlRegex.Matches(html))
            {
                Uri url;
                if (!Uri.TryCreate(match.Value, UriKind.Absolute, out url))
                {
                    // The pattern is looser than the URI grammar; skip anything Uri rejects.
                    continue;
                }

                if (string.Equals(url.Host, reservedHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Each URL is judged on its own; an earlier duplicate must not hide later URLs.
                if (!urlList.Contains(url))
                {
                    urlList.Add(url);
                }
            }

            return urlList;
        }

        #endregion
    }
}