/// <summary>
/// Static helpers for issuing simple HTTP GET/POST requests and for pulling
/// fragments out of HTML text with regular expressions.
/// </summary>
public class HttpHelper
{
    // User-Agent sent by every request except GetStream.
    private const string DefaultUserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; CIBA)";

    // User-Agent sent by GetStream only.
    private const string StreamUserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    // Accept header sent by the POST helpers.
    private const string PostAccept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-silverlight, */*";

    // Content-Type sent by the POST helpers.
    private const string FormContentType = "application/x-www-form-urlencoded";

    // Matches an <img .../> tag whose only attributes are title=... and/or src=...;
    // the named group "url" holds the src value.
    private static readonly Regex ImgSrcRegex = new Regex(
        @"(?is)<img (?:title=(['""]?)?(?<title>[^'""]*?)\1\s*|src=(['""]?)?(?<url>[^'""]*?)\2\s*)*/>",
        RegexOptions.IgnoreCase);

    // Matches <a ... href="..." ...>text</a>; group 1 is the href value.
    // Attribute scanning is confined to the opening tag by [^>]* so that
    // neighbouring anchors are never merged into a single match.
    private static readonly Regex AnchorHrefRegex = new Regex(
        "<a\\s+(?:[^>]*?\\s+)?href=\"([^\"]*)\"[^>]*>(.*?)</a>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Builds a GET request for <paramref name="url"/>.
    /// </summary>
    private static HttpWebRequest CreateGetRequest(string url, string userAgent, CookieContainer cookies)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.CookieContainer = cookies;
        request.Method = "GET";
        request.UserAgent = userAgent;
        return request;
    }

    /// <summary>
    /// Builds a form-encoded POST request for <paramref name="action"/> with the given body length.
    /// </summary>
    private static HttpWebRequest CreatePostRequest(string action, int contentLength, CookieContainer cookies)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(action);
        request.Method = "POST";
        request.ContentType = FormContentType;
        request.Accept = PostAccept;
        request.UserAgent = DefaultUserAgent;
        request.ContentLength = contentLength;
        request.CookieContainer = cookies;
        return request;
    }

    /// <summary>
    /// Sends <paramref name="request"/>, reads the whole response body as UTF-8
    /// and releases the response.
    /// </summary>
    private static string ReadBody(HttpWebRequest request)
    {
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Issues a GET request and returns the raw response stream.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cookies">Cookie container attached to the request.</param>
    /// <returns>
    /// The response stream, or <c>null</c> if anything fails. The caller owns
    /// the returned stream and must close it.
    /// </returns>
    public static Stream GetStream(string url, ref CookieContainer cookies)
    {
        try
        {
            HttpWebRequest request = CreateGetRequest(url, StreamUserAgent, cookies);
            WebResponse response = request.GetResponse();
            try
            {
                return response.GetResponseStream();
            }
            catch
            {
                // The stream never reached the caller, so nobody else can release the response.
                response.Close();
                throw;
            }
        }
        catch (Exception)
        {
            // Callers rely on the null sentinel for every kind of failure.
            return null;
        }
    }

    /// <summary>
    /// Issues a GET request without cookies and returns the body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>The response body, or the literal text "wrong page" if anything fails.</returns>
    public static string GetHTML(string url)
    {
        try
        {
            return ReadBody(CreateGetRequest(url, DefaultUserAgent, null));
        }
        catch
        {
            // Callers rely on this sentinel for every kind of failure.
            return "wrong page";
        }
    }

    /// <summary>
    /// Issues a GET request with the given cookie container and returns the body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="cookies">Cookie container attached to the request.</param>
    /// <returns>The response body, or an empty string if anything fails.</returns>
    public static string doGet(string url, ref CookieContainer cookies)
    {
        try
        {
            return ReadBody(CreateGetRequest(url, DefaultUserAgent, cookies));
        }
        catch (Exception)
        {
            // Callers rely on the empty-string sentinel for every kind of failure.
            return "";
        }
    }

    /// <summary>
    /// Prepares a form-encoded POST request and returns its request stream.
    /// Content-Length is set to the UTF-8 byte count of <paramref name="data"/>,
    /// but nothing is written to the stream here. The request object itself is
    /// not returned.
    /// </summary>
    /// <param name="action">Address to post to.</param>
    /// <param name="data">Form data whose UTF-8 length becomes the Content-Length.</param>
    /// <param name="cookies">Cookie container attached to the request.</param>
    /// <returns>
    /// The request stream, or <c>null</c> if anything fails. The caller owns
    /// the returned stream and must close it.
    /// </returns>
    public static Stream PostStream(string action, string data, ref CookieContainer cookies)
    {
        try
        {
            int length = Encoding.UTF8.GetByteCount(data);
            return CreatePostRequest(action, length, cookies).GetRequestStream();
        }
        catch (Exception)
        {
            // Callers rely on the null sentinel for every kind of failure.
            return null;
        }
    }

    /// <summary>
    /// Posts <paramref name="data"/> as a UTF-8, form-encoded body and returns
    /// the response body decoded as UTF-8.
    /// </summary>
    /// <param name="action">Address to post to.</param>
    /// <param name="data">Already-encoded form data.</param>
    /// <param name="cookies">Cookie container attached to the request.</param>
    /// <returns>The response body, or an empty string if anything fails.</returns>
    public static string doPost(string action, string data, ref CookieContainer cookies)
    {
        try
        {
            byte[] payload = Encoding.UTF8.GetBytes(data);
            HttpWebRequest request = CreatePostRequest(action, payload.Length, cookies);
            using (Stream body = request.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }
            return ReadBody(request);
        }
        catch (Exception)
        {
            // Callers rely on the empty-string sentinel for every kind of failure.
            return "";
        }
    }

    /// <summary>
    /// Issues a GET request without cookies and returns the body decoded as UTF-8.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>The response body, or an empty string if anything fails.</returns>
    public static string doGet(string url)
    {
        try
        {
            return ReadBody(CreateGetRequest(url, DefaultUserAgent, null));
        }
        catch (Exception)
        {
            // Callers rely on the empty-string sentinel for every kind of failure.
            return "";
        }
    }

    /// <summary>
    /// Returns every non-empty fragment of <paramref name="html"/> found between
    /// a match of <paramref name="start"/> and the nearest following match of
    /// <paramref name="end"/>. Fragments may span line breaks.
    /// </summary>
    /// <param name="start">Opening delimiter; inserted into the regular expression unescaped.</param>
    /// <param name="end">Closing delimiter; inserted into the regular expression unescaped.</param>
    /// <param name="html">Text to search.</param>
    /// <returns>
    /// The captured fragments in document order; empty when nothing matches,
    /// when <paramref name="html"/> is null, or when the delimiters do not
    /// form a valid pattern.
    /// </returns>
    public static IList<string> GetHtmls(string start, string end, string html)
    {
        IList<string> list = new List<string>();
        try
        {
            // [\s\S] matches any character including CR/LF. It replaces the
            // alternation (.|[\r\n]), in which both branches accept '\r' and
            // so allow exponential backtracking on CRLF text with no closing
            // delimiter. The inner group stays capturing to keep numbering stable.
            string pattern = string.Format("{0}(?<g>([\\s\\S])+?){1}", start, end);
            foreach (Match match in Regex.Matches(html, pattern))
            {
                list.Add(match.Groups["g"].Value);
            }
        }
        catch (ArgumentException)
        {
            // Null input or malformed delimiter pattern: report "no matches".
        }
        return list;
    }

    /// <summary>
    /// Returns the first fragment of <paramref name="html"/> found between a
    /// match of <paramref name="start"/> and the nearest following match of
    /// <paramref name="end"/>. The fragment may span line breaks and may be empty.
    /// </summary>
    /// <param name="start">Opening delimiter; inserted into the regular expression unescaped.</param>
    /// <param name="end">Closing delimiter; inserted into the regular expression unescaped.</param>
    /// <param name="html">Text to search.</param>
    /// <returns>
    /// The captured fragment, or an empty string when nothing matches, when
    /// <paramref name="html"/> is null, or when the delimiters do not form a
    /// valid pattern.
    /// </returns>
    public static string GetHtml(string start, string end, string html)
    {
        try
        {
            // See GetHtmls for why [\s\S] is used instead of (.|[\r\n]).
            string pattern = string.Format("{0}(?<g>([\\s\\S])+?)?{1}", start, end);
            return Regex.Match(html, pattern).Groups["g"].Value;
        }
        catch (ArgumentException)
        {
            // Null input or malformed delimiter pattern: report "no match".
            return string.Empty;
        }
    }

    /// <summary>
    /// Concatenates, with no separator, the <c>src</c> values of every
    /// self-closing img tag in <paramref name="aText"/> whose only attributes
    /// are <c>title</c> and/or <c>src</c>.
    /// </summary>
    /// <param name="aText">Text to search; must not be null.</param>
    /// <returns>The concatenated src values, or an empty string when no tag matches.</returns>
    public static string funcGetSrc(string aText)
    {
        StringBuilder sources = new StringBuilder();
        foreach (Match m in ImgSrcRegex.Matches(aText))
        {
            // Read the named group: numbered group 1 is the quote character
            // captured for the title attribute, not the image address.
            sources.Append(m.Groups["url"].Value);
        }
        return sources.ToString();
    }

    /// <summary>
    /// Collects the double-quoted <c>href</c> value of every anchor element in
    /// <paramref name="html"/>. Only anchors whose content and closing tag sit
    /// on the same line as the opening tag are recognised.
    /// </summary>
    /// <param name="html">Text to search; must not be null.</param>
    /// <returns>The href values in document order.</returns>
    public static List<string> Get_A_Href(string html)
    {
        List<string> links = new List<string>();
        foreach (Match match in AnchorHrefRegex.Matches(html))
        {
            links.Add(match.Groups[1].Value);
        }
        return links;
    }

    /// <summary>
    /// Returns the text content of every <paramref name="title"/> element in
    /// <paramref name="str"/> whose content contains no nested tag.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="title">Tag name; inserted into the regular expression unescaped.</param>
    /// <returns>The element contents in document order.</returns>
    public static List<string> GetTitleContent(string str, string title)
    {
        // Captures the text between <title ...> and </title>.
        string pattern = string.Format("<{0}[^>]*?>(?<Text>[^<]*)</{1}>", title, title);
        List<string> list = new List<string>();
        foreach (Match found in Regex.Matches(str, pattern, RegexOptions.IgnoreCase))
        {
            list.Add(found.Groups["Text"].Value);
        }
        return list;
    }

    /// <summary>
    /// Returns the value of attribute <paramref name="attrib"/> from every
    /// <paramref name="title"/> tag in <paramref name="str"/>. Values may be
    /// single-quoted, double-quoted or unquoted, and must not contain
    /// whitespace or quote characters.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="title">Tag name; inserted into the regular expression unescaped.</param>
    /// <param name="attrib">Attribute name; inserted into the regular expression unescaped.</param>
    /// <returns>The attribute values in document order.</returns>
    public static List<string> GetTitleContent(string str, string title, string attrib)
    {
        // Captures the attribute value inside the opening tag.
        string pattern = string.Format("<{0}[^>]*?{1}=(['\"\"]?)(?<url>[^'\"\"\\s>]+)\\1[^>]*>", title, attrib);
        List<string> list = new List<string>();
        foreach (Match found in Regex.Matches(str, pattern, RegexOptions.IgnoreCase))
        {
            list.Add(found.Groups["url"].Value);
        }
        return list;
    }
}