using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace LoginkCreditCenter.WebSpiderConsole
{
/// <summary>
/// Console entry point of the spider: asks for a URL, downloads the page and
/// prints every <c>&lt;table ...&gt; ... &lt;/table&gt;</c> fragment found in it.
/// </summary>
class Program
{
    private const string TableOpenTag = "<table";
    private const string TableCloseTag = "</table>";

    // Matches the charset declared in a <meta> tag, in both the HTML4 form
    // (content="text/html; charset=UTF-8") and the HTML5 form (charset="utf-8").
    // Only the charset token itself is captured, without quotes or trailing attributes.
    private static readonly Regex CharSetPattern = new Regex(
        "<meta[^>]*?charset\\s*=\\s*[\"']?([\\w.:-]+)",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Prompts for a URL, downloads it and writes each table fragment to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("\r\n-------Please Enter URl--------\r\n");
        string url = Console.ReadLine();
        string strResponse = GetPageData(url, "");

        int cursor = 0;
        int tableCount = 0;
        string table;
        while (TryReadNextTable(strResponse, ref cursor, out table))
        {
            Console.WriteLine("\r\n-------The IS A Table--------\r\n");
            Console.WriteLine(table);
            tableCount++;
        }

        if (tableCount == 0)
        {
            // Covers an empty/failed download as well as a page without tables.
            Console.WriteLine("\r\n-------No Table Found--------\r\n");
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }

    /// <summary>
    /// Finds the next table fragment in <paramref name="html"/> at or after
    /// <paramref name="cursor"/>. Tag matching is ordinal and case-insensitive.
    /// Nested tables are not balanced: the fragment ends at the first closing tag
    /// that follows the opening tag.
    /// </summary>
    /// <param name="html">Page text to scan; may be null or empty.</param>
    /// <param name="cursor">
    /// Position to start scanning from. On success it is advanced to just past the
    /// returned fragment; on failure it is left unchanged.
    /// </param>
    /// <param name="table">The fragment including both tags, or null if none was found.</param>
    /// <returns>True if a complete fragment was found; otherwise false.</returns>
    internal static bool TryReadNextTable(string html, ref int cursor, out string table)
    {
        table = null;
        if (string.IsNullOrEmpty(html) || cursor < 0 || cursor >= html.Length)
        {
            return false;
        }

        int start = html.IndexOf(TableOpenTag, cursor, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return false;
        }

        // Search for the closing tag only after the opening tag, so a stray
        // "</table>" earlier in the text can never produce a negative length.
        int closeAt = html.IndexOf(TableCloseTag, start, StringComparison.OrdinalIgnoreCase);
        if (closeAt < 0)
        {
            return false;
        }

        int end = closeAt + TableCloseTag.Length;
        table = html.Substring(start, end - start);
        cursor = end;
        return true;
    }

    /// <summary>
    /// Decodes downloaded page bytes to text.
    /// </summary>
    /// <param name="dataBuffer">Raw page bytes; null or empty yields an empty string.</param>
    /// <param name="charSet">
    /// Encoding name to use. When null or empty, the charset declared in the page's
    /// meta tag is used, falling back to "utf-8" when none is declared.
    /// </param>
    /// <returns>
    /// The decoded text. If the charset name is not recognised, the text decoded
    /// with <see cref="Encoding.Default"/> is returned instead of failing.
    /// </returns>
    internal static string DecodePage(byte[] dataBuffer, string charSet)
    {
        if (dataBuffer == null || dataBuffer.Length == 0)
        {
            return string.Empty;
        }

        // First pass with the default encoding: good enough to read the ASCII
        // meta tag, and used as the fallback result.
        string sniffed = Encoding.Default.GetString(dataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            Match declared = CharSetPattern.Match(sniffed);
            charSet = declared.Success ? declared.Groups[1].Value : "utf-8";
        }

        Encoding encoding;
        try
        {
            encoding = Encoding.GetEncoding(charSet);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name: keep the default decoding.
            return sniffed;
        }

        return encoding.GetString(dataBuffer);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns its content as text.
    /// </summary>
    /// <param name="url">Address of the page; null or blank yields an empty string.</param>
    /// <param name="charSet">
    /// Encoding name to decode with, or null/empty to detect it from the page.
    /// </param>
    /// <returns>
    /// The page text, or an empty string when the URL is blank or the download fails.
    /// Failures are reported on the standard error stream; this method does not throw.
    /// </returns>
    private static string GetPageData(string url, string charSet)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return string.Empty;
        }

        byte[] dataBuffer;
        try
        {
            using (WebClient wc = new WebClient())
            {
                // Authenticate with the current user's default credentials.
                // If the server needs cookies or an explicit user name/password,
                // set wc.Headers["Cookie"] or assign a NetworkCredential here.
                wc.Credentials = CredentialCache.DefaultCredentials;
                dataBuffer = wc.DownloadData(url.Trim());
            }
        }
        catch (Exception ex)
        {
            // Deliberately broad to keep the original "never throws" contract.
            // The message goes to stderr rather than being returned as page text,
            // so the caller never parses an error message as HTML.
            Console.Error.WriteLine(ex.Message);
            return string.Empty;
        }

        return DecodePage(dataBuffer, charSet);
    }
}
}
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace LoginkCreditCenter.WebSpiderConsole
{
/// <summary>
/// Console entry point of the spider: asks for a URL, downloads the page and
/// prints every <c>&lt;table ...&gt; ... &lt;/table&gt;</c> fragment found in it.
/// </summary>
class Program
{
    private const string TableOpenTag = "<table";
    private const string TableCloseTag = "</table>";

    // Matches the charset declared in a <meta> tag, in both the HTML4 form
    // (content="text/html; charset=UTF-8") and the HTML5 form (charset="utf-8").
    // Only the charset token itself is captured, without quotes or trailing attributes.
    private static readonly Regex CharSetPattern = new Regex(
        "<meta[^>]*?charset\\s*=\\s*[\"']?([\\w.:-]+)",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Prompts for a URL, downloads it and writes each table fragment to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("\r\n-------Please Enter URl--------\r\n");
        string url = Console.ReadLine();
        string strResponse = GetPageData(url, "");

        int cursor = 0;
        int tableCount = 0;
        string table;
        while (TryReadNextTable(strResponse, ref cursor, out table))
        {
            Console.WriteLine("\r\n-------The IS A Table--------\r\n");
            Console.WriteLine(table);
            tableCount++;
        }

        if (tableCount == 0)
        {
            // Covers an empty/failed download as well as a page without tables.
            Console.WriteLine("\r\n-------No Table Found--------\r\n");
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }

    /// <summary>
    /// Finds the next table fragment in <paramref name="html"/> at or after
    /// <paramref name="cursor"/>. Tag matching is ordinal and case-insensitive.
    /// Nested tables are not balanced: the fragment ends at the first closing tag
    /// that follows the opening tag.
    /// </summary>
    /// <param name="html">Page text to scan; may be null or empty.</param>
    /// <param name="cursor">
    /// Position to start scanning from. On success it is advanced to just past the
    /// returned fragment; on failure it is left unchanged.
    /// </param>
    /// <param name="table">The fragment including both tags, or null if none was found.</param>
    /// <returns>True if a complete fragment was found; otherwise false.</returns>
    internal static bool TryReadNextTable(string html, ref int cursor, out string table)
    {
        table = null;
        if (string.IsNullOrEmpty(html) || cursor < 0 || cursor >= html.Length)
        {
            return false;
        }

        int start = html.IndexOf(TableOpenTag, cursor, StringComparison.OrdinalIgnoreCase);
        if (start < 0)
        {
            return false;
        }

        // Search for the closing tag only after the opening tag, so a stray
        // "</table>" earlier in the text can never produce a negative length.
        int closeAt = html.IndexOf(TableCloseTag, start, StringComparison.OrdinalIgnoreCase);
        if (closeAt < 0)
        {
            return false;
        }

        int end = closeAt + TableCloseTag.Length;
        table = html.Substring(start, end - start);
        cursor = end;
        return true;
    }

    /// <summary>
    /// Decodes downloaded page bytes to text.
    /// </summary>
    /// <param name="dataBuffer">Raw page bytes; null or empty yields an empty string.</param>
    /// <param name="charSet">
    /// Encoding name to use. When null or empty, the charset declared in the page's
    /// meta tag is used, falling back to "utf-8" when none is declared.
    /// </param>
    /// <returns>
    /// The decoded text. If the charset name is not recognised, the text decoded
    /// with <see cref="Encoding.Default"/> is returned instead of failing.
    /// </returns>
    internal static string DecodePage(byte[] dataBuffer, string charSet)
    {
        if (dataBuffer == null || dataBuffer.Length == 0)
        {
            return string.Empty;
        }

        // First pass with the default encoding: good enough to read the ASCII
        // meta tag, and used as the fallback result.
        string sniffed = Encoding.Default.GetString(dataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            Match declared = CharSetPattern.Match(sniffed);
            charSet = declared.Success ? declared.Groups[1].Value : "utf-8";
        }

        Encoding encoding;
        try
        {
            encoding = Encoding.GetEncoding(charSet);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name: keep the default decoding.
            return sniffed;
        }

        return encoding.GetString(dataBuffer);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns its content as text.
    /// </summary>
    /// <param name="url">Address of the page; null or blank yields an empty string.</param>
    /// <param name="charSet">
    /// Encoding name to decode with, or null/empty to detect it from the page.
    /// </param>
    /// <returns>
    /// The page text, or an empty string when the URL is blank or the download fails.
    /// Failures are reported on the standard error stream; this method does not throw.
    /// </returns>
    private static string GetPageData(string url, string charSet)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return string.Empty;
        }

        byte[] dataBuffer;
        try
        {
            using (WebClient wc = new WebClient())
            {
                // Authenticate with the current user's default credentials.
                // If the server needs cookies or an explicit user name/password,
                // set wc.Headers["Cookie"] or assign a NetworkCredential here.
                wc.Credentials = CredentialCache.DefaultCredentials;
                dataBuffer = wc.DownloadData(url.Trim());
            }
        }
        catch (Exception ex)
        {
            // Deliberately broad to keep the original "never throws" contract.
            // The message goes to stderr rather than being returned as page text,
            // so the caller never parses an error message as HTML.
            Console.Error.WriteLine(ex.Message);
            return string.Empty;
        }

        return DecodePage(dataBuffer, charSet);
    }
}
}