using System;
using System.Net;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.Data.OleDb;
using System.IO;
using System.Text;
using System.Globalization;
using System.Text.RegularExpressions;

namespace GETURL
{
    /// <summary>
    /// Code-behind for a small "URL spider" page: takes an <c>http://</c> URL from the
    /// <c>searchurl</c> text box (or the <c>url</c> query-string parameter), resolves the
    /// host through DNS, downloads the document and reports its title, its approximate
    /// size and whether it carries an <c>http-equiv</c>/<c>content</c> meta declaration.
    /// </summary>
    /// <remarks>
    /// SECURITY NOTE(review): this page makes the web server fetch an arbitrary
    /// caller-supplied URL (including loopback/intranet addresses), which is a
    /// server-side request forgery vector. Restrict who can reach the page or add a
    /// host allow-list before exposing it publicly.
    /// </remarks>
    public class getText : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.Label label_mess;
        protected System.Web.UI.WebControls.Label content;
        protected System.Web.UI.WebControls.Label MetaUrl;
        protected System.Web.UI.WebControls.Label GetTitle;
        protected System.Web.UI.WebControls.Label label_Title;
        protected System.Web.UI.WebControls.Label startSpider;
        protected System.Web.UI.WebControls.TextBox searchurl;
        protected System.Web.UI.WebControls.Button urlButton;
        protected System.Web.UI.WebControls.Label txtsiteurl;

        // Host part of the URL: dot-separated labels of word characters and hyphens,
        // optionally followed by "?..." and/or a trailing dot. The published source had
        // every backslash turned into '/', which made the pattern unmatchable.
        private static readonly Regex HostNameRegex =
            new Regex(@"^(\w+(-\w+)*)(\.(\w+(-\w+)*))*(\?\S*)?(|\.)$");

        // Dotted-quad IPv4 literal. The dots are escaped so that only a real '.' separates
        // the octets (unescaped, "1a2b3c4" was classified as an IP address).
        private static readonly Regex IpV4Regex = new Regex(
            @"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
            + @"\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
            + @"\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
            + @"\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$");

        /// <summary>
        /// Runs the spider automatically when the page is requested with a non-empty
        /// <c>url</c> query-string parameter.
        /// </summary>
        private void Page_Load(Object sender, EventArgs e)
        {
            string requestedUrl = Request.QueryString["url"];
            if (requestedUrl != null && requestedUrl != "")
            {
                searchurl.Text = requestedUrl;
                getHTTP(sender, e);
            }
        }

        /// <summary>
        /// Validates the URL in <c>searchurl</c>, resolves its host, downloads the document
        /// and fills the result labels (<c>label_mess</c>, <c>content</c>, <c>GetTitle</c>,
        /// <c>MetaUrl</c>, <c>startSpider</c>, <c>txtsiteurl</c>).
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        /// <remarks>
        /// Never throws for bad input or network failures: validation problems and
        /// exceptions are reported through the labels.
        /// </remarks>
        public void getHTTP(Object sender, EventArgs e)
        {
            startSpider.Text = "";
            GetTitle.Text = "";
            MetaUrl.Text = "";

            string aUrl = searchurl.Text;
            if (aUrl == "")
            {
                ShowInputError("<hr>Please enter the URL");
                return;
            }

            txtsiteurl.Text = "--spider url'Result!";

            // "http://host/path" splits into { "http:", "", "host", "path", ... }.
            string[] urlParts = aUrl.Split('/');
            if (urlParts[0] != "http:")
            {
                ShowInputError("<hr>Url form must match'http://' ahead!!");
                return;
            }
            if (urlParts.Length < 3)
            {
                ShowInputError("<hr>Please write the whole url text!");
                return;
            }
            if (urlParts[1] != "" || urlParts[2] == "")
            {
                ShowInputError("<hr>Host Domain must Exists!!!");
                return;
            }

            string host = urlParts[2];
            try
            {
                if (!HostNameRegex.IsMatch(host))
                {
                    label_mess.Text = "";
                    content.Text = "<hr>Host domain Wrong!";
                    return;
                }

                // The URL is user input: encode it before echoing it into the page.
                label_mess.Text = "<hr>You spidering the site:<font color=red size='3'>"
                    + HttpUtility.HtmlEncode(aUrl) + "</font><br>";

                bool hostIsIp = IpV4Regex.IsMatch(host);

                // Assigned (not appended) so repeated postbacks do not pile up results
                // on top of the value restored from view state.
                content.Text = DescribeHost(host, hostIsIp);

                // Created inside the try block so a malformed URI is reported to the user.
                // The original set ContentType after GetResponse(), which had no effect on
                // the already-sent request; the dead assignment is dropped.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(aUrl);

                string html;
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, ChooseEncoding(aUrl, host, hostIsIp)))
                {
                    html = reader.ReadToEnd();
                }

                // Analyse once, on the complete document (the original re-ran this on
                // every 256-character chunk and could fail on a half-read tag).
                string title = ExtractTitle(html);
                if (title == null)
                {
                    GetTitle.Text = "No Title Document!";
                }
                else
                {
                    // Decode-then-encode keeps entities such as "&amp;" readable while
                    // neutralising any markup in the remote title.
                    GetTitle.Text = "Spider Title :<font color='red' size='3'>"
                        + HttpUtility.HtmlEncode(HttpUtility.HtmlDecode(title))
                        + "</font>--" + (html.Length / 1024) + "KB<hr color=red>";
                }

                MetaUrl.Text = DescribeMetaTags(html);
            }
            catch (WebException exp)
            {
                ReportFailure("The Urls Spider has Time Out!,Try Again... (" + exp.Status + ")", exp);
            }
            catch (Exception exp)
            {
                ReportFailure("The Urls Spider failed,Try Again...", exp);
            }
        }

        /// <summary>Shows a validation message and clears the result area.</summary>
        private void ShowInputError(string messageHtml)
        {
            label_mess.Text = messageHtml;
            content.Text = "";
        }

        /// <summary>
        /// Reports a failed spider run on the page and in the ASP.NET page trace
        /// (Console output is not visible in a web application).
        /// </summary>
        private void ReportFailure(string message, Exception error)
        {
            Trace.Warn("getHTTP", message, error);
            content.Text = message + "<br>" + HttpUtility.HtmlEncode(error.Message) + "<br>";
            startSpider.Text = "Or you Check the Url is true you want Test!<br>";
        }

        /// <summary>
        /// Builds the HTML fragment describing the host: the reverse-resolved name for an
        /// IP literal, otherwise the domain followed by its resolved addresses.
        /// </summary>
        /// <remarks>
        /// NOTE(review): Dns.GetHostByAddress/GetHostByName are marked obsolete from
        /// .NET 2.0 on; switch to Dns.GetHostEntry once the project targets 2.0 or later.
        /// </remarks>
        private static string DescribeHost(string host, bool hostIsIp)
        {
            if (hostIsIp)
            {
                IPHostEntry entry = Dns.GetHostByAddress(host);
                return "Host Name: <font color=red size='3'> "
                    + HttpUtility.HtmlEncode(entry.HostName) + "</font><hr color='red'>";
            }

            IPAddress[] addresses = Dns.GetHostByName(host).AddressList;
            string[] addressTexts = new string[addresses.Length];
            for (int i = 0; i < addresses.Length; i++)
            {
                addressTexts[i] = addresses[i].ToString();
            }

            // Addresses are comma-separated; the original glued them together unreadably.
            return "Host domain: <font color=red size='3'> "
                + HttpUtility.HtmlEncode(host) + "</font><hr color='red'>"
                + "Host IP: <font color=red size=3>"
                + string.Join(", ", addressTexts) + "</font><hr color=red>";
        }

        /// <summary>
        /// Picks the text encoding used to decode the response, following the page's
        /// legacy host-based rules: UTF-8 for microsoft.com and for non-.htm/.html
        /// resources on two hard-coded addresses, the system default code page otherwise.
        /// </summary>
        private static Encoding ChooseEncoding(string url, string host, bool hostIsIp)
        {
            if (!hostIsIp)
            {
                string lowerHost = host.ToLower(CultureInfo.InvariantCulture);
                if (lowerHost == "microsoft.com" || lowerHost == "www.microsoft.com")
                {
                    return Encoding.UTF8;
                }
                return Encoding.Default;
            }

            if (host != "127.0.0.1" && host != "172.19.23.14")
            {
                return Encoding.Default;
            }

            // Look at the extension of the part before the query string.
            int queryStart = url.IndexOf('?');
            string path = queryStart >= 0 ? url.Substring(0, queryStart) : url;
            string extension = path.Substring(path.LastIndexOf('.') + 1)
                .ToLower(CultureInfo.InvariantCulture);

            if (extension == "htm" || extension == "html")
            {
                return Encoding.Default;
            }
            return Encoding.UTF8;
        }

        /// <summary>
        /// Returns the text between the first <c>&lt;title ...&gt;</c> tag and the next
        /// <c>&lt;/title</c>, matching tags case-insensitively.
        /// </summary>
        /// <returns>The raw title text, or <c>null</c> when no complete title element exists.</returns>
        private static string ExtractTitle(string html)
        {
            // Invariant lower-casing keeps indexes aligned with the original text and
            // avoids culture surprises (e.g. Turkish dotless i in "TITLE").
            string lower = html.ToLower(CultureInfo.InvariantCulture);

            int openTag = lower.IndexOf("<title");
            if (openTag < 0)
            {
                return null;
            }

            int openTagEnd = lower.IndexOf('>', openTag);
            if (openTagEnd < 0)
            {
                return null;
            }

            int closeTag = lower.IndexOf("</title", openTagEnd + 1);
            if (closeTag < 0)
            {
                return null;
            }

            return html.Substring(openTagEnd + 1, closeTag - openTagEnd - 1);
        }

        /// <summary>
        /// Inspects the span from the first <c>&lt;meta</c> tag to the end of the last one
        /// and reports whether it contains both <c>http-equiv=</c> and <c>content=</c>
        /// (spaces ignored, case-insensitive).
        /// </summary>
        /// <returns>"OK", "NO", or "Not Charset Code" when the document has no meta tag.</returns>
        private static string DescribeMetaTags(string html)
        {
            string lower = html.ToLower(CultureInfo.InvariantCulture);

            int firstMeta = lower.IndexOf("<meta");
            if (firstMeta < 0)
            {
                return "Not Charset Code";
            }

            int lastMeta = lower.LastIndexOf("<meta");
            int lastMetaClose = lower.IndexOf('>', lastMeta);

            // If the last tag is unterminated, stop right after its '<', as the original did.
            int spanEnd = lastMetaClose < 0 ? lastMeta + 1 : lastMetaClose + 1;

            // Dropping spaces lets "http-equiv = ..." match the same way as "http-equiv=...".
            string compact = lower.Substring(firstMeta, spanEnd - firstMeta).Replace(" ", "");

            if (compact.IndexOf("http-equiv=") != -1 && compact.IndexOf("content=") != -1)
            {
                return "OK";
            }
            return "NO";
        }

        #region WeisNet WebTools
        /// <summary>Wires up page events before the base initialisation runs.</summary>
        override protected void OnInit(EventArgs e)
        {
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>Subscribes <c>Page_Load</c> to the page's Load event.</summary>
        private void InitializeComponent()
        {
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}
/* Power By WeisNet System */