今天给大家列出一些代码,仅供参考
列出数据层和逻辑层的代码

StringHelper类(位于WebPage命名空间)
1
using System;
2
using System.Collections.Generic;
3
using System.Text;
4
using System.Web;
5
using System.Web.SessionState;
6
using System.Web.UI;
7
using System.Web.UI.WebControls;
8
using System.Web.UI.HtmlControls;
9
namespace WebPage
10

{
11
/// <summary>
/// Helper for making text that contains non-ASCII (for example Chinese)
/// characters usable inside a URL.
/// </summary>
public class StringHelper
{
    /// <summary>
    /// Creates a helper instance. All functionality is exposed through static
    /// members; the constructor is kept so existing <c>new StringHelper()</c>
    /// call sites keep compiling.
    /// </summary>
    public StringHelper()
    {
    }

    /// <summary>
    /// Percent-encodes every non-ASCII character of <paramref name="strText"/>
    /// and copies ASCII characters through unchanged, so URL punctuation such
    /// as ':', '/', '?' and '&amp;' is preserved.
    /// </summary>
    /// <param name="strText">Text to encode; may be null or empty.</param>
    /// <returns>
    /// The encoded text, or an empty string when <paramref name="strText"/>
    /// is null or empty.
    /// </returns>
    public static string GetChineseURLCode(string strText)
    {
        if (string.IsNullOrEmpty(strText))
        {
            return string.Empty;
        }

        System.Text.StringBuilder encoded = new System.Text.StringBuilder(strText.Length);
        int index = 0;
        while (index < strText.Length)
        {
            // Keep a surrogate pair together: encoding its halves separately
            // would produce two invalid sequences instead of one character.
            int width = 1;
            if (char.IsHighSurrogate(strText[index])
                && index + 1 < strText.Length
                && char.IsLowSurrogate(strText[index + 1]))
            {
                width = 2;
            }

            encoded.Append(getSpell(strText.Substring(index, width)));
            index += width;
        }

        return encoded.ToString();
    }

    /// <summary>
    /// Encodes a single text element: returns it unchanged when it is pure
    /// ASCII, otherwise returns its <c>HttpUtility.UrlEncode</c> form.
    /// </summary>
    /// <param name="cnChar">One character, or one surrogate pair.</param>
    /// <returns>The element itself or its percent-encoded form.</returns>
    private static string getSpell(string cnChar)
    {
        // Decide by code unit value rather than by the byte length in the
        // machine's default code page, so the result does not depend on the
        // operating system locale.
        for (int i = 0; i < cnChar.Length; i++)
        {
            if (cnChar[i] > 0x7F)
            {
                return System.Web.HttpUtility.UrlEncode(cnChar);
            }
        }

        return cnChar;
    }
}
45
}
46
这个是逻辑层的一个辅助类
WebDetail类
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Data;
using System.Text.RegularExpressions;

/// <summary>
/// Purpose: article detail (final) page class
/// Created: 07-3-5
/// Author: 曹振华
/// </summary>
namespace WebPage


{
/// <summary>
/// Article detail ("final") page. Extends <c>WebPage</c> with the extraction
/// of the article body from the fetched HTML.
/// </summary>
public class WebDetailPage : WebPage
{
    // Regex pattern whose matches are removed from the extracted content.
    private string _strDelRegex;

    /// <summary>
    /// Copies the URL, cut regex, timeout, filter flag and delete regex from
    /// the supplied configuration.
    /// </summary>
    /// <param name="clsDetailPage">Settings for this detail page.</param>
    public WebDetailPage(WebDetailPageConfig clsDetailPage)
    {
        Url = clsDetailPage.DetailUrl;
        CutRegex = clsDetailPage.strCutRegex;
        TimeOut = clsDetailPage.intTimeOut;
        filterFlag = clsDetailPage.filterFlag;
        _strDelRegex = clsDetailPage.strDelRegex;
    }

    /// <summary>
    /// Returns the article content captured by the cut regex's
    /// <c>tmpDetailContent</c> group, trimmed and with everything matching
    /// the delete regex removed.
    /// </summary>
    /// <returns>
    /// The extracted content, or an empty string when the cut regex did not
    /// match or no match object was produced.
    /// </returns>
    public string GetDetailPageHtml()
    {
        // Reading matchRegexHtml runs the cut regex against the page; read it
        // once and reuse the result.
        Match match = matchRegexHtml;

        // The base class may hand back null when matching fails, so guard
        // before touching Success.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpDetailContent"].Value.Trim();

        // A missing delete pattern means "remove nothing"; Regex.Replace would
        // throw on null and is a no-op on an empty pattern.
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }

        return strHtml;
    }
}
}
这个是个页面最终解析类

WebPage类
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;

namespace WebPage


{

/// <summary>
/// Base class for a web page that is downloaded and then cut down to its
/// interesting part with a regular expression.
/// Created: 07-3-5
/// Author: 曹振华
/// </summary>
public class WebPage
{
    private string _strUrl;
    private int _intTimeOut;          // download timeout handed to WebPageTools.GetConent
    private string _strCutHtmlRegex;  // regex that cuts out the main content
    private int _intflag;             // filter flag handed to WebPageTools.GetConent

    /// <summary>URL of the page.</summary>
    public string Url
    {
        get { return _strUrl; }
        set { _strUrl = value; }
    }

    /// <summary>Download timeout passed to <c>WebPageTools.GetConent</c>.</summary>
    public int TimeOut
    {
        get { return _intTimeOut; }
        set { _intTimeOut = value; }
    }

    /// <summary>Regular expression used to cut the main content out of the page.</summary>
    public string CutRegex
    {
        get { return _strCutHtmlRegex; }
        set { _strCutHtmlRegex = value; }
    }

    /// <summary>
    /// Content of the page. Every read calls <see cref="GetUrlstrHtml"/>,
    /// so nothing is cached between reads.
    /// </summary>
    public string PageHtml
    {
        get { return GetUrlstrHtml(); }
    }

    /// <summary>
    /// Result of applying <see cref="CutRegex"/> to the page content. Every
    /// read calls <see cref="GetRegexHtml"/>, which fetches the page again.
    /// </summary>
    public Match matchRegexHtml
    {
        get { return GetRegexHtml(); }
    }

    /// <summary>Filter flag passed to <c>WebPageTools.GetConent</c>.</summary>
    public int filterFlag
    {
        get { return _intflag; }
        set { _intflag = value; }
    }

    /// <summary>
    /// Fetches the page content through <c>WebPageTools.GetConent</c> using
    /// the current URL, timeout and filter flag.
    /// </summary>
    /// <returns>Whatever <c>WebPageTools.GetConent</c> returns for this page.</returns>
    protected string GetUrlstrHtml()
    {
        return WebPageTools.GetConent(_strUrl, _intTimeOut, filterFlag);
    }

    /// <summary>
    /// Fetches the page and applies <see cref="CutRegex"/> to it. The regex is
    /// built with Multiline, IgnorePatternWhitespace, ExplicitCapture and
    /// IgnoreCase.
    /// </summary>
    /// <returns>
    /// The first match, or <c>Match.Empty</c> (whose <c>Success</c> is false)
    /// when matching throws.
    /// </returns>
    protected Match GetRegexHtml()
    {
        Regex reg = new Regex(
            _strCutHtmlRegex,
            RegexOptions.Multiline
                | RegexOptions.IgnorePatternWhitespace
                | RegexOptions.ExplicitCapture
                | RegexOptions.IgnoreCase);

        try
        {
            return reg.Match(GetUrlstrHtml());
        }
        catch (Exception)
        {
            // Best effort: a failed match is reported as "no match". Returning
            // Match.Empty instead of null keeps callers that immediately test
            // match.Success from failing with a NullReferenceException.
            return Match.Empty;
        }
    }
}
}
这个是个页面基类

WebList类
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using System.Collections;

/// <summary>
/// Purpose: news list page class
/// Created: 07-3-5
/// Author: 张杰
/// </summary>
namespace WebPage


{
/// <summary>
/// News list page. Extends <c>WebPage</c> with extraction of the article
/// links on the page and of the link to the next list page.
/// </summary>
public class WebListPage : WebPage
{
    // Options shared by every regex this class builds.
    private const RegexOptions ListRegexOptions =
        RegexOptions.Multiline
        | RegexOptions.IgnorePatternWhitespace
        | RegexOptions.ExplicitCapture
        | RegexOptions.IgnoreCase;

    private string _strDelRegex;       // regex for HTML that should be removed
    private string _strListRegex;      // regex that matches one list item
    private string _strNextPageRegex;  // regex that matches the next-page link
    private string _strListPageHtml;   // list section extracted in the constructor

    /// <summary>
    /// Copies the settings from the configuration and immediately extracts
    /// the list section of the page, which involves fetching the page.
    /// </summary>
    /// <param name="objListPageConfig">Settings for this list page.</param>
    public WebListPage(WebListPageConfig objListPageConfig)
    {
        Url = objListPageConfig.ListUrl;
        CutRegex = objListPageConfig.strCutRegex;
        TimeOut = objListPageConfig.intTimeOut;
        filterFlag = objListPageConfig.filterFlag;

        _strDelRegex = objListPageConfig.strDelRegex;
        _strListRegex = objListPageConfig.strListRegex;
        _strNextPageRegex = objListPageConfig.strNextPageRegex;

        _strListPageHtml = GetListPageHtml();
    }

    /// <summary>
    /// Returns the list section captured by the cut regex's
    /// <c>tmpListContent</c> group, trimmed and with everything matching the
    /// delete regex removed.
    /// </summary>
    /// <returns>
    /// The extracted section, or an empty string when the cut regex did not
    /// match or no match object was produced.
    /// </returns>
    private string GetListPageHtml()
    {
        Match match = matchRegexHtml;

        // The base class may hand back null when matching fails.
        if (match == null || !match.Success)
        {
            return "";
        }

        string strHtml = match.Groups["tmpListContent"].Value.Trim();

        // A missing delete pattern means "remove nothing".
        if (!string.IsNullOrEmpty(_strDelRegex))
        {
            strHtml = Regex.Replace(strHtml, _strDelRegex, "");
        }

        return strHtml;
    }

    /// <summary>
    /// Builds the list of articles found in the list section. Each match of
    /// the list regex contributes one item taken from its <c>Title</c> and
    /// <c>DetailUrl</c> groups.
    /// </summary>
    /// <param name="replaceStr">Text that replaces <paramref name="beReplaceStr"/> in each detail URL.</param>
    /// <param name="beReplaceStr">
    /// Text to replace in each detail URL; when null or empty the URLs are
    /// left unchanged.
    /// </param>
    /// <returns>One item per match; an empty list when nothing matched.</returns>
    public List<ListPageItems> GetListArray(string replaceStr, string beReplaceStr)
    {
        Regex reg = new Regex(_strListRegex, ListRegexOptions);
        MatchCollection mCollect = reg.Matches(_strListPageHtml);
        List<ListPageItems> list = new List<ListPageItems>(mCollect.Count);

        // Visit every match, including the last one (a bound of Count - 1
        // would drop the final item and yield nothing for a single match).
        foreach (Match itemMatch in mCollect)
        {
            string detailUrl = itemMatch.Groups["DetailUrl"].Value.Trim();

            // string.Replace throws on a null or empty search text, so only
            // rewrite the URL when there is something to search for.
            if (!string.IsNullOrEmpty(beReplaceStr))
            {
                detailUrl = detailUrl.Replace(beReplaceStr, replaceStr);
            }

            ListPageItems clsListItem = new ListPageItems();
            clsListItem.Title = itemMatch.Groups["Title"].Value.Trim();
            clsListItem.DetailUrl = detailUrl;
            list.Add(clsListItem);
        }

        return list;
    }

    /// <summary>
    /// Returns the address of the next list page, taken from the
    /// <c>NextPageUrl</c> group of the next-page regex and passed through
    /// <c>StringHelper.GetChineseURLCode</c>.
    /// </summary>
    /// <returns>
    /// The encoded next-page address; the encoding of an empty string when
    /// the regex does not match.
    /// </returns>
    public string GetNextPage()
    {
        string strHtml = "";
        Regex reg = new Regex(_strNextPageRegex, ListRegexOptions);
        Match match = reg.Match(_strListPageHtml);
        if (match.Success)
        {
            strHtml = match.Groups["NextPageUrl"].Value.Trim();
        }

        return StringHelper.GetChineseURLCode(strHtml);
    }
}

/// <summary>
/// One entry of a list page: a title together with the URL of its detail page.
/// </summary>
public class ListPageItems
{
    private string _title;
    private string _detailUrl;

    /// <summary>Title of the entry.</summary>
    public string Title
    {
        get { return this._title; }
        set { this._title = value; }
    }

    /// <summary>URL of the entry's detail page.</summary>
    public string DetailUrl
    {
        get { return this._detailUrl; }
        set { this._detailUrl = value; }
    }
}
}
这个是列表类

WebTool类
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

/// <summary>
/// Purpose: page requests
/// Created: 07-3-5
/// Author: 曹振华
/// </summary>
namespace WebPage


{
/// <summary>
/// Static helpers for downloading pages, completing relative URLs and
/// stripping HTML tags.
/// </summary>
public static class WebPageTools
{
    // Absolute http/https URL: scheme, dotted host name, optional path/query.
    private const string AbsoluteUrlPattern = @"https?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";

    // Scheme and host (with an optional trailing slash) of an absolute URL.
    private const string DomainPattern = @"(?<domain>https?://([\w-]+\.)+[\w-]+(/)?)";

    // From the first '/' to the last '/', e.g. /zh_cn/hairdressing/nxzr/zx/11052507/20060705/
    // The Files group holds that span without its leading '/'.
    private const string RelativeDirPattern = @"/(?<Files>.*/)";

    // Everything up to and including the last '/'.
    private const string LeadingDirPattern = @"(?<domain>.*/)";

    /// <summary>
    /// Turns the address of a following page into a complete URL, using the
    /// first page's address as the base when the address is not absolute.
    /// </summary>
    /// <param name="strFristPage">
    /// Address of the first page; passed through
    /// <c>StringHelper.GetChineseURLCode</c> before use.
    /// </param>
    /// <param name="NextPage">Address of the next page, absolute or relative.</param>
    /// <returns>
    /// <paramref name="NextPage"/> unchanged when it contains an absolute
    /// http/https URL. When it contains a directory part (a '/' followed
    /// later by another '/'), the first page's scheme and host followed by
    /// that directory part and the remaining file name. Otherwise the first
    /// page's directory followed by the next page's file name.
    /// </returns>
    public static string GetCompeletUrl(string strFristPage, string NextPage)
    {
        string strNextPageUrl = NextPage;
        string strDomain = "";
        strFristPage = StringHelper.GetChineseURLCode(strFristPage);

        if (Regex.IsMatch(strNextPageUrl, AbsoluteUrlPattern))
        {
            return strNextPageUrl;
        }

        string strFiles = "";
        Match filesMatch = Regex.Match(NextPage, RelativeDirPattern);
        if (filesMatch.Success)
        {
            strFiles = filesMatch.Groups["Files"].Value.Trim();
        }

        if (strFiles.Length > 0)
        {
            Match domainMatch = Regex.Match(strFristPage, DomainPattern);
            if (domainMatch.Success)
            {
                strDomain = domainMatch.Groups["domain"].Value.Trim();

                // strFiles carries no leading '/', so the host part must end
                // with one or host and path would run together.
                if (!strDomain.EndsWith("/", StringComparison.Ordinal))
                {
                    strDomain += "/";
                }
            }

            string pageName = Regex.Replace(strNextPageUrl, RelativeDirPattern, "");
            strNextPageUrl = strDomain + strFiles + pageName;
        }
        else
        {
            Match dirMatch = Regex.Match(strFristPage, LeadingDirPattern);
            if (dirMatch.Success)
            {
                strDomain = dirMatch.Groups["domain"].Value.Trim();
            }

            strNextPageUrl = Regex.Replace(strNextPageUrl, LeadingDirPattern, "");
            strNextPageUrl = strDomain + strNextPageUrl;
        }

        return strNextPageUrl;
    }

    /// <summary>
    /// Downloads a page with an HTTP GET and decodes it as GB2312.
    /// </summary>
    /// <param name="_strUrl">Address of the page.</param>
    /// <param name="_intTimeOut">Value assigned to <c>HttpWebRequest.Timeout</c>.</param>
    /// <param name="flag">
    /// When 1, double quotes, single quotes, carriage returns, form feeds and
    /// line feeds are removed from the downloaded text.
    /// </param>
    /// <returns>
    /// The page text, or an empty string when the request or the read fails.
    /// </returns>
    public static string GetConent(string _strUrl, int _intTimeOut, int flag)
    {
        string strHtml = "";
        try
        {
            HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(_strUrl);
            myReq.Timeout = _intTimeOut;
            myReq.Method = "GET";

            // using guarantees the response, stream and reader are released
            // even when reading throws.
            using (HttpWebResponse myRes = (HttpWebResponse)myReq.GetResponse())
            using (Stream stream = myRes.GetResponseStream())
            using (StreamReader streamreader = new StreamReader(stream, Encoding.GetEncoding("GB2312")))
            {
                strHtml = streamreader.ReadToEnd();
            }

            if (flag == 1)
            {
                strHtml = Regex.Replace(strHtml, @"[""\r\f\n']", "");
            }
        }
        catch (Exception ex)
        {
            // Deliberately best effort: callers treat an empty string as
            // "page unavailable". Leave a trace so the failure is diagnosable.
            System.Diagnostics.Trace.WriteLine("WebPageTools.GetConent failed for '" + _strUrl + "': " + ex.Message);
        }

        return strHtml;
    }

    /// <summary>
    /// Removes all HTML tags except line breaks, paragraphs and strong
    /// emphasis. The kept tags are swapped for bracketed placeholders, every
    /// remaining tag is stripped, and the placeholders are swapped back.
    /// Only these exact spellings are kept: &lt;BR&gt;, &lt;BR /&gt;,
    /// &lt;BR/&gt;, &lt;Br&gt;, &lt;br&gt;, &lt;P&gt;, &lt;p&gt;, &lt;/P&gt;,
    /// &lt;/p&gt;, &lt;STRONG&gt;, &lt;strong&gt;, &lt;/STRONG&gt; and
    /// &lt;/strong&gt;.
    /// </summary>
    /// <param name="strFilter">HTML text to filter; may be null or empty.</param>
    /// <returns>
    /// The filtered text with kept tags in lower case and every line-break
    /// spelling reduced to &lt;br&gt;; the input itself when it is null or
    /// empty.
    /// </returns>
    public static string FilterPaticularChar(string strFilter)
    {
        if (string.IsNullOrEmpty(strFilter))
        {
            return strFilter;
        }

        // Normalise the kept tags, then hide them behind placeholders.
        strFilter = strFilter.Replace("<BR>", "<br>");
        strFilter = strFilter.Replace("<BR />", "<br>");
        strFilter = strFilter.Replace("<BR/>", "<br>");
        strFilter = strFilter.Replace("<Br>", "<br>");
        strFilter = strFilter.Replace("<br>", "[---]");

        strFilter = strFilter.Replace("<P>", "<p>");
        strFilter = strFilter.Replace("<p>", "[----]");

        strFilter = strFilter.Replace("</P>", "</p>");
        strFilter = strFilter.Replace("</p>", "[/----]");

        strFilter = strFilter.Replace("<STRONG>", "<strong>");
        strFilter = strFilter.Replace("<strong>", "[-----]");

        strFilter = strFilter.Replace("</STRONG>", "</strong>");
        strFilter = strFilter.Replace("</strong>", "[/-----]");

        // Strip every tag that is still present.
        strFilter = Regex.Replace(strFilter, "<[^>]*>", "");

        // Bring the kept tags back.
        strFilter = strFilter.Replace("[---]", "<br>");
        strFilter = strFilter.Replace("[----]", "<p>");
        strFilter = strFilter.Replace("[/----]", "</p>");
        strFilter = strFilter.Replace("[-----]", "<strong>");
        strFilter = strFilter.Replace("[/-----]", "</strong>");

        return strFilter;
    }
}

}
页面工具类

WebConfig类
using System;
using System.Collections.Generic;
using System.Text;

/// <summary>
/// Purpose: settings for the list page class
/// Created: 07-3-5
/// Author: 张杰
/// </summary>
namespace WebPage


{
/// <summary>
/// Common base type of the page configuration classes; declares no members.
/// </summary>
public class WebPageConfig { }

/// <summary>
/// Settings for a list page: its URL, the regular expressions applied to it,
/// the download timeout and the filter flag.
/// </summary>
public class WebListPageConfig : WebPageConfig
{
    private string _listUrl;
    private string _cutPattern;
    private string _listPattern;
    private string _deletePattern;
    private string _nextPagePattern;
    private int _timeOut;
    private int _filterFlag;

    /// <summary>
    /// Creates a fully populated configuration; each argument is stored in
    /// the property of the matching name (<paramref name="strListPage"/> in
    /// <see cref="ListUrl"/>).
    /// </summary>
    public WebListPageConfig(string strListPage, string strCutRegex, string strListRegex, string strDelRegex, string strNextPageRegex, int intTimeOut, int filterFlag)
    {
        this._listUrl = strListPage;
        this._cutPattern = strCutRegex;
        this._listPattern = strListRegex;
        this._deletePattern = strDelRegex;
        this._nextPagePattern = strNextPageRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    /// <summary>URL of the list page.</summary>
    public string ListUrl
    {
        get { return this._listUrl; }
        set { this._listUrl = value; }
    }

    /// <summary>Regex that cuts the list section out of the page.</summary>
    public string strCutRegex
    {
        get { return this._cutPattern; }
        set { this._cutPattern = value; }
    }

    /// <summary>Regex that matches one list item.</summary>
    public string strListRegex
    {
        get { return this._listPattern; }
        set { this._listPattern = value; }
    }

    /// <summary>Regex for content that should be deleted.</summary>
    public string strDelRegex
    {
        get { return this._deletePattern; }
        set { this._deletePattern = value; }
    }

    /// <summary>Regex that matches the next-page link.</summary>
    public string strNextPageRegex
    {
        get { return this._nextPagePattern; }
        set { this._nextPagePattern = value; }
    }

    /// <summary>Download timeout.</summary>
    public int intTimeOut
    {
        get { return this._timeOut; }
        set { this._timeOut = value; }
    }

    /// <summary>Filter flag.</summary>
    public int filterFlag
    {
        get { return this._filterFlag; }
        set { this._filterFlag = value; }
    }
}


/// <summary>
/// Settings for a detail page: its URL, the regular expressions applied to
/// it, the download timeout and the filter flag.
/// </summary>
public class WebDetailPageConfig : WebPageConfig
{
    private string _detailUrl;
    private string _cutPattern;
    private string _deletePattern;
    private int _timeOut;
    private int _filterFlag;

    /// <summary>
    /// Creates a fully populated configuration; each argument is stored in
    /// the property of the matching name (<paramref name="strDetailUrl"/> in
    /// <see cref="DetailUrl"/>).
    /// </summary>
    public WebDetailPageConfig(string strDetailUrl, string strCutRegex, string strDelRegex, int intTimeOut, int filterFlag)
    {
        this._detailUrl = strDetailUrl;
        this._cutPattern = strCutRegex;
        this._deletePattern = strDelRegex;
        this._timeOut = intTimeOut;
        this._filterFlag = filterFlag;
    }

    /// <summary>URL of the detail page.</summary>
    public string DetailUrl
    {
        get { return this._detailUrl; }
        set { this._detailUrl = value; }
    }

    /// <summary>Regex that cuts the article content out of the page.</summary>
    public string strCutRegex
    {
        get { return this._cutPattern; }
        set { this._cutPattern = value; }
    }

    /// <summary>Regex for content that should be deleted.</summary>
    public string strDelRegex
    {
        get { return this._deletePattern; }
        set { this._deletePattern = value; }
    }

    /// <summary>Download timeout.</summary>
    public int intTimeOut
    {
        get { return this._timeOut; }
        set { this._timeOut = value; }
    }

    /// <summary>Filter flag.</summary>
    public int filterFlag
    {
        get { return this._filterFlag; }
        set { this._filterFlag = value; }
    }
}

/// <summary>
/// Plain data holder labelled "database structure" by its author. It carries
/// an address, a kind name and id, a page total, four list regexes, a
/// timeout, two detail regexes, a filter flag and a pair of URL replacement
/// strings.
/// NOTE(review): which regex slot serves which purpose is not visible in this
/// file; confirm against the code that fills and reads it.
/// </summary>
public class DBSoures
{
    private string _urlAddress;
    private string _kindName;
    private int _kindId;
    private int _totalPage;
    private string _listRegex1;
    private string _listRegex2;
    private string _listRegex3;
    private string _listRegex4;
    private int _timeOut;
    private string _detailRegex1;
    private string _detailRegex2;
    private int _flag;
    private string _replaceUrl;
    private string _beReplaceStr;

    /// <summary>
    /// Creates a fully populated instance; each argument is stored in the
    /// property of the same name (<paramref name="intFlag"/> in
    /// <see cref="filterFlag"/>).
    /// </summary>
    public DBSoures(string UrlAddress,string KindName,int KindID,int TotolPage,string ListRegex1,string ListRegex2,string ListRegex3,string ListRegex4,int TimeOut,string DetailRegex1,string DetailRegex2,int intFlag,string ReplaceUrl,string BeReplaceStr)
    {
        this._urlAddress = UrlAddress;
        this._kindName = KindName;
        this._kindId = KindID;
        this._totalPage = TotolPage;
        this._listRegex1 = ListRegex1;
        this._listRegex2 = ListRegex2;
        this._listRegex3 = ListRegex3;
        this._listRegex4 = ListRegex4;
        this._timeOut = TimeOut;
        this._detailRegex1 = DetailRegex1;
        this._detailRegex2 = DetailRegex2;
        this._flag = intFlag;
        this._replaceUrl = ReplaceUrl;
        this._beReplaceStr = BeReplaceStr;
    }

    /// <summary>Address of the source.</summary>
    public string UrlAddress
    {
        get { return this._urlAddress; }
        set { this._urlAddress = value; }
    }

    /// <summary>Name of the kind.</summary>
    public string KindName
    {
        get { return this._kindName; }
        set { this._kindName = value; }
    }

    /// <summary>Identifier of the kind.</summary>
    public int KindID
    {
        get { return this._kindId; }
        set { this._kindId = value; }
    }

    /// <summary>Page total.</summary>
    public int TotolPage
    {
        get { return this._totalPage; }
        set { this._totalPage = value; }
    }

    /// <summary>First list regex.</summary>
    public string ListRegex1
    {
        get { return this._listRegex1; }
        set { this._listRegex1 = value; }
    }

    /// <summary>Second list regex.</summary>
    public string ListRegex2
    {
        get { return this._listRegex2; }
        set { this._listRegex2 = value; }
    }

    /// <summary>Third list regex.</summary>
    public string ListRegex3
    {
        get { return this._listRegex3; }
        set { this._listRegex3 = value; }
    }

    /// <summary>Fourth list regex.</summary>
    public string ListRegex4
    {
        get { return this._listRegex4; }
        set { this._listRegex4 = value; }
    }

    /// <summary>Timeout value.</summary>
    public int TimeOut
    {
        get { return this._timeOut; }
        set { this._timeOut = value; }
    }

    /// <summary>First detail regex.</summary>
    public string DetailRegex1
    {
        get { return this._detailRegex1; }
        set { this._detailRegex1 = value; }
    }

    /// <summary>Second detail regex.</summary>
    public string DetailRegex2
    {
        get { return this._detailRegex2; }
        set { this._detailRegex2 = value; }
    }

    /// <summary>Filter flag.</summary>
    public int filterFlag
    {
        get { return this._flag; }
        set { this._flag = value; }
    }

    /// <summary>Replacement URL text.</summary>
    public string ReplaceUrl
    {
        get { return this._replaceUrl; }
        set { this._replaceUrl = value; }
    }

    /// <summary>Text that is to be replaced.</summary>
    public string BeReplaceStr
    {
        get { return this._beReplaceStr; }
        set { this._beReplaceStr = value; }
    }
}

}
这就是所有的业务层代码,数据层可以对 SQL Server 和 MySQL 进行操作
列出数据层和逻辑层的代码


1

2

3

4

5

6

7

8

9

10



11

12



13

14



15

16


17

18

19

20

21

22



23

24

25

26



27

28

29

30

31

32

33



34

35

36



37

38

39

40



41

42

43

44

45

46

这个是逻辑层的一个辅助类








































































































































































































































































































































































这个是列表类




































































































































































页面工具类











































































































































































































































































































































































































































这就是所有的业务层代码,数据层可以对 SQL Server 和 MySQL 进行操作