应用部分:
using
System;
using
System.Collections.Generic;
using
System.Text;
using
System.IO;
using
System.Data;
using
System.Text.RegularExpressions;
using
System.Net;
using
System.Threading;


namespace GetBrand
{

/// <summary>
/// Console crawler: requests the paginated brand index pages of brand.chinasspp.com,
/// extracts brand/company pairs from the bold tags of each page and stores them
/// through <see cref="DBHelp"/>. URLs whose download failed are logged and retried.
/// </summary>
class Program
{
    /// <summary>Last index page requested by <see cref="GetUrlAndDoWork"/> (pages 1..813).</summary>
    private const int LastPage = 813;

    /// <summary>Format of an index page URL; {0} is replaced by the page number.</summary>
    private const string PageUrlFormat = "http://brand.chinasspp.com/Index-0-{0}.htm";

    // Markers that delimit the part of the page holding the brand list.
    private const string ListStartMarker = "</SELECT>";
    private const string ListEndMarker = "转到";

    private static readonly Regex OpenBoldTag = new Regex("<b>");
    private static readonly Regex CloseBoldTag = new Regex("</b>");

    // An optionally signed run of digits, i.e. text that is only a number.
    private static readonly Regex IntegerOnly = new Regex(@"^-?\d+$");

    /// <summary>
    /// Program entry point: crawls every page, retries failures, then waits for input.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        GetUrlAndDoWork();
        Console.WriteLine("数据获取完毕,按任意键退出。。。");
        Console.Read();
    }

    /// <summary>
    /// Retries the URLs returned by <see cref="DBHelp.GetErrorDate"/> until no retry is left to do.
    /// </summary>
    /// <remarks>
    /// Nothing in this program flags a Log row as finished, so a URL that was retried
    /// successfully keeps matching the query. To avoid fetching it (and inserting its
    /// brands) over and over, the DoTimes value seen before each retry is remembered:
    /// ReturnByUrl logs a failure through AddLog, which increments DoTimes, so an
    /// unchanged DoTimes on the next pass means the previous retry logged no failure.
    /// Failing URLs drop out of the query once DoTimes exceeds 3, so the loop terminates.
    /// </remarks>
    private static void DoError()
    {
        Dictionary<string, int> doTimesAtLastRetry = new Dictionary<string, int>();
        bool retriedAny;

        do
        {
            retriedAny = false;

            DataSet ds = new DBHelp().GetErrorDate();
            if (ds == null || ds.Tables.Count == 0)
            {
                return;
            }

            DataRowCollection rows = ds.Tables[0].Rows;
            for (int i = 0; i < rows.Count; i++)
            {
                string url = rows[i]["url"].ToString();
                // The query filters on DoTimes<=3, so the column is present and not NULL here.
                int doTimes = Convert.ToInt32(rows[i]["DoTimes"]);

                int seen;
                if (doTimesAtLastRetry.TryGetValue(url, out seen) && seen == doTimes)
                {
                    // The last retry of this URL did not log a new failure: nothing left to do.
                    continue;
                }

                doTimesAtLastRetry[url] = doTimes;
                SaveDate(url, i);
                retriedAny = true;
            }
        } while (retriedAny);
    }

    /// <summary>
    /// Builds the URL of every index page, processes each one, then retries the failures.
    /// </summary>
    private static void GetUrlAndDoWork()
    {
        for (int page = 1; page <= LastPage; page++)
        {
            string url = string.Format(PageUrlFormat, page);
            SaveDate(url, page);
        }

        DoError();
    }

    /// <summary>
    /// Downloads one page, extracts the brand/company pairs and stores the valid ones.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="i">Number shown in the console progress message for this page.</param>
    private static void SaveDate(string url, int i)
    {
        string content = ReturnByUrl(url);
        if (string.IsNullOrEmpty(content))
        {
            return;
        }

        int start = content.IndexOf(ListStartMarker, StringComparison.Ordinal);
        int end = content.LastIndexOf(ListEndMarker, StringComparison.Ordinal);

        // Both markers must be present and in order; otherwise there is no list to parse.
        if (start < 0 || end <= start)
        {
            return;
        }

        string workContent = content.Substring(start, end - start);
        MatchCollection opens = OpenBoldTag.Matches(workContent);
        MatchCollection closes = CloseBoldTag.Matches(workContent);
        Console.WriteLine("================开始获取第" + i.ToString() + "页数据==================");

        DBHelp db = new DBHelp();

        // Bold texts are consumed two at a time (brand name, then company). Only indexes
        // that have both an opening and a closing tag are usable, and a trailing
        // unpaired text is ignored instead of indexing past the end of the matches.
        int usable = Math.Min(opens.Count, closes.Count);
        for (int k = 0; k + 1 < usable; k += 2)
        {
            string brandName = TextBetween(workContent, opens[k], closes[k]);
            string company = TextBetween(workContent, opens[k + 1], closes[k + 1]);

            if (brandName == null || company == null)
            {
                // Closing tag found before its opening tag: malformed markup, skip the pair.
                continue;
            }

            if (IsValidate(brandName))
            {
                Console.WriteLine(brandName + "----" + company);

                Brand model = new Brand();
                model.BrandName = brandName;
                model.ComPan = company;
                model.Pic = "";
                model.Create = DateTime.Now;
                db.Add(model);
            }
        }

        Console.WriteLine("================本页获取数据结束=====================");
    }

    /// <summary>
    /// Returns the text between the end of <paramref name="open"/> and the start of
    /// <paramref name="close"/>, or null when the closing tag starts before that point.
    /// </summary>
    private static string TextBetween(string html, Match open, Match close)
    {
        int from = open.Index + open.Length;
        int length = close.Index - from;
        return length < 0 ? null : html.Substring(from, length);
    }

    /// <summary>
    /// Tells whether a captured text should be kept.
    /// </summary>
    /// <param name="_str">Text captured from a bold tag.</param>
    /// <returns>
    /// false when the text is only an optionally signed integer; true otherwise.
    /// </returns>
    private static bool IsValidate(string _str)
    {
        return !IntegerOnly.IsMatch(_str);
    }

    /// <summary>
    /// Downloads the body of <paramref name="url"/> as text.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>
    /// The response body, or an empty string when the request failed; in that case
    /// the URL is recorded through <see cref="DBHelp.AddLog"/> so it can be retried.
    /// </returns>
    private static string ReturnByUrl(string url)
    {
        string responseFromServer = string.Empty;

        try
        {
            WebRequest request = WebRequest.Create(url);
            request.Credentials = CredentialCache.DefaultCredentials;

            // using blocks release the connection, stream and reader even when reading throws.
            // NOTE(review): Encoding.Default depends on the machine's settings; confirm it
            // matches the encoding the site actually serves.
            using (WebResponse response = request.GetResponse())
            using (Stream dataStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(dataStream, Encoding.Default))
            {
                responseFromServer = reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the cause, remember the URL for a later retry and carry on.
            Console.Error.WriteLine(url + " : " + ex.Message);
            new DBHelp().AddLog(url);
        }

        return responseFromServer;
    }
}
}
数据交互部分:
using
System;
using
System.Collections.Generic;
using
System.Text;
using
System.Data;
using
System.Data.SqlClient;

namespace GetBrand
{

/// <summary>
/// Data-access helper for the Brand and Log tables. Every statement is executed
/// through Ucar.BaseClass.DbHelperSQL with the connection string held by this class.
/// </summary>
class DBHelp
{
    // NOTE(review): server, login and password are hard-coded here; move them to
    // configuration and avoid the sa account before using this outside a local test.
    private readonly string SqlConnection = "server=.;database=pubs;uid=sa;pwd=123123;";

    /// <summary>
    /// Inserts a brand row with the current time as creation date.
    /// </summary>
    /// <param name="BrandName">Brand name; null is stored as an empty string.</param>
    /// <param name="Conmpany">Company name; null is stored as an empty string.</param>
    public void Add(string BrandName, string Conmpany)
    {
        // Values travel as parameters so quotes in scraped text cannot break or alter the statement.
        string sql = "insert into Brand (BrandName,ComPan,[Create]) values (@BrandName,@ComPan,@Create)";

        SqlParameter[] parameters = {
            new SqlParameter("@BrandName", SqlDbType.VarChar, 50),
            new SqlParameter("@ComPan", SqlDbType.VarChar, 100),
            new SqlParameter("@Create", SqlDbType.DateTime)
        };
        parameters[0].Value = BrandName ?? string.Empty;
        parameters[1].Value = Conmpany ?? string.Empty;
        parameters[2].Value = DateTime.Now;

        Ucar.BaseClass.DbHelperSQL.ExecuteSql(sql, SqlConnection, parameters);
    }

    /// <summary>
    /// Inserts a brand row from the BrandName, ComPan, Pic and Create values of a model.
    /// </summary>
    /// <param name="model">Values to insert.</param>
    public void Add(Brand model)
    {
        StringBuilder strSql = new StringBuilder();
        strSql.Append("insert into Brand(");
        strSql.Append("BrandName,ComPan,Pic,[Create])");
        strSql.Append(" values (");
        strSql.Append("@BrandName,@ComPan,@Pic,@Create)");

        SqlParameter[] parameters = {
            new SqlParameter("@BrandName", SqlDbType.VarChar, 50),
            new SqlParameter("@ComPan", SqlDbType.VarChar, 100),
            new SqlParameter("@Pic", SqlDbType.VarChar, 100),
            new SqlParameter("@Create", SqlDbType.DateTime)
        };
        parameters[0].Value = model.BrandName;
        parameters[1].Value = model.ComPan;
        parameters[2].Value = model.Pic;
        parameters[3].Value = model.Create;

        Ucar.BaseClass.DbHelperSQL.ExecuteSql(strSql.ToString(), SqlConnection, parameters);
    }

    /// <summary>
    /// Returns the Log rows with IsValidata = 0 and DoTimes of at most 3.
    /// </summary>
    /// <returns>The result of the query as returned by DbHelperSQL.Query.</returns>
    public DataSet GetErrorDate()
    {
        string sql = "select * from Log where IsValidata=0 and DoTimes<=3";
        return Ucar.BaseClass.DbHelperSQL.Query(sql, SqlConnection);
    }

    /// <summary>
    /// Tells whether the Log table already has a row for a URL.
    /// </summary>
    /// <param name="url">URL to look for.</param>
    /// <returns>The result of DbHelperSQL.Exists for a count query on that URL.</returns>
    public bool Exists(string url)
    {
        StringBuilder strSql = new StringBuilder();
        strSql.Append("select count(1) from Log");
        strSql.Append(" where url= @url");

        SqlParameter[] parameters = { CreateUrlParameter(url) };
        return Ucar.BaseClass.DbHelperSQL.Exists(strSql.ToString(), SqlConnection, parameters);
    }

    /// <summary>
    /// Records a failed URL: increments DoTimes when the URL is already logged,
    /// otherwise inserts a new row with IsValidata '0' and DoTimes 0.
    /// </summary>
    /// <param name="url">URL whose download failed.</param>
    public void AddLog(string url)
    {
        string sql = Exists(url)
            ? "update Log set DoTimes=DoTimes+1 where url = @url"
            : "insert into Log (url,IsValidata,DoTimes) values (@url,'0',0)";

        SqlParameter[] parameters = { CreateUrlParameter(url) };
        Ucar.BaseClass.DbHelperSQL.ExecuteSql(sql, SqlConnection, parameters);
    }

    /// <summary>
    /// Builds the @url parameter shared by the Log statements.
    /// </summary>
    private static SqlParameter CreateUrlParameter(string url)
    {
        // No explicit size: a fixed size would silently cut longer URLs, and a
        // truncated value can never match the stored one.
        SqlParameter parameter = new SqlParameter("@url", SqlDbType.VarChar);
        parameter.Value = url;
        return parameter;
    }
}

/// <summary>
/// Plain data holder for one brand record; its properties mirror the columns of
/// the Brand table (ID, BrandName, ComPan, Pic, Create).
/// </summary>
public class Brand
{
    /// <summary>
    /// Creates an empty record; every property keeps its default value.
    /// </summary>
    public Brand() { }

    #region Model
    private int _id;
    private string _brandName;
    private string _company;
    private string _picture;
    private DateTime _created;

    /// <summary>
    /// Record identifier (column ID).
    /// </summary>
    public int ID
    {
        get { return _id; }
        set { _id = value; }
    }

    /// <summary>
    /// Name of the brand (column BrandName).
    /// </summary>
    public string BrandName
    {
        get { return _brandName; }
        set { _brandName = value; }
    }

    /// <summary>
    /// Name of the company behind the brand (column ComPan).
    /// </summary>
    public string ComPan
    {
        get { return _company; }
        set { _company = value; }
    }

    /// <summary>
    /// Value of the Pic column; presumably a picture path or URL (TODO confirm).
    /// </summary>
    public string Pic
    {
        get { return _picture; }
        set { _picture = value; }
    }

    /// <summary>
    /// Creation time of the record (column Create).
    /// </summary>
    public DateTime Create
    {
        get { return _created; }
        set { _created = value; }
    }
    #endregion Model
}
}
SQL脚本:
-- Recreates the Brand table from scratch: any existing user table named
-- [dbo].[Brand] is dropped first, so running this script discards its rows.
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Brand]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Brand]
GO
-- One row per brand. ID is an auto-incrementing identity (seed 1, step 1);
-- every other column is nullable.
CREATE TABLE [dbo].[Brand] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[BrandName] [varchar] (50) COLLATE Chinese_PRC_CI_AS NULL ,
[ComPan] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Pic] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Create] [datetime] NULL
) ON [PRIMARY]
GO
运行截图:

总结:本程序可以针对某一个特定网站进行简单的数据抓取,但通用的数据抓取方案还在研究中。由于各个网站的页面风格可能不一致,本程序的局限性很大,扩展性很差。