using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using Dang.Utils;
namespace MyTest.CaiJi
{
    /// <summary>
    /// Code-behind for the caiji01 page. The page fetches a list page, extracts
    /// detail-page addresses found between user-supplied markers, and then downloads
    /// the file referenced by each detail page into the site's "flashsrc" directory.
    /// </summary>
    public partial class caiji01 : System.Web.UI.Page
    {
        // NOTE(review): every field below is static, so its value is shared by all
        // instances of this page class (and therefore by concurrent users). They are
        // left public and static because code outside this class may read them
        // (presumably the progress page polled by the .aspx script - confirm before
        // changing how they are stored).

        /// <summary>HTML of the list page fetched by Button1_Click.</summary>
        public static string reAllListHtml;
        /// <summary>HTML of the detail page most recently fetched by getflash.</summary>
        public static string reAllHtml;
        /// <summary>Absolute address of the list page entered by the user.</summary>
        public static string htmlAddress;
        /// <summary>File address most recently extracted by getflash.</summary>
        public static string flashAddress;
        /// <summary>Directory part of <see cref="htmlAddress"/>, ending with "/".</summary>
        public static string RelativeWay;
        /// <summary>Scheme and host part of <see cref="htmlAddress"/>, ending with "/".</summary>
        public static string RootWay;
        /// <summary>Not referenced anywhere in this class.</summary>
        public static string htmlListLu;
        /// <summary>Detail-page addresses extracted by Button3_Click.</summary>
        public static ArrayList alist;
        /// <summary>Number of addresses processed by getflash (successes and failures).</summary>
        public static int getSavedNum;

        // A single shared generator: creating a new time-seeded Random per call can
        // yield identical sequences for calls made within the same clock tick.
        private static readonly Random sharedRandom = new Random();
        private static readonly object sharedRandomLock = new object();

        // Process-wide counter appended to generated file names to keep them distinct.
        private static int uniqueSequence;

        // Letters used by YieldRandNum.
        private static readonly char[] randomLetters = { 'A', 'b', 'B', 'c', 'C', 'd', 'D', 'e', 'E', 'f', 'F', 'G', 'h', 'H', 'i', 'j', 'J', 'k', 'K', 'L', 'm', 'M', 'n', 'N', 'p', 'P', 'q', 'Q', 'R', 's', 'S', 't', 'T', 'u', 'U', 'v', 'V', 'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z' };

        /// <summary>Resets the processed-address counter on every request.</summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            getSavedNum = 0;
        }

        /// <summary>
        /// Step 1: validates the list-page address, derives the base addresses used to
        /// resolve relative links, fetches the list page and shows the marker panel.
        /// </summary>
        protected void Button1_Click(object sender, EventArgs e)
        {
            Uri listUri;
            if (!Uri.TryCreate(TextBox1.Text.Trim(), UriKind.Absolute, out listUri) || !IsHttpScheme(listUri))
            {
                // Nothing can be fetched from an address that is not absolute http(s).
                Label1.Text += "地址无效!<br />";
                return;
            }

            // The path is deliberately not lower-cased: URL paths can be case-sensitive.
            htmlAddress = listUri.AbsoluteUri;

            // Scheme + host (+ port); correct for both http and https.
            RootWay = listUri.GetLeftPart(UriPartial.Authority) + "/";

            // Everything up to and including the last "/" of the path (query excluded).
            string pathPart = listUri.GetLeftPart(UriPartial.Path);
            RelativeWay = pathPart.Substring(0, pathPart.LastIndexOf('/') + 1);

            reAllListHtml = GetPageHTML(htmlAddress);
            Panel1.Visible = true;
        }

        /// <summary>
        /// Step 2: extracts every address between the two list markers and shows the
        /// panel that asks for the file-address markers.
        /// </summary>
        protected void Button3_Click(object sender, EventArgs e)
        {
            string starstr = TextBox2.Text.Trim();
            string endstr = TextBox3.Text.Trim();
            alist = Get_fileAddresss(starstr, endstr, reAllListHtml);
            Panel1.Visible = false;
            StringDo.Visible = true;
        }

        /// <summary>Step 3 (button): processes every extracted address.</summary>
        protected void Button2_Click(object sender, EventArgs e)
        {
            DownloadAll();
        }

        /// <summary>Step 3 (link button): processes every extracted address.</summary>
        protected void Go_Click(object sender, EventArgs e)
        {
            DownloadAll();
        }

        /// <summary>Runs getflash for each address in <see cref="alist"/>.</summary>
        private void DownloadAll()
        {
            // alist is static state filled by Button3_Click; it is null until then.
            ArrayList addresses = alist;
            if (addresses == null)
            {
                return;
            }

            for (int i = 0; i < addresses.Count; i++)
            {
                getflash(addresses[i].ToString());
            }
        }

        /// <summary>
        /// Fetches one detail page, extracts the file address between the configured
        /// markers, downloads the file, and appends the outcome to the result label.
        /// </summary>
        /// <param name="url">Absolute address of the detail page.</param>
        public void getflash(string url)
        {
            reAllHtml = GetPageHTML(url);
            string starstr = StartUrlString.Text.Trim();
            string endstr = EndUrlString.Text.Trim();
            flashAddress = Get_fileAddress(starstr, endstr, reAllHtml);

            bool isSave = false;
            if (!string.IsNullOrEmpty(flashAddress))
            {
                flashAddress = Tohttp(flashAddress.Trim());
                isSave = SaveFileFromUrl(flashAddress);
            }

            // The address comes from a remote page, so it must be encoded before it is
            // written into the label's HTML.
            string shownUrl = HttpUtility.HtmlEncode(url);
            if (isSave)
            {
                Label1.Text += shownUrl + "成功!<br />";
            }
            else
            {
                Label1.Text += shownUrl + "失败!<br />";
            }

            // Counts every processed address, whether or not the download succeeded.
            getSavedNum += 1;
        }

        /// <summary>
        /// Turns a link taken from a fetched page into an absolute address.
        /// </summary>
        /// <param name="str">Absolute, root-relative or document-relative link.</param>
        /// <returns>
        /// The link unchanged when it is already an absolute http(s) address; otherwise
        /// the link resolved against <see cref="RelativeWay"/>. Null when
        /// <paramref name="str"/> is null.
        /// </returns>
        public string Tohttp(string str)
        {
            if (str == null)
            {
                return null;
            }

            Uri parsed;
            if (Uri.TryCreate(str, UriKind.Absolute, out parsed) && IsHttpScheme(parsed))
            {
                return str;
            }

            // Standard URI resolution handles "/root", "sub/page", "../page" and "//host/x".
            Uri baseUri;
            Uri resolved;
            if (!string.IsNullOrEmpty(RelativeWay)
                && Uri.TryCreate(RelativeWay, UriKind.Absolute, out baseUri)
                && Uri.TryCreate(baseUri, str, out resolved))
            {
                return resolved.AbsoluteUri;
            }

            // Fallback when no usable base address is available: plain concatenation,
            // avoiding the double slash that RootWay's trailing "/" would produce.
            if (str.StartsWith("/", StringComparison.Ordinal))
            {
                return (RootWay ?? string.Empty).TrimEnd('/') + str;
            }
            return RelativeWay + str;
        }

        /// <summary>
        /// Returns the text between the first start marker and the first end marker
        /// that follows it.
        /// </summary>
        /// <param name="startstr">Marker that precedes the wanted text.</param>
        /// <param name="endstr">Marker that follows the wanted text.</param>
        /// <param name="strResult">Text to search.</param>
        /// <returns>The enclosed text, or null when an argument is null or a marker is missing.</returns>
        public string Get_fileAddress(string startstr, string endstr, string strResult)
        {
            if (startstr == null || endstr == null || strResult == null)
            {
                return null;
            }

            int start = strResult.IndexOf(startstr, StringComparison.Ordinal);
            if (start == -1)
            {
                return null;
            }

            // Search for the end marker only after the start marker; an earlier
            // occurrence of the end marker must not be picked up.
            int contentStart = start + startstr.Length;
            int stop = strResult.IndexOf(endstr, contentStart, StringComparison.Ordinal);
            if (stop == -1)
            {
                return null;
            }

            return strResult.Substring(contentStart, stop - contentStart);
        }

        /// <summary>
        /// Collects every piece of text enclosed by the two markers and converts each
        /// one to an absolute address with <see cref="Tohttp"/>.
        /// </summary>
        /// <param name="startstr">Marker that precedes each address.</param>
        /// <param name="endstr">Marker that follows each address.</param>
        /// <param name="strResult">Text to search.</param>
        /// <returns>The addresses in document order; empty when nothing can be extracted.</returns>
        public ArrayList Get_fileAddresss(string startstr, string endstr, string strResult)
        {
            ArrayList list = new ArrayList();

            // An empty start marker matches everywhere without consuming text, which
            // would never terminate; an empty end marker yields only empty addresses.
            if (string.IsNullOrEmpty(startstr) || string.IsNullOrEmpty(endstr) || string.IsNullOrEmpty(strResult))
            {
                return list;
            }

            int position = 0;
            while (true)
            {
                int start = strResult.IndexOf(startstr, position, StringComparison.Ordinal);
                if (start == -1)
                {
                    break;
                }

                int contentStart = start + startstr.Length;
                int stop = strResult.IndexOf(endstr, contentStart, StringComparison.Ordinal);
                if (stop == -1)
                {
                    break;
                }

                list.Add(Tohttp(strResult.Substring(contentStart, stop - contentStart)));

                // Resume at the end marker itself (not after it) so that markers sharing
                // characters keep matching the way they always have.
                position = stop;
            }
            return list;
        }

        /// <summary>
        /// Downloads the file at the given address to the local "flashsrc" directory.
        /// </summary>
        /// <param name="Url">Absolute http(s) address of the file.</param>
        /// <returns>
        /// True when the file was saved. False when the address is not usable, has no
        /// file extension, the response content type starts with "text/", or the
        /// download fails.
        /// </returns>
        public bool SaveFileFromUrl(string Url)
        {
            Uri fileUri;
            if (!Uri.TryCreate(Url, UriKind.Absolute, out fileUri) || !IsHttpScheme(fileUri))
            {
                return false;
            }

            string fileExt = GetSafeExtension(fileUri);
            if (fileExt == null)
            {
                return false;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(fileUri);
                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType ?? string.Empty;
                    if (contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase))
                    {
                        return false;
                    }
                    return SaveBinaryFile("flashsrc", fileExt, response);
                }
            }
            catch (Exception err)
            {
                // Best effort by design: one failing download must not stop the batch.
                System.Diagnostics.Trace.TraceWarning("SaveFileFromUrl failed for {0}: {1}", Url, err.Message);
                return false;
            }
        }

        /// <summary>
        /// Extracts the extension of the last path segment of an address.
        /// </summary>
        /// <returns>The extension without the dot, or null when there is none or it
        /// contains anything other than letters and digits.</returns>
        private static string GetSafeExtension(Uri uri)
        {
            // Use the path only, so that the host name and the query string can never
            // end up in the file name.
            string path = uri.AbsolutePath;
            string lastSegment = path.Substring(path.LastIndexOf('/') + 1);
            int dot = lastSegment.LastIndexOf('.');
            if (dot < 0 || dot == lastSegment.Length - 1)
            {
                return null;
            }

            string extension = lastSegment.Substring(dot + 1);
            foreach (char c in extension)
            {
                if (!char.IsLetterOrDigit(c))
                {
                    return null;
                }
            }

            // NOTE(review): the extension still comes from a remote page and the file is
            // stored under the web root; consider allowing only known media extensions
            // so that server-executable files (e.g. .aspx) can never be saved.
            return extension;
        }

        /// <summary>True when the address uses the http or https scheme.</summary>
        private static bool IsHttpScheme(Uri uri)
        {
            return uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps;
        }

        /// <summary>
        /// Writes the body of a response to a newly named file on disk.
        /// </summary>
        /// <param name="fileDirectory">Site-root-relative directory to save into (e.g. "flashsrc").</param>
        /// <param name="fileNameExt">Extension of the saved file, without the dot.</param>
        /// <param name="response">Response whose body is saved.</param>
        /// <returns>True when the whole body was written; false on any failure.</returns>
        private bool SaveBinaryFile(string fileDirectory, string fileNameExt, WebResponse response)
        {
            string fileName = null;
            try
            {
                string dirpath = Server.MapPath("/" + fileDirectory + "/");
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(dirpath);

                fileName = Path.Combine(dirpath, GetUniquelyString() + "." + fileNameExt);

                byte[] buffer = new byte[1024];
                using (Stream inStream = response.GetResponseStream())
                using (Stream outStream = File.Create(fileName))
                {
                    int read;
                    while ((read = inStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        outStream.Write(buffer, 0, read);
                    }
                }
                return true;
            }
            catch (Exception err)
            {
                System.Diagnostics.Trace.TraceWarning("SaveBinaryFile failed: {0}", err.Message);
                DeleteQuietly(fileName);
                return false;
            }
        }

        /// <summary>Removes a partially written file, ignoring any failure to do so.</summary>
        private static void DeleteQuietly(string fileName)
        {
            if (fileName == null)
            {
                return;
            }
            try
            {
                File.Delete(fileName);
            }
            catch (Exception)
            {
                // Cleanup only; the original failure has already been reported.
            }
        }

        /// <summary>
        /// Builds a file name that does not repeat within this process.
        /// </summary>
        /// <returns>
        /// Five random letters, the current day, month, hour, minute, second and
        /// millisecond, and a process-wide sequence number, with every "1" replaced
        /// by "q".
        /// </returns>
        public static string GetUniquelyString()
        {
            DateTime dt = DateTime.Now;

            // The sequence number, not randomness, is what keeps names distinct when
            // several are generated within the same millisecond.
            int sequence = System.Threading.Interlocked.Increment(ref uniqueSequence) & int.MaxValue;

            StringBuilder name = new StringBuilder(32);
            name.Append(YieldRandNum(5));
            name.Append(PrefixSingleDigit(dt.Day, "a"));
            name.Append(PrefixSingleDigit(dt.Month, "i"));
            name.Append(PrefixSingleDigit(dt.Hour, "n"));
            name.Append(PrefixSingleDigit(dt.Minute, "j"));
            name.Append(PrefixSingleDigit(dt.Second, "n"));
            // Fixed width so the millisecond digits cannot run into the sequence digits.
            name.Append(dt.Millisecond.ToString("D3"));
            name.Append(sequence.ToString());

            return name.ToString().Replace("1", "q");
        }

        /// <summary>Formats a value as two characters, prefixing single digits with a letter.</summary>
        private static string PrefixSingleDigit(int value, string prefix)
        {
            return (value > 9) ? value.ToString() : prefix + value.ToString();
        }

        /// <summary>
        /// Produces a string of random letters.
        /// </summary>
        /// <param name="d">Number of letters to produce; values below 1 give an empty string.</param>
        /// <returns>A string of <paramref name="d"/> letters.</returns>
        public static string YieldRandNum(int d)
        {
            StringBuilder sb = new StringBuilder(d > 0 ? d : 0);
            // Random instances are not safe for concurrent use.
            lock (sharedRandomLock)
            {
                for (int i = 0; i < d; i++)
                {
                    sb.Append(randomLetters[sharedRandom.Next(0, randomLetters.Length)]);
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Downloads the HTML of the given address.
        /// </summary>
        /// <param name="url">Absolute address of the page.</param>
        /// <returns>The page text, or an empty string when the page cannot be fetched.</returns>
        public static string GetPageHTML(string url)
        {
            try
            {
                WebRequest request = WebRequest.Create(url);
                // NOTE(review): this sends the server process's default credentials to
                // whatever host the address names - confirm this is intended.
                request.Credentials = CredentialCache.DefaultCredentials;
                request.Timeout = 2000;

                using (WebResponse response = request.GetResponse())
                {
                    // Only HTTP(S) responses are accepted, as before.
                    if (!(response is HttpWebResponse))
                    {
                        return string.Empty;
                    }

                    using (Stream stream = response.GetResponseStream())
                    using (StreamReader sr = new StreamReader(stream, ResolveEncoding(response.ContentType)))
                    {
                        return sr.ReadToEnd();
                    }
                }
            }
            catch (Exception err)
            {
                // Callers treat an empty string as "page not available".
                System.Diagnostics.Trace.TraceWarning("GetPageHTML failed for {0}: {1}", url, err.Message);
                return string.Empty;
            }
        }

        /// <summary>
        /// Picks the text encoding for a response: the charset explicitly named in the
        /// Content-Type header when it is one this runtime knows, otherwise the system
        /// default encoding.
        /// </summary>
        private static Encoding ResolveEncoding(string contentType)
        {
            const string charsetKey = "charset=";
            if (!string.IsNullOrEmpty(contentType))
            {
                int keyIndex = contentType.IndexOf(charsetKey, StringComparison.OrdinalIgnoreCase);
                if (keyIndex >= 0)
                {
                    string charset = contentType.Substring(keyIndex + charsetKey.Length);
                    int end = charset.IndexOf(';');
                    if (end >= 0)
                    {
                        charset = charset.Substring(0, end);
                    }
                    charset = charset.Trim().Trim('"', '\'');

                    try
                    {
                        return Encoding.GetEncoding(charset);
                    }
                    catch (ArgumentException)
                    {
                        // Unknown charset name: fall through to the default.
                    }
                    catch (NotSupportedException)
                    {
                        // Charset not supported on this platform: fall through.
                    }
                }
            }
            return Encoding.Default;
        }
    }
}


<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="caiji01.aspx.cs" Inherits="MyTest.CaiJi.caiji01" validateRequest="false" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>无标题页</title>
<script type="text/javascript">
// Request object shared by createXMLHttpRequest, pollServer and pollCallback.
var xmlHttp;
// Declared but not referenced by any function in this script.
var key;
/**
 * Creates a new request object and stores it in the global xmlHttp.
 * The standard XMLHttpRequest is preferred; the ActiveX control is only a
 * fallback for browsers that lack it. xmlHttp is set to null when neither
 * is available, so a stale object from an earlier call is never reused.
 */
function createXMLHttpRequest() {
    if (window.XMLHttpRequest) {
        xmlHttp = new XMLHttpRequest();
    } else if (window.ActiveXObject) {
        xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
    } else {
        xmlHttp = null;
    }
}

/**
 * Sends an asynchronous POST to Loading.aspx; the reply is handled by
 * pollCallback. Does nothing when no request object could be created.
 */
function pollServer() {
    createXMLHttpRequest();
    if (!xmlHttp) {
        return;
    }

    // Declared locally so that it no longer leaks into the global scope.
    var data = "load=";
    var url = "Loading.aspx";

    xmlHttp.open("POST", url, true);
    xmlHttp.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
    xmlHttp.onreadystatechange = pollCallback;
    xmlHttp.send(data);
}

/**
 * Handles the reply to pollServer: updates the progress bar and label, then
 * either schedules the next poll or shows the completion message.
 * Replies that are not finished, not status 200, or not numeric are ignored,
 * so an error page can no longer be displayed as "complete".
 */
function pollCallback() {
    if (xmlHttp.readyState != 4 || xmlHttp.status != 200) {
        return;
    }

    // The reply is text; convert it once so that the comparison below is numeric.
    var percent_complete = parseFloat(xmlHttp.responseText);
    if (isNaN(percent_complete)) {
        return;
    }
    // Keep the bar width within a valid range.
    percent_complete = Math.max(0, Math.min(100, percent_complete));

    var progress = document.getElementById("progress");
    var progressPersent = document.getElementById("progressPersent");
    progress.style.width = percent_complete + "%";
    progressPersent.innerHTML = percent_complete + "%";

    if (percent_complete < 100) {
        // Pass the function itself rather than a string to be evaluated.
        setTimeout(pollServer, 2000);
    } else {
        document.getElementById("complete").innerHTML = "已生成完成!";
    }
}

/**
 * Resets the progress texts: blanks the percentage cell and shows the
 * "start" message in the completion cell.
 */
function clearBar() {
    document.getElementById("progressPersent").innerHTML = " ";
    document.getElementById("complete").innerHTML = "开始生成!";
}
/**
 * Starts progress polling and then triggers the postback of the "Go" link
 * button. Always returns false.
 */
function next() {
    pollServer();
    __doPostBack('Go', '');
    return false;
}
</script>

</head>
<body>
<form id="form1" runat="server">
<div>结果:<asp:Label
ID="Label1" runat="server" Text=""></asp:Label><br />
获取列表页面:<asp:TextBox ID="TextBox1" runat="server"></asp:TextBox>
<asp:Button ID="Button1" runat="server" Text="下一步" OnClick="Button1_Click" />
<asp:Panel runat="server" ID="Panel1" Visible="false">
开始循环标记:<asp:TextBox Text="" TextMode="MultiLine" runat="server" ID="TextBox2"></asp:TextBox><br />
结束循环结束:<asp:TextBox Text="" TextMode="MultiLine" runat="server" ID="TextBox3"></asp:TextBox><br />
<asp:Button ID="Button3" runat="server" Text="下一步" OnClick="Button3_Click" />
</asp:Panel>
<asp:Panel runat="server" ID="StringDo" Visible="false">
开始标记:<asp:TextBox Text="" TextMode="MultiLine" runat="server" ID="StartUrlString"></asp:TextBox><br />
结束标记:<asp:TextBox Text="" TextMode="MultiLine" runat="server" ID="EndUrlString"></asp:TextBox><br />
<a href="javascript:next();">下一步</a><asp:Button ID="Button2" runat="server" Text="下一步" OnClick="Button2_Click" />
<asp:LinkButton ID="Go" runat="server" Text="生成" OnClick="Go_Click"></asp:LinkButton>
</asp:Panel>
</div>
<div id="progressBar" style="padding:0px;border:solid black 0px;visibility:hidden">
<table width="300" border="0" cellspacing="0" cellpadding="0" align="center" >
<tr>
<td align="center" id="progressPersent" >0%</td>
</tr>
<tr >
<td>
<table width="100%" border="1" cellspacing="0" cellpadding="0" bordercolor="#000000">
<tr>
<td>
<table width="1%" border="0" cellspacing="0" cellpadding="0" bgcolor="#FF0000" id="progress" height="20">
<tr>
<td> </td>
</tr>
</table></td>
</tr>
</table>
</td>
</tr>
<tr>
<td align="center" id="complete"></td>
</tr>
</table>
</div>
</form>
<script type="text/javascript">
// Reset the progress texts; this script sits after the form, so the elements already exist.
clearBar();
</script>
</body>
</html>

本文介绍了一种使用C#实现的网页资源抓取及下载的方法。通过输入目标URL,程序能够解析网页内容,提取所需的链接,并下载指定类型的文件如Flash资源等。文章详细展示了如何构造请求、处理响应及文件保存的全过程。
1284

被折叠的 条评论
为什么被折叠?



