返回URL方法:
/// <summary>
/// Collects the value of a named regex capture group from every match found in
/// the given text; intended for pulling URL addresses out of HTML.
/// </summary>
/// <param name="html">The HTML text to search.</param>
/// <param name="regstr">The regular expression pattern; it is applied with <see cref="RegexOptions.IgnoreCase"/>.</param>
/// <param name="keyname">The name of the capture group whose value is taken from each match.</param>
/// <returns>
/// An <see cref="ArrayList"/> holding one lower-cased string per match, in match order.
/// When there is no match, the list holds a single empty string rather than being empty.
/// </returns>
public ArrayList geturl(string html, string regstr, string keyname)
{
    ArrayList resultStr = new ArrayList();
    Regex r = new Regex(regstr, RegexOptions.IgnoreCase);

    foreach (Match m in r.Matches(html))
    {
        // Lower-case with the invariant culture: URLs are machine-readable, so the
        // result must not vary with the current thread culture (e.g. Turkish "I" -> "ı").
        resultStr.Add(m.Groups[keyname].Value.ToLowerInvariant());
    }

    if (resultStr.Count == 0)
    {
        // No address found: callers receive a single empty string.
        resultStr.Add("");
    }

    return resultStr;
}
- 链接地址:
- 正则表达式: "((http|https|ftp):(\/\/|\\\\)((\w)+[.]){1,}(txt|rar|doc|iso|pdf|zip|xls|[0-9]{1,3})(((\/[\~]*|\\[\~]*)(\w)+)|[.](\w)+)*(((([?](\w)+){1}[=]*))*((\w)+){1}([\&](\w)+[\=](\w)+)*)*)";
- 图片:
- <IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>
- FLASH:
- <embed[^>]+src=\s*(?:'(?<src>[^']\S*.swf\b+)'|""(?<src>[^""]\S*.swf\b+)""|(?<src>[^>\s]\S*.swf\b+))\s*[^>]*>