别的就不多说了,直接上代码。代码很简单,有不懂的地方请留言。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using mshtml;
using System.Threading;
using System.Runtime.InteropServices.ComTypes;
using System.IO;

namespace Eric.Utilities.Html
{
    /// <summary>
    /// Named COM result codes used as the return type of the
    /// <see cref="IPersistStreamInit"/> methods declared below.
    /// Negative values are failures; <c>S_OK</c> and <c>S_FALSE</c> are successes.
    /// </summary>
    public enum HRESULT
    {
        E_FAIL = -2147467259,        // 0x80004005
        E_INVALIDARG = -2147024809,  // 0x80070057
        E_NOINTERFACE = -2147467262, // 0x80004002
        E_NOTIMPL = -2147467263,     // 0x80004001
        E_UNEXPECTED = -2147418113,  // 0x8000FFFF
        S_FALSE = 1,
        S_OK = 0
    }

    /// <summary>
    /// Managed declaration of the COM <c>IPersist</c> interface
    /// (IID 0000010c-0000-0000-C000-000000000046).
    /// </summary>
    [ComImport, Guid("0000010c-0000-0000-C000-000000000046"), InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true)]
    public interface IPersist
    {
        void GetClassID(ref Guid pClassID);
    }

    /// <summary>
    /// Managed declaration of the COM <c>IPersistStreamInit</c> interface
    /// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713).
    /// </summary>
    /// <remarks>
    /// The member order defines the COM vtable layout and must not be changed.
    /// <c>GetClassID</c> is re-declared with <c>new</c> so that the inherited
    /// <see cref="IPersist"/> slot is present in this interface's own vtable.
    /// </remarks>
    [ComImport, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), ComVisible(true), Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
    public interface IPersistStreamInit : IPersist
    {
        new void GetClassID(ref Guid pClassID);

        [PreserveSig]
        int IsDirty();

        [PreserveSig]
        HRESULT Load(IStream pstm);

        [PreserveSig]
        HRESULT Save(IStream pstm, [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

        [PreserveSig]
        HRESULT GetSizeMax([In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

        [PreserveSig]
        HRESULT InitNew();
    }

    /// <summary>
    /// Loads HTML into an MSHTML document object and returns it as an
    /// <see cref="IHTMLDocument3"/> so callers can query the parsed DOM.
    /// </summary>
    public class HtmlParser
    {
        /// <summary>Value of <c>readyState</c> that ends the wait loop in <see cref="Parse"/>.</summary>
        private const string ReadyStateComplete = "complete";

        /// <summary>Delay, in milliseconds, between two <c>readyState</c> checks.</summary>
        private const int ReadyStatePollIntervalMs = 1000;

        /// <summary>
        /// Creates an MSHTML document from <paramref name="url"/> and blocks until
        /// its <c>readyState</c> reports <c>"complete"</c>.
        /// </summary>
        /// <param name="url">URL or local file path handed to <c>createDocumentFromUrl</c>.</param>
        /// <returns>
        /// The loaded document as <see cref="IHTMLDocument3"/>, or <c>null</c> if the
        /// loaded object does not expose that interface.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException"><c>createDocumentFromUrl</c> returned no document.</exception>
        public IHTMLDocument3 Parse(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            HTMLDocument hostDocument = new HTMLDocument();

            // A direct cast reports a missing interface as InvalidCastException
            // instead of a NullReferenceException on the next line.
            IPersistStreamInit persistStream = (IPersistStreamInit)hostDocument;

            // InitNew is declared [PreserveSig], so a failure comes back as a return
            // value. ThrowExceptionForHR throws only for failure (negative) codes.
            Marshal.ThrowExceptionForHR((int)persistStream.InitNew());

            IHTMLDocument2 loadedDocument = hostDocument.createDocumentFromUrl(url, "null");
            if (loadedDocument == null)
            {
                throw new InvalidOperationException("createDocumentFromUrl returned no document for: " + url);
            }

            // NOTE(review): this wait has no timeout; if the document never reaches
            // "complete" the calling thread blocks forever. Consider adding a limit.
            while (loadedDocument.readyState != ReadyStateComplete)
            {
                Thread.Sleep(ReadyStatePollIntervalMs);
            }

            return loadedDocument as IHTMLDocument3;
        }

        /// <summary>
        /// Parses an in-memory HTML string by writing it to a temporary file with the
        /// given encoding and loading that file through <see cref="Parse"/>.
        /// </summary>
        /// <param name="html">HTML markup to parse.</param>
        /// <param name="encoding">Encoding used to write the temporary file.</param>
        /// <returns>The document returned by <see cref="Parse"/> for the temporary file.</returns>
        public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
        {
            string tmpFile = Path.GetTempFileName();
            try
            {
                File.WriteAllText(tmpFile, html, encoding);

                // Parse only returns once readyState is "complete", so the file is
                // no longer needed for loading when the finally block runs.
                return Parse(tmpFile);
            }
            finally
            {
                TryDeleteFile(tmpFile);
            }
        }

        /// <summary>
        /// Best-effort removal of a temporary file. A failed delete must not hide the
        /// parse result or the original exception, so I/O and access errors are ignored.
        /// </summary>
        private static void TryDeleteFile(string path)
        {
            try
            {
                File.Delete(path);
            }
            catch (IOException)
            {
                // File still in use; leave it for the system's temp cleanup.
            }
            catch (UnauthorizedAccessException)
            {
                // No permission to delete; nothing more can be done here.
            }
        }
    }
}