别的就不多说了,直接上代码,代码很简单的,不懂的留言。

Code
1
using System;
2
using System.Collections.Generic;
3
using System.Linq;
4
using System.Text;
5
using System.Runtime.InteropServices;
6
using mshtml;
7
using System.Threading;
8
using System.Runtime.InteropServices.ComTypes;
9
using System.IO;
10
11
namespace Eric.Utilities.Html
12

{
13
/// <summary>
/// Subset of the standard COM HRESULT status codes used by the interop
/// declarations in this file. Failure codes have the high bit set, so they
/// are negative when viewed as a signed 32-bit integer.
/// </summary>
public enum HRESULT
{
    /// <summary>Success (0x00000000).</summary>
    S_OK = 0x00000000,

    /// <summary>Non-failure status distinct from <see cref="S_OK"/> (0x00000001).</summary>
    S_FALSE = 0x00000001,

    /// <summary>Not implemented (0x80004001).</summary>
    E_NOTIMPL = unchecked((int)0x80004001),

    /// <summary>Requested interface is not supported (0x80004002).</summary>
    E_NOINTERFACE = unchecked((int)0x80004002),

    /// <summary>Unspecified failure (0x80004005).</summary>
    E_FAIL = unchecked((int)0x80004005),

    /// <summary>Unexpected failure (0x8000FFFF).</summary>
    E_UNEXPECTED = unchecked((int)0x8000FFFF),

    /// <summary>One or more arguments are invalid (0x80070057).</summary>
    E_INVALIDARG = unchecked((int)0x80070057)
}
23
24
/// <summary>
/// Managed declaration of the COM <c>IPersist</c> interface
/// (IID 0000010c-0000-0000-C000-000000000046), imported as an
/// IUnknown-derived interface.
/// </summary>
[ComImport]
[ComVisible(true)]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[Guid("0000010c-0000-0000-C000-000000000046")]
public interface IPersist
{
    /// <summary>Retrieves the class identifier of the object.</summary>
    /// <param name="pClassID">Passed by reference; receives the class identifier.</param>
    void GetClassID(ref Guid pClassID);
}
29
30
/// <summary>
/// Managed declaration of the COM <c>IPersistStreamInit</c> interface
/// (IID 7FD52380-4E07-101B-AE2D-08002B2EC713), imported as an
/// IUnknown-derived interface.
/// </summary>
/// <remarks>
/// The member order below must not be changed: for an imported COM interface
/// the declaration order defines the method slots. Every member except
/// <c>GetClassID</c> is marked <c>PreserveSig</c>, so its raw return value is
/// handed back to the caller instead of being converted into an exception.
/// </remarks>
[ComImport]
[ComVisible(true)]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
[Guid("7FD52380-4E07-101B-AE2D-08002B2EC713")]
public interface IPersistStreamInit : IPersist
{
    /// <summary>
    /// Re-declaration of <see cref="IPersist.GetClassID"/>; it hides the
    /// inherited member so the method occupies the first slot of this interface.
    /// </summary>
    /// <param name="pClassID">Passed by reference; receives the class identifier.</param>
    new void GetClassID(ref Guid pClassID);

    /// <summary>Queries the object's dirty state.</summary>
    /// <returns>The raw status code returned by the COM object.</returns>
    [PreserveSig]
    int IsDirty();

    /// <summary>Loads the object from a stream.</summary>
    /// <param name="pstm">Stream to read from.</param>
    /// <returns>The raw <see cref="HRESULT"/> returned by the COM object.</returns>
    [PreserveSig]
    HRESULT Load(IStream pstm);

    /// <summary>Saves the object to a stream.</summary>
    /// <param name="pstm">Stream to write to.</param>
    /// <param name="fClearDirty">Marshaled as a Win32 BOOL.</param>
    /// <returns>The raw <see cref="HRESULT"/> returned by the COM object.</returns>
    [PreserveSig]
    HRESULT Save(
        IStream pstm,
        [MarshalAs(UnmanagedType.Bool)] bool fClearDirty);

    /// <summary>Retrieves a size value for the object's persisted stream.</summary>
    /// <param name="pcbSize">Passed by reference as an 8-byte unsigned value.</param>
    /// <returns>The raw <see cref="HRESULT"/> returned by the COM object.</returns>
    [PreserveSig]
    HRESULT GetSizeMax(
        [In, Out, MarshalAs(UnmanagedType.U8)] ref long pcbSize);

    /// <summary>Initializes the object to a new, default state.</summary>
    /// <returns>The raw <see cref="HRESULT"/> returned by the COM object.</returns>
    [PreserveSig]
    HRESULT InitNew();
}
45
46
/// <summary>
/// Loads HTML into an MSHTML document and returns it as an
/// <see cref="IHTMLDocument3"/> so callers can query the DOM.
/// </summary>
public class HtmlParser
{
    // Delay, in milliseconds, between successive readyState checks while a document loads.
    private const int ReadyStatePollIntervalMs = 1000;

    /// <summary>
    /// Creates an MSHTML document from <paramref name="url"/> and blocks until
    /// its <c>readyState</c> is <c>"complete"</c>.
    /// </summary>
    /// <param name="url">URL or local file path to load.</param>
    /// <returns>
    /// The loaded document viewed as <see cref="IHTMLDocument3"/>; <c>null</c>
    /// if the loaded document does not expose that interface.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidCastException">
    /// The MSHTML document object does not support <see cref="IPersistStreamInit"/>.
    /// </exception>
    /// <exception cref="COMException">
    /// Typically thrown when <c>InitNew</c> reports a failure HRESULT (the exact
    /// exception type is chosen by the runtime from the HRESULT).
    /// </exception>
    public IHTMLDocument3 Parse(string url)
    {
        if (url == null)
        {
            throw new ArgumentNullException("url");
        }

        HTMLDocument host = new HTMLDocument();

        // Direct cast instead of `as`: an unsupported interface surfaces as an
        // InvalidCastException here rather than a NullReferenceException below.
        IPersistStreamInit persist = (IPersistStreamInit)host;

        // InitNew is declared PreserveSig, so a failure comes back as a return
        // value; convert failure codes (negative HRESULTs) into exceptions
        // instead of silently continuing with an uninitialized document.
        int initResult = (int)persist.InitNew();
        if (initResult < 0)
        {
            Marshal.ThrowExceptionForHR(initResult);
        }

        IHTMLDocument2 loaded = host.createDocumentFromUrl(url, "null");

        // NOTE(review): this loop has no timeout and does not pump messages.
        // MSHTML is commonly reported to need a message loop on the calling
        // thread for readyState to advance — confirm for your hosting scenario.
        while (loaded.readyState != "complete")
        {
            Thread.Sleep(ReadyStatePollIntervalMs);
        }

        return loaded as IHTMLDocument3;
    }

    /// <summary>
    /// Parses an in-memory HTML string by writing it to a temporary file,
    /// loading that file with <see cref="Parse"/>, and then removing the file.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <param name="encoding">Encoding used to write the markup to disk.</param>
    /// <returns>The loaded document, as returned by <see cref="Parse"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    public IHTMLDocument3 ParseHtml(string html, Encoding encoding)
    {
        // Validate before creating the temp file so a bad argument cannot leave one behind.
        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        string tmpFile = Path.GetTempFileName();
        try
        {
            File.WriteAllText(tmpFile, html, encoding);
            return Parse(tmpFile);
        }
        finally
        {
            // Parse only returns once the document reports "complete", so the
            // backing file is no longer needed. Cleanup is best-effort: a file
            // that cannot be removed must not mask the parse result or the
            // original exception.
            try
            {
                File.Delete(tmpFile);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
}
75
}
76
说一个问题:调用 ParseHtml 的时候,是先建立一个临时文件再进行解析。我想不建立临时文件、直接从字符串解析,但是不得法;网上有通过 Markup Services(IMarkupServices)实现的做法,但我发现不好用。不知道大家有没有好的办法。