邮件解析大致思路
思路是用正则表达式和字符串操作函数进行提取:先找到分割符的位置,提取分割块里面的内容,然后判断相应的内容形式和编码方式。
首先要添加几个引用
Imports System.IO
Imports System.Text
Imports System.Text.RegularExpressions
'|______ 邮件解析大致思路 ___________|
'| Copyright wgscd (c)2005 |
'| QQ:153964481 E-mail:wgscd@126.com |
'| Blog:http://blog.youkuaiyun.com/wgsnet |
'|______________________________________|
' Start timestamp in milliseconds, used for the elapsed-time readout.
' Environment.TickCount is an Integer; keeping it in a Single (24-bit mantissa)
' rounds large tick counts and distorts the measured duration, so store it as Integer.
Dim t1 As Integer = System.Environment.TickCount
' Shared state
Dim MailContent As String ' raw text of the whole message
Dim MailHead As String ' leading part of the message (the mail head)
Dim Boundarys() As String ' all declared boundaries
Dim PerBoundarys() As String ' all boundary strings
Dim SplitString() As String ' all marker lines
Dim MyHashtable As New Hashtable
Dim keyIndex() As Integer
Dim NotificationTo As String ' reply-to address
Dim strFrom As String ' From:
Dim strTo As String ' = "To:"
Dim strSubject As String ' = "Subject:"
Dim strXPriority As String ' = "X-Priority:"
Dim strXOriginatingIP As String ' = "X-Originating-IP:"
Dim strXMailer As String ' = "X-Mailer:"
Dim strContentType As String ' = "Content-Type:"
Dim strAttachment() As String ' = "Content-Disposition: attachment"
Dim TemCT As String = "Content-Type:" ' used to split off the head
'--------------------------------------
' Load the raw message text from disk.
' FileMode.Open (rather than OpenOrCreate) makes a missing file fail immediately with
' FileNotFoundException instead of silently creating an empty file, and FileAccess.Read
' allows read-only files to be opened. Other sample inputs: mail2.eml, mail.eml.
Dim oFileStream As New FileStream("d:/mail3.txt", FileMode.Open, FileAccess.Read)
Dim sr As New StreamReader(oFileStream)
Try
    mailContent = sr.ReadToEnd().Trim
Finally
    ' Closing the reader also closes the underlying stream; Close() may safely be
    ' called again later on either object.
    sr.Close()
End Try
'--------------------------------
' strHead = mailContent.Substring(0, mailContent.IndexOf(TemCT)).Trim
' Header field name literals
Dim TemSubject As String = "Subject:"
Dim strMessageID As String = "Message-ID:"
Dim strDate As String = "Date:"
' strBoundary = strBoundary.Trim ' boundary marker
Dim strContentTransferEncoding As String = "Content-Transfer-Encoding:"
' Content-Type values used to tell the parts apart (7 entries, indexes 0 to 6).
' NOTE(review): the last entry carries no "Content-Type: " prefix, unlike the
' others; confirm whether that is intentional.
Dim ContentTypes() As String = { _
    "Content-Type: text/plain", _
    "Content-Type: multipart/alternative", _
    "Content-Type: multipart/mixed", _
    "Content-Type: image/GIF", _
    "Content-Type: image/JPEG", _
    "Content-Type: application/x-www-form-urlencoded", _
    "application/x-shockwave-flash" _
}
''''''
'''
'''''''''''''''''''''''''''''''
'"boundary=""[^""]+"""---------- Boundary
'"\nContent[^\n].*"------- Content-Type
Dim str As String
Dim strB As String = "Boundary="
Dim sb As New System.Text.StringBuilder
' MsgBox(strB)
' Dim strBs As String() = mailContent.Split("boundary=") ' split into blocks
' Pattern for a quoted boundary parameter, e.g. boundary="----=_NextPart_000".
Const boundaryParttner As String = "boundary=""[^""]+"""
' MIME parameter names are case-insensitive, so "Boundary=" and "BOUNDARY=" have to
' match as well; the shape of each match (9-letter name, "=", quoted value) is unchanged.
Dim regexBoundary As New Regex(boundaryParttner, RegexOptions.IgnoreCase)
' Extract the mail head and every boundary declared in mailContent, then list each
' boundary line and each "Content-..." header line together with its position.
'
' Reads:  mailContent, regexBoundary, sb, t1, sr, oFileStream
' Writes: MailHead, Boundarys, PerBoundarys, MyHashtable, sb,
'         Me.Text (elapsed milliseconds), Me.RichTextBox1.Text (match listing)

' The message text is already fully in memory, so release the file handles before
' parsing; this way they are freed even when no boundary is found or parsing throws.
' Close() is safe to call on an already closed reader or stream.
sr.Close()
oFileStream.Close()

' Default head: everything before the first "Content-Type". IndexOf returns -1 when
' the text is absent, which would make Substring throw, so keep the whole text then.
Dim contentTypeIndex As Integer = mailContent.IndexOf("Content-Type")
If contentTypeIndex >= 0 Then
    MailHead = mailContent.Substring(0, contentTypeIndex)
Else
    MailHead = mailContent
End If

' Run the boundary regex once and reuse the result.
Dim boundaryMatches As MatchCollection = regexBoundary.Matches(mailContent)
If boundaryMatches.Count > 0 Then
    ' With a boundary present, the head is everything before the first boundary="...".
    MailHead = mailContent.Substring(0, boundaryMatches(0).Index)

    ' Upper bound = match count (one spare trailing slot), as callers may rely on the size.
    Dim matchCount As Integer = boundaryMatches.Count
    ReDim Boundarys(matchCount)
    ReDim PerBoundarys(matchCount)

    ' Build one alternation: a line containing any boundary, or a "Content-..." line.
    Dim pattern As New System.Text.StringBuilder
    Dim slot As Integer = 0
    For Each boundaryMatch As Match In boundaryMatches
        Boundarys(slot) = boundaryMatch.Value & " Index(" & boundaryMatch.Index & "this.length(" & boundaryMatch.Length & ")"
        ' Drop the 10 leading characters (boundary=") and the closing quote.
        PerBoundarys(slot) = boundaryMatch.Value.Substring(10, boundaryMatch.Length - 11)
        ' "\n" is the regex newline escape (a literal "/n" never matches a line break).
        ' The boundary is escaped because it may contain regex metacharacters.
        pattern.Append("\n.*").Append(Regex.Escape(PerBoundarys(slot))).Append("|")
        slot += 1
    Next
    pattern.Append("Content-[^\n]+\n")

    ' Record every hit: text plus position in sb, and index -> text in MyHashtable.
    For Each hit As Match In Regex.Matches(mailContent, pattern.ToString())
        sb.Append(vbCrLf & hit.Value & "Index(" & hit.Index & "this.length(" & hit.Length & ")")
        MyHashtable.Add(hit.Index, hit.Value)
    Next

    ' Show the elapsed milliseconds in the title and the match listing in the text box.
    Me.Text = CStr(System.Environment.TickCount - t1)
    Me.RichTextBox1.Text = sb.ToString()
End If ' at least one boundary found
' 到这里已经提取了所有有用的标志、分割符及其长度和位置。
' 接下来的任务就是根据 boundary 分割出的区域块内容进行对应的解码。
' 邮件和附件的编码方式有:
' base64
' 7bit
' 8bit
' binary
' quoted-printable 等
' 如果是附件,可凭借 "Content-Disposition: attachment" 来判断。
' .NET 里有几种现成的编码,如 Base64,可在我写的封装类(Base64)中找到!
' 其他几种编码也容易解决,这里就不一一贴出来了。
' -------------------------------------