汉字拼音的一个解决方法(初具使用价值)

本文介绍了一个实用的汉字转拼音工具,该工具通过加载汉字拼音库实现了字符串到拼音的转换,并支持多音字处理及按拼音查找汉字的功能。
Author: 水如烟

这个版本已有使用价值。如果要求不高,基本满足需要。一并贴出来,也对比一下。

这个版本的汉字库已纳入程序资源里头,大小为324K。

主要代码如下: 

Namespace  Businness.PinYin
    
Public   Class  SpellInformation
        
Private  gTone  As   String
        
Private  gSpellInput  As   String
        
Private  gSpellExpress  As   String

        
' '' <summary>
         ' '' 声调
         ' '' </summary>
         Public   Property  Tone()  As   String
            
Get
                
Return  gTone
            
End   Get
            
Set ( ByVal  value  As   String )
                gTone 
=  value.Trim
            
End   Set
        
End Property

        
' '' <summary>
         ' '' 拼音输入码
         ' '' </summary>
         Public   Property  SpellInput()  As   String
            
Get
                
Return  gSpellInput
            
End   Get
            
Set ( ByVal  value  As   String )
                gSpellInput 
=  value.Trim
            
End   Set
        
End Property

        
' '' <summary>
         ' '' 拼音
         ' '' </summary>
         Public   Property  SpellExpress()  As   String
            
Get
                
Return  gSpellExpress
            
End   Get
            
Set ( ByVal  value  As   String )
                gSpellExpress 
=  value.Trim
            
End   Set
        
End Property

        
Sub   New ()
        
End Sub

        
' '' <param name="tone">声调</param>
         ' '' <param name="spellinput">拼音输入码</param>
         ' '' <param name="spellexpress">拼音</param>
         Sub   New ( ByVal  tone  As   String ByVal  spellinput  As   String ByVal  spellexpress  As   String )
            
Me .Tone  =  tone
            
Me .SpellInput  =  spellinput
            
Me .SpellExpress  =  spellexpress
        
End Sub

        
' '' <param name="spellexpress">拼音</param>
         Public   Overrides   Function  Equals( ByVal  spellexpress  As   Object As   Boolean
            
Return   Me .SpellExpress.Equals(spellexpress)
        
End Function

        
Public   Overrides   Function  ToString()  As   String
            
Return   String .Format( " 拼音码 {0,-6} 声调 {1} 拼音 {2} " Me .SpellInput,  Me .Tone,  Me .SpellExpress)
        
End Function
    
End Class

End Namespace

 

Namespace  Businness.PinYin
    
Public   Class  Word
        
Private  gValue  As   String

        
' '' <param name="word">单个汉字</param>
         Sub   New ( ByVal  word  As   String )
            
Me .gValue  =  word
        
End Sub

        
' '' <summary>
         ' '' 汉字
         ' '' </summary>
         Public   ReadOnly   Property  Value()  As   String
            
Get
                
Return  gValue
            
End   Get
        
End Property

        
Public   ReadOnly   Property  Code()  As   String
            
Get
                
Return  Common.Code(gValue)
            
End   Get
        
End Property

        
Private  gSpellList  As   New  Dictionary( Of   String , SpellInformation)

        
' '' <summary>
         ' '' 拼音集
         ' '' </summary>
         Public   ReadOnly   Property  Spells()  As  SpellInformation()
            
Get
                
Dim  tmp(gSpellList.Count  -   1 As  SpellInformation
                gSpellList.Values.CopyTo(tmp, 
0 )
                
Return  tmp
            
End   Get
        
End Property

        
' '' <summary>
         ' '' 是否多音字
         ' '' </summary>
         Public   ReadOnly   Property  IsMutiSpell()  As   Boolean
            
Get
                
Return  gSpellList.Count  >   1
            
End   Get
        
End Property


        
Public   Sub  AddSpell( ByVal  spell  As  SpellInformation)
            
If   String .IsNullOrEmpty(spell.SpellExpress)  Then   Exit Sub

            
If   Me .gSpellList.ContainsKey(spell.SpellExpress)  Then   Exit Sub

            
Me .gSpellList.Add(spell.SpellExpress, spell)
        
End Sub

        
Public   Sub  AddSpell( ByVal  tone  As   String ByVal  spellinput  As   String ByVal  spellexpress  As   String )
            
Dim  tmp  As   New  SpellInformation(tone, spellinput, spellexpress)

            AddSpell(tmp)
        
End Sub

        
Public   ReadOnly   Property  DefaultSpell()  As  SpellInformation
            
Get
                
If   Me .gSpellList.Count  =   0   Then   Return   Nothing
                
Return   Me .Spells( 0 )
            
End   Get
        
End Property

        
Public   ReadOnly   Property  DefaultSpellExpress()  As   String
            
Get
                
If  DefaultSpell  Is   Nothing   Then   Return   Nothing
                
Return   Me .DefaultSpell.SpellExpress
            
End   Get
        
End Property

        
Public   ReadOnly   Property  DefaultSpellInput()  As   String
            
Get
                
If  DefaultSpell  Is   Nothing   Then   Return   Nothing
                
Return   Me .DefaultSpell.SpellInput
            
End   Get
        
End Property

        
Public   ReadOnly   Property  DefaultTone()  As   String
            
Get
                
If  DefaultSpell  Is   Nothing   Then   Return   Nothing
                
Return   Me .DefaultSpell.Tone
            
End   Get
        
End Property

        
Public   ReadOnly   Property  AllSpellExpress()  As   String
            
Get
                
If   Me .gSpellList.Count  =   0   Then   Return   Nothing

                
If   Not   Me .IsMutiSpell  Then   Return   Me .DefaultSpellExpress

                
Dim  tmp(gSpellList.Count  -   1 As   String
                
For  i  As   Integer   =   0   To  gSpellList.Count  -   1
                    tmp(i) 
=   Me .Spells(i).SpellExpress
                
Next

                
Return   String .Join( "   " , tmp)
            
End   Get
        
End Property

        
Public   ReadOnly   Property  AllSpellInput()  As   String
            
Get
                
If   Me .gSpellList.Count  =   0   Then   Return   Nothing

                
If   Not   Me .IsMutiSpell  Then   Return   Me .DefaultSpellInput

                
Dim  tmp(gSpellList.Count  -   1 As   String
                
For  i  As   Integer   =   0   To  gSpellList.Count  -   1
                    tmp(i) 
=   Me .Spells(i).SpellInput
                
Next

                
Return   String .Join( "   " , tmp)
            
End   Get
        
End Property

        
Public   ReadOnly   Property  AllTone()  As   String
            
Get
                
If   Me .gSpellList.Count  =   0   Then   Return   Nothing

                
If   Not   Me .IsMutiSpell  Then   Return   Me .DefaultTone

                
Dim  tmp(gSpellList.Count  -   1 As   String
                
For  i  As   Integer   =   0   To  gSpellList.Count  -   1
                    tmp(i) 
=   Me .Spells(i).Tone
                
Next

                
Return   String .Join( "   " , tmp)
            
End   Get
        
End Property

        
Public   Overrides   Function  ToString()  As   String
            
Dim  mBuilder  As   New  System.Text.StringBuilder
            
For   Each  spell  As  SpellInformation  In   Me .Spells
                mBuilder.AppendLine(
String .Concat( Me .Value,  "   " , spell.ToString))
            
Next
            
Return  mBuilder.ToString
        
End Function
    
End Class

End Namespace

 

Imports  System.IO
Imports  System.Text.RegularExpressions

Namespace  Businness.PinYin
    
Public   Class  PYService
        
Private  gDataSet  As  dsPinYin

        
' '' <summary>
         ' '' 汉字表
         ' '' </summary>
         Public   ReadOnly   Property  PinYinTable()  As  dsPinYin.PinYinDataTable
            
Get
                
Return  gDataSet.PinYin
            
End   Get
        
End Property

        
' '' <summary>
         ' '' 单个汉字信息
         ' '' </summary>
         ' '' <param name="word">单个汉字</param>
         Public   Function  GetWord( ByVal  word  As   String As  Word
            
Dim  mRow  As  dsPinYin.PinYinRow  =   Me .gDataSet.PinYin.FindBy代码(GetCode(word))

            
Return  RowConverter(mRow)
        
End Function

        
Private   Function  RowConverter( ByVal  row  As  dsPinYin.PinYinRow)  As  Word
            
If  row  Is   Nothing   Then   Return   Nothing

            
Dim  mWord  As   New  Word(row.汉字)

            
Dim  mSpellExpressArray()  As   String   =  row.拼音.Split( "   " c)
            
Dim  mToneArray()  As   String   =  row.声调.Split( "   " c)
            
Dim  mSpellInputArray()  As   String   =  row.拼音码.Split( "   " c)

            
For  i  As   Integer   =   0   To  mSpellExpressArray.Length  -   1
                mWord.AddSpell(mToneArray(i), mSpellInputArray(i), mSpellExpressArray(i))
            
Next

            
Return  mWord
        
End Function

        
Private   Sub  RowUpdate( ByVal  word  As  Word)
            
Dim  mWord  As  Word  =  GetWord(word.Value)
            
If  mWord  Is   Nothing   Then
                
Dim  tmpRow  As  dsPinYin.PinYinRow  =   Me .gDataSet.PinYin.AddPinYinRow(word.Value, word.Code,  "" "" "" True )
                mWord 
=  RowConverter(tmpRow)
            
End   If

            
For   Each  spell  As  SpellInformation  In  word.Spells
                mWord.AddSpell(spell)
            
Next

            
Dim  mRow  As  dsPinYin.PinYinRow  =   Me .gDataSet.PinYin.FindBy代码(mWord.Code)
            
With  mRow
                .拼音码 
=  mWord.AllSpellInput
                .拼音 
=  mWord.AllSpellExpress
                .声调 
=  mWord.AllTone
                .单音 
=   Not  mWord.IsMutiSpell
            
End   With

        
End Sub

        
Public   Sub  Load()
            UpdateFromTxt()
        
End Sub

        
' 文件存放的格式是:汉字,拼音,音调,拼音码
         Private   Sub  UpdateFromTxt()
            
Me .gDataSet  =   New  dsPinYin

            LzmTW.uSystem.uCollections.CommonServices.MoveNext(
Of   String )(My.Resources.pinyin.Split( CChar (vbCrLf)),  AddressOf  Action)

            
Me .gDataSet.AcceptChanges()
        
End Sub


        
' 文件存放的格式是:汉字,拼音,音调,拼音码
         Private   Sub  Action( ByVal  line  As   String )

            
Dim  mArray  As   String ()
            mArray 
=  line.Split( " , " c)

            
If  mArray.Length  <>   4   Then   Exit Sub

            
Dim  mWord  As   String   =  mArray( 0 ).Trim
            
Dim  mSpellExpress  As   String   =  mArray( 1 ).Trim
            
Dim  mTone  As   String   =  mArray( 2 ).Trim
            
Dim  mSpellInput  As   String   =  mArray( 3 ).Trim

            
Dim  mWordInformation  As   New  Word(mWord)
            mWordInformation.AddSpell(mTone, mSpellInput, mSpellExpress)

            RowUpdate(mWordInformation)

        
End Sub

        
' '' <summary>
         ' '' 将字符串转为拼音
         ' '' </summary>
         ' '' <param name="line">字符串</param>
         ' '' <param name="isgetfirst">如是多音字,取第一个拼音</param>
         ' '' <param name="forInput">是则查拼音码,否则查拼音</param>
         Public   Function  ToPinyin( ByVal  line  As   String ByVal  isgetfirst  As   Boolean ByVal  forInput  As   Boolean As   String
            
Dim  mBuilder  As   New  Text.StringBuilder

            
For   Each  s  As   Char   In  line.ToCharArray
                
If  Common.IsSingleWord(s)  Then
                    mBuilder.Append(GetPinyin(s, isgetfirst, forInput))
                
Else
                    mBuilder.Append(s)
                
End   If
            
Next

            
Return  mBuilder.ToString
        
End Function

        
Private   Function  GetPinyin( ByVal  word  As   String ByVal  isgetfirst  As   Boolean ByVal  forInput  As   Boolean As   String
            
Dim  mResult  As   String

            
Dim  mWord  As  Word  =  GetWord(word)

            
If  isgetfirst  Or   Not  mWord.IsMutiSpell  Then
                
If  forInput  Then
                    mResult 
=  mWord.DefaultSpellInput
                
Else
                    mResult 
=  mWord.DefaultSpellExpress
                
End   If

            
Else
                
If  forInput  Then

                    
Dim  tmpList( - 1 As   String

                    
For   Each  spell  As  SpellInformation  In  mWord.Spells
                        
If  Array.IndexOf(tmpList, spell.SpellInput)  =   - 1   Then
                            LzmTW.uSystem.uCollections.CommonServices.Append(tmpList, spell.SpellInput)
                        
End   If
                    
Next

                    
If  tmpList.Length  =   1   Then
                        mResult 
=  mWord.DefaultSpellInput
                    
Else
                        mResult 
=   String .Format( " ({0}) " String .Join( "   " , tmpList))
                    
End   If
                
Else
                    mResult 
=   String .Format( " ({0}) " , mWord.AllSpellExpress)
                
End   If

            
End   If

            
Return  mResult
        
End Function

        
' '' <summary>
         ' '' 按拼音查字
         ' '' </summary>
         ' '' <param name="pinyin">拼音</param>
         Public   Function  WordArray( ByVal  pinyin  As   String As   String ()
            
Dim  mRows  As  dsPinYin.PinYinRow()  =   CType ( Me .gDataSet.PinYin.Select( String .Format( " 拼音码 LIKE '%{0}%' " , pinyin)), dsPinYin.PinYinRow())
            
Dim  mResult( - 1 As   String
            
For  i  As   Integer   =   0   To  mRows.Length  -   1
                
If  Array.IndexOf(mRows(i).拼音码.Split( "   " c), pinyin)  <>   - 1   Then
                    LzmTW.uSystem.uCollections.CommonServices.Append(mResult, mRows(i).汉字)
                
End   If
            
Next
            
Return  mResult
        
End Function

        
' '' <summary>
         ' '' 按拼音查字
         ' '' </summary>
         ' '' <param name="pinyin">拼音</param>
         Public   Function  Words( ByVal  pinyin  As   String As   String
            
Return   String .Concat(WordArray(pinyin))
        
End Function


        
Public   Function  GetCode( ByVal  word  As   String As   String
            
Return  Common.Code(word)
        
End Function

    
End Class
End Namespace

调用的代码

Public   Class  Form1
    
Dim  gPinyinService  As   New  LzmTW.Businness.PinYin.PYService

    
Private   Sub  ButtonLoad_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  ButtonLoad.Click
        gPinyinService.Load()
        
Me .DataGridView1.DataSource  =  gPinyinService.PinYinTable
    
End Sub

    
Private   Sub  ButtonTran_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  ButtonTran.Click
        
Me .RichTextBox2.Text  =  gPinyinService.ToPinyin( Me .RichTextBox1.Text,  Me .CheckBox1.Checked,  Me .CheckBox2.Checked)
    
End Sub

    
Private   Sub  ButtonWord_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  ButtonWord.Click
        
Me .RichTextBox3.Text  =  gPinyinService.Words( Me .TextBoxPinyin.Text)
    
End Sub

    
Private   Sub  Button1_Click( ByVal  sender  As  System.Object,  ByVal  e  As  System.EventArgs)  Handles  Button1.Click
        
Me .RichTextBox3.Text  =  gPinyinService.GetWord( Me .TextBox1.Text).ToString
    
End Sub
End Class

效果图:

下载方案:代码

 

 



Trackback: http://tb.blog.youkuaiyun.com/TrackBack.aspx?PostId=1330615

 

 

平常中,经常用到汉字转拼音,比如批量生成姓名->拼音作为登录帐号。

这个方法只是简单的利用汉字拼音库。至于怎么找这个库,网上多有介绍。在最后提供下载的方案中也提供了这个库文本文件。

主要代码如下:

Imports  System.IO
Imports  System.Text.RegularExpressions

Namespace  Businness.PinYin
    
Public   Class  PYService
        
Private  gDataSet  As   New  dsPinYin

        
' '' <summary>
         ' '' 汉字表
         ' '' </summary>
         Public   ReadOnly   Property  PinYinTable()  As  dsPinYin.PinYinDataTable
            
Get
                
Return  gDataSet.PinYin
            
End   Get
        
End Property

        
Private  gTxtFile  As   String   =  AppDomain.CurrentDomain.SetupInformation.ApplicationBase  &   " pinyin.txt "
        
Private  gxmlFile  As   String   =  AppDomain.CurrentDomain.SetupInformation.ApplicationBase  &   " pinyin.xml "

        
Private  gRegex  As   New  Regex("(?<Word>^[/u4e00-/u9fa5]+)(?<PingYin>.*)")

        
' '' <summary>
         ' '' 加载汉字库,文件名为pinyin.xml,在程序当前目录下
         ' '' </summary>
         Public   Sub  Load()
            
If   Not  IO.File.Exists(gxmlFile)  Then
                
Throw   New  Exception( String .Format( " 文件{0}不存在 " , gxmlFile))
            
End   If

            DataSetInitialize()
            gDataSet.ReadXml(gxmlFile)
        
End Sub

        
' '' <summary>
         ' '' 从汉字文件中更新,文件名为pinyin.txt,在程序当前目录下
         ' '' </summary>
         ' '' <remarks></remarks>
         Public   Sub  Update()
            
If   Not  IO.File.Exists(gTxtFile)  Then
                
Throw   New  Exception( String .Format( " 文件{0}不存在 " , gTxtFile))
            
End   If

            UpdateFromTxt(gTxtFile)
        
End Sub

        
' '' <summary>
         ' '' 保存汉字库,文件为pingyin.xml,在程序当前目录下
         ' '' </summary>
         ' '' <remarks></remarks>
         Public   Sub  Save()
            gDataSet.WriteXml(gxmlFile)
        
End Sub

        
Private   Sub  DataSetInitialize()
            
' 在更新或读入时,清除
             Me .gDataSet.Clear()
            
Me .gDataSet.AcceptChanges()
        
End Sub

        
Private   Sub  UpdateFromTxt( ByVal  file  As   String )
            DataSetInitialize()

            
Dim  mLine  As   String
            
Dim  mBuilder  As   New  System.Text.StringBuilder
            
Dim  mReader  As   New  IO.StreamReader(file, System.Text.Encoding.Default)
            
Do
                mLine 
=  mReader.ReadLine
                Add(mLine)
            
Loop   Until   String .IsNullOrEmpty(mLine)

            mReader.Close()
            mReader.Dispose()

            
Me .gDataSet.PinYin.AcceptChanges()
        
End Sub

        
Private   Sub  Add( ByVal  line  As   String )
            
If  line  Is   Nothing   Then   Exit Sub

            
With  gRegex.Match(line)

                
If  .Success  Then

                    
' 只取单字,不取词组
                     If  .Groups( " Word " ).Value.Length  =   1   Then
                        Add(.Groups(
" Word " ).Value, .Groups( " PingYin " ).Value)
                    
End   If

                
End   If
            
End   With
        
End Sub

        
Private   Sub  Add( ByVal  word  As   String ByVal  py  As   String )
            
' 多音的,拼音间用单个空枨符隔开

            py 
=  py.Trim.Replace( "    " "   " )

            
Dim  mCode  As   String   =  ChineseCode(word)
            
Dim  mRow  As  dsPinYin.PinYinRow  =   Me .gDataSet.PinYin.FindBy代码(mCode)
            
If  mRow  Is   Nothing   Then
                
Me .gDataSet.PinYin.AddPinYinRow(word, mCode, py)
            
Else
                
Dim  pyArray()  As   String   =  py.Split( "   " c)
                
For   Each  s  As   String   In  pyArray
                    
If   Not  mRow.拼音.Contains(s)  Then
                        mRow.拼音 
=   String .Concat(mRow.拼音,  "   "   &  s)
                    
End   If
                
Next
            
End   If

        
End Sub

        
' '' <summary>
         ' '' 将字符串转为拼音
         ' '' </summary>
         ' '' <param name="line">字符串</param>
         ' '' <param name="isgetfirst">如是多音字,取第一个拼音</param>
         Public   Function  ToPinyin( ByVal  line  As   String ByVal  isgetfirst  As   Boolean As   String
            
Dim  mBuilder  As   New  Text.StringBuilder

            
For   Each  s  As   Char   In  line.ToCharArray
                
If  IsTrue(s)  Then
                    mBuilder.Append(GetPinyin(s, isgetfirst))
                
Else
                    mBuilder.Append(s)
                
End   If
            
Next

            
Return  mBuilder.ToString
        
End Function

        
Private   Function  GetPinyin( ByVal  word  As   String ByVal  isgetfirst  As   Boolean As   String
            
Dim  mResult  As   String   =  word

            
Dim  mArray  As   String ()  =  PinYinArray(ChineseCode(word))  ' 取拼音组
             If   Not  mArray  Is   Nothing   Then
                
If  mArray.Length  =   1   Or  isgetfirst  Then
                    mResult 
=  mArray( 0 ' 单音的
                 Else

                    mResult 
=   String .Format( " ({0}) " String .Join( " , " , mArray))  ' 多音的用括号括住,拼音间用逗号隔开
                 End   If
            
End   If

            
Return  mResult
        
End Function

        
' 取拼音组
         Private   Function  PinYinArray( ByVal  code  As   String As   String ()
            
Dim  mRow  As  dsPinYin.PinYinRow  =   Me .gDataSet.PinYin.FindBy代码(code)
            
If  mRow  Is   Nothing   Then   Return   Nothing

            
Return  mRow.拼音.Split( "   " c)
        
End Function

        
' '' <summary>
         ' '' 按拼音查字
         ' '' </summary>
         ' '' <param name="pinyin">拼音</param>
         Public   Function  WordArray( ByVal  pinyin  As   String As   String ()
            
Dim  mRows  As  dsPinYin.PinYinRow()  =   CType ( Me .gDataSet.PinYin.Select( String .Format( " 拼音 LIKE '%{0}%' " , pinyin)), dsPinYin.PinYinRow())
            
Dim  mResult( - 1 As   String
            
For  i  As   Integer   =   0   To  mRows.Length  -   1
                
If  Array.IndexOf(mRows(i).拼音.Split( "   " c), pinyin)  <>   - 1   Then
                    
Me .Append(mResult, mRows(i).汉字)
                
End   If
            
Next
            
Return  mResult
        
End Function

        
' '' <summary>
         ' '' 按拼音查字
         ' '' </summary>
         ' '' <param name="pinyin">拼音</param>
         Public   Function  Words( ByVal  pinyin  As   String As   String
            
Return   String .Concat(WordArray(pinyin))
        
End Function


        
' '' <summary>
         ' '' 汉字代码
         ' '' </summary>
         ' '' <param name="word">单个汉字</param>
         Public   Shared   Function  ChineseCode( ByVal  word  As   String As   String
            
If   Not  IsTrue(word)  Then   Return   Nothing

            
Dim  bytes()  As   Byte   =  System.Text.Encoding.Default.GetBytes(word)

            
Return   String .Concat( Hex (bytes( 0 )),  Hex (bytes( 1 )))
        
End Function

        
' '' <summary>
         ' '' 是否是单个汉字
         ' '' </summary>
         ' '' <param name="word">字符</param>
         Public   Shared   Function  IsTrue( ByVal  word  As   String As   Boolean
            
If  word  Is   Nothing   Then   Return   False

            
Return  System.Text.RegularExpressions.Regex.IsMatch(word,  "^[/u4e00-/u9fa5]$" )
        
End Function

        
Private   Sub  Append( ByRef  collection  As   String (),  ByVal  value  As   String )
            
ReDim   Preserve  collection(collection.Length)
            collection(collection.Length 
-   1 =  value
        
End Sub
    
End Class
End Namespace

 效果图:


下载方案:代码  版本二



Trackback: http://tb.blog.youkuaiyun.com/TrackBack.aspx?PostId=1329538



 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值