将\jieba\posseg目录下的文件__init__.py替换成下面的代码
from __future__ import absolute_import, unicode_literals
import os
import re
import sys
import jieba
import pickle
from .._compat import *
from .viterbi import viterbi
# File names of the pickled model tables; load_model() opens each of them via
# get_module_res("posseg", <name>).
PROB_START_P = "prob_start.p"
PROB_TRANS_P = "prob_trans.p"
PROB_EMIT_P = "prob_emit.p"
CHAR_STATE_TAB_P = "char_state_tab.p"

# Compiled patterns.  Backslashes meant for the regex engine are written either
# in raw strings or as "\\": the previous "\." / "\s" inside ordinary literals
# are invalid *string* escapes (DeprecationWarning on Python 3.6+,
# SyntaxWarning on 3.12+).  Literals that also need string-level escapes
# ("\uXXXX", "\r\n") stay non-raw so the resulting pattern text is unchanged.

# One or more characters in the range U+4E00-U+9FD5.
re_han_detail = re.compile("([\u4E00-\u9FD5]+)", re.U)
# Either a run of digits/dots followed by "%", or an alphanumeric run followed
# by the literal text "#&._%".
# NOTE(review): the second alternative looks like a character class that lost
# its brackets (compare re_han_internal) -- confirm the intent before changing.
re_skip_detail = re.compile(r"([\.0-9]+%|[a-zA-Z0-9]+#&\._%)", re.U)
# Run of U+4E00-U+9FD5 characters, ASCII letters/digits and the symbols + # & . _ %
re_han_internal = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\\._%]+)", re.U)
# A CRLF pair or any single whitespace character.
re_skip_internal = re.compile("(\r\n|\\s)", re.U)
# Run of ASCII letters/digits.
re_eng = re.compile("[a-zA-Z0-9]+", re.U)
# Run of digits and dots.
re_num = re.compile(r"[\.0-9]+", re.U)
# Run of digits and the symbols + # & . _ %
re_bf = re.compile(r"[0-9+#&\._%]+")
# Exactly one ASCII letter or digit.
re_eng1 = re.compile('^[a-zA-Z0-9]$', re.U)
def load_model():
    """Load the four pickled model tables of the "posseg" module resources.

    Intended for Jython: the module calls this only when ``sys.platform``
    starts with "java".

    Returns:
        A tuple ``(state, start_p, trans_p, emit_p)`` unpickled from
        CHAR_STATE_TAB_P, PROB_START_P, PROB_TRANS_P and PROB_EMIT_P
        respectively.
    """

    def _load(res_name):
        # get_module_res() hands back an open stream that pickle.load() reads
        # from; close it once the table is loaded so the handle is not leaked.
        # NOTE(review): assumes the returned object exposes close(), as file
        # objects and in-memory streams do -- confirm against _compat.
        stream = get_module_res("posseg", res_name)
        try:
            return pickle.load(stream)
        finally:
            stream.close()

    # Same load order as before: start, transition, emission, then state table.
    start_p = _load(PROB_START_P)
    trans_p = _load(PROB_TRANS_P)
    emit_p = _load(PROB_EMIT_P)
    state = _load(CHAR_STATE_TAB_P)
    return state, start_p, trans_p, emit_p
# Bind the model tables at import time.  Everywhere except Jython they come
# from the sibling modules (each exposes its table as ``P``); on Jython
# (sys.platform starting with "java") they are unpickled via load_model().
if not sys.platform.startswith("java"):
    from .char_state_tab import P as char_state_tab_P
    from .prob_start import P as start_P
    from .prob_trans import P as trans_P
    from .prob_emit import P as emit_P
else:
    char_state_tab_P, start_P, trans_P, emit_P = load_model()
class pair(object):
def __init__(self, word, flag):
self.word = word
self.flag = flag
def __unicode__(self):
return '%s/%s' % (self.word, self.flag)
def __repr__(self):
return 'pair(%r, %r)' % (self.word, self.flag)
def __str__(self):
if PY2:
return self.__unicode__().encode(default_encoding)
else:
return