训练集
TrainingSet.py
# Collection of documents after tokenization: each entry is one document,
# stored as the list of its word tokens.
postingList = [
    'my dog has flea problems help please'.split(),
    'maybe not take him to dog park stupid'.split(),
    'my dalmation is so cute I love him'.split(),
    'stop posting stupid worthless garbage'.split(),
    'mr licks ate my steak how to stop him'.split(),
    'quit buying worthless dog food stupid'.split(),
]

# Collection of class labels (presumably one per document in postingList,
# in the same order -- both sequences have six entries).
classVec = [
    0, 1,
    0, 1,
    0, 1,
]
朴素贝叶斯分类器对测试集自动分类
NaiveBayes.py
from numpy import *
import numpy as np
from TrainingSet import postingList, classVec
# Load the training set
def loadDataSet():
    """Return the training data imported from ``TrainingSet``.

    Returns:
        A ``(postingList, classVec)`` tuple: the collection of tokenized
        documents and the collection of class labels. The module-level
        objects themselves are returned (no copy is made), so mutating
        the result mutates the shared training data.
    """
    return postingList, classVec
def createVocabList(dataSet):
# 创建一个空集
v