# coding:utf8
import random
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import tree
# Build the training set: 10000 points with both coordinates drawn by
# random.uniform(-1, 1). A point is labelled 1 when both coordinates are
# strictly positive or both strictly negative (first / third quadrant) and
# -1 otherwise, which yields an XOR-like, non linearly separable layout.
X = []
Y = []
for _ in range(10000):
    a = random.uniform(-1, 1)
    b = random.uniform(-1, 1)
    X.append([a, b])
    # A zero coordinate is deliberately treated as "not same sign" (-1).
    same_sign = (a > 0 and b > 0) or (a < 0 and b < 0)
    Y.append(1 if same_sign else -1)
# Array views used by the estimators: x is (10000, 2), y is (10000,).
x = np.array(X)
y = np.array(Y)
# Train both classifiers on the same training arrays. scikit-learn's fit()
# returns the estimator itself, so construction and training are chained.
clf1 = LogisticRegression().fit(x, y)  # linear decision boundary
clf2 = tree.DecisionTreeClassifier().fit(x, y)  # axis-parallel splits
# Build an independent evaluation set with the same recipe as the training
# data: 10000 points with coordinates from random.uniform(-1, 1), labelled 1
# when both coordinates share a strict sign and -1 otherwise.
X_t = []
Y_t = []
for _ in range(10000):
    a = random.uniform(-1, 1)
    b = random.uniform(-1, 1)
    X_t.append([a, b])
    # A zero coordinate is deliberately treated as "not same sign" (-1).
    same_sign = (a > 0 and b > 0) or (a < 0 and b < 0)
    Y_t.append(1 if same_sign else -1)
# Array views passed to score(): x_t is (10000, 2), y_t is (10000,).
x_t = np.array(X_t)
y_t = np.array(Y_t)
# Print each model's score on the evaluation set: logistic regression first,
# then the decision tree. The parenthesised single-argument form behaves the
# same on Python 2 and is valid syntax on Python 3.
print(clf1.score(x_t, y_t))
print(clf2.score(x_t, y_t))
结果
0.5288
0.9999
构造训练数据
X = {x1, x2},当 x1 和 x2 同号时 Y 为 1,异号时 Y 为 -1。
从结果可以看出来lr的准确率几乎等同于随机猜测,而决策树的准确率接近1。
lr为线性分类器,在本例中类似于在二维空间中画一条直线将正负样本分开。但本例中正样本位于第一、三象限,负样本位于第二、四象限(类似异或问题),线性不可分,任何一条直线都无法将两类样本完全分开。
决策树则是不停地在平面中画与坐标轴平行的直线(竖线或横线),将平面一分为二。