主要思想:将多分类问题转换为多个二分类问题。
import pandas as pd
import matplotlib.pyplot as plt
# The raw data file has no header row, so the column names are supplied
# manually here. Fields in the file are separated by runs of whitespace.
columns = [
    'mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
    'model year', 'origin', 'car name',
]
# `sep=r'\s+'` is the documented equivalent of the deprecated
# `delim_whitespace=True` option and splits on any run of whitespace.
cars = pd.read_table('./data/auto-mpg.data', sep=r'\s+', names=columns)
# Show the first rows as a quick sanity check of the parsed columns.
print(cars.head())
mpg cylinders displacement horsepower weight acceleration model year \
0 18.0 8 307.0 130.0 3504.0 12.0 70
1 15.0 8 350.0 165.0 3693.0 11.5 70
2 18.0 8 318.0 150.0 3436.0 11.0 70
3 16.0 8 304.0 150.0 3433.0 12.0 70
4 17.0 8 302.0 140.0 3449.0 10.5 70
origin car name
0 1 chevrolet chevelle malibu
1 1 buick skylark 320
2 1 plymouth satellite
3 1 amc rebel sst
4 1 ford torino
使用 pandas.get_dummies() 函数将多分类变量(此处为 cylinders)转换为哑变量列:每个类别对应一列 0/1 的二分类标签。
dummy_cylinders = pd.get_dummies(cars['cylinders'