from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
def change_label_encoding(Inputfile_name, Outputfile_name):
    """Label-encode every object-dtype column of a CSV file.

    The file at ``Inputfile_name`` is loaded with ``pandas.read_csv``.
    Each column whose dtype is ``object`` is replaced by the integer codes
    produced by a fresh ``LabelEncoder``; the name of every other column is
    printed and the column is left unchanged.  The resulting frame is
    written to ``Outputfile_name`` with ``DataFrame.to_csv`` using its
    default options, so the row index is written as the first column.

    Args:
        Inputfile_name: Path of the CSV file to read.
        Outputfile_name: Path of the CSV file to write.
    """
    frame = pd.read_csv(Inputfile_name)

    # First pass: collect the object-dtype columns, reporting the others.
    object_columns = []
    for name in frame.columns.values:
        if frame[name].dtypes != 'object':
            print('col:', name)
            continue
        object_columns.append(name)

    # Second pass: one independent encoder per column, fitted on that
    # column's own values.
    for name in object_columns:
        values = frame[name]
        frame[name] = LabelEncoder().fit(values).transform(values)

    frame.to_csv(Outputfile_name)
def change_onehot_encoding(Inputfile_name, Outputfile_name, numeric_feat):
    """One-hot encode the given columns of a CSV file and save the result.

    The file at ``Inputfile_name`` is loaded with ``pandas.read_csv``.
    Each column named in ``numeric_feat`` is replaced by the indicator
    columns from ``pandas.get_dummies`` (prefixed with the column name,
    first level dropped via ``drop_first=True``).  The indicator columns
    are appended to the right of the remaining columns, and the frame is
    written to ``Outputfile_name`` with ``DataFrame.to_csv`` defaults
    (row index included).

    Args:
        Inputfile_name: Path of the CSV file to read.
        Outputfile_name: Path of the CSV file to write.
        numeric_feat: Iterable of column names to one-hot encode.

    Raises:
        KeyError: If a name in ``numeric_feat`` is not a column of the file.
    """
    onehot_feature = pd.read_csv(Inputfile_name)

    for col in numeric_feat:
        the_cate_col = pd.get_dummies(
            onehot_feature[col], prefix=col, drop_first=True
        )
        # Rebind the frame that is written below so every encoded column
        # is kept; previously the result went into a throwaway variable
        # and the unmodified input was saved.
        onehot_feature = pd.concat(
            (onehot_feature.drop(columns=col), the_cate_col), axis=1
        )

    onehot_feature.to_csv(Outputfile_name)
def onehot_encoding(Inputfile_name, Outputfile_name, numeric_feat):
    """One-hot encode selected CSV columns and return the encoded arrays.

    The file at ``Inputfile_name`` is loaded with ``pandas.read_csv``.
    For each column named in ``numeric_feat`` a fresh ``OneHotEncoder`` is
    fitted on that column alone and the dense encoding is collected.

    Args:
        Inputfile_name: Path of the CSV file to read.
        Outputfile_name: Not used by this function; nothing is written.
            Kept so existing callers keep working.
        numeric_feat: Iterable of column names to encode.

    Returns:
        A list with one dense 2-D array per entry of ``numeric_feat``, in
        the same order.
    """
    onehot_feature = pd.read_csv(Inputfile_name)
    encode_onehot = []

    for col in numeric_feat:
        # OneHotEncoder requires 2-D (n_samples, n_features) input.
        # Selecting with a list keeps the column as a one-column table;
        # the flat list passed before was rejected with a ValueError.
        column_2d = onehot_feature[[col]].values
        enc = OneHotEncoder()
        b_dis = enc.fit_transform(column_2d).toarray()
        encode_onehot.append(b_dis)

    return encode_onehot
if __name__ == "__main__":
    # Script entry point: label-encode the object-dtype columns of the
    # input CSV and write the encoded table to the output CSV.
    change_label_encoding(
        Inputfile_name='kddlabel0-2+.csv',
        Outputfile_name='kddlabel0-3+.csv',
    )