逻辑回归案例:信用卡欺诈检测

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

DATA = pd.read_csv("creditcard.csv")

# Check whether the classes are balanced ---------------------------------------
# Count the rows per "Class" label and order the result by label value.
# Series.value_counts is used because the top-level pd.value_counts helper is
# deprecated in recent pandas releases.
count_classes = DATA["Class"].value_counts(sort=True).sort_index()

count_classes.plot(kind="bar")  # pandas can draw simple charts directly
plt.show()



# Feature standardization ------------------------------------------------------
from sklearn.preprocessing import StandardScaler

# StandardScaler expects a 2-D input, so the Amount column is reshaped into a
# single-column matrix; -1 lets NumPy infer the number of rows.
amount_column = DATA["Amount"].values.reshape(-1, 1)
DATA["normAmount"] = StandardScaler().fit_transform(amount_column)

# The raw "Amount" is replaced by "normAmount"; "Time" is dropped as well.
DATA = DATA.drop(columns=["Time", "Amount"])




# Under-sampling ---------------------------------------------------------------
# Under-sampling randomly draws, from the larger class, as many rows as the
# smaller class contains, so that both classes end up the same size.
DATA_matrix = DATA.values
X = DATA_matrix[:, DATA.columns != "Class"]
y = DATA_matrix[:, DATA.columns == "Class"]

# Index labels of the fraud (Class == 1) and normal (Class == 0) rows.
fraud_indices = np.array(DATA[DATA["Class"] == 1].index)
norm_indices = np.array(DATA[DATA["Class"] == 0].index)
number_records_fraud = len(fraud_indices)

# np.random.choice(a, size, replace=False) samples `size` entries of `a`
# without replacement and already returns an ndarray.
random_norm_indics = np.random.choice(norm_indices, number_records_fraud, replace=False)
under_sample_indices = np.concatenate([fraud_indices, random_norm_indics])  # merge both classes

# The collected values are index *labels* (taken from .index), so select with
# the label-based .loc; positional .iloc only agrees while the index happens to
# be the default 0..n-1 range.
under_sample = DATA.loc[under_sample_indices, :]

x_under_sample = under_sample.values[:, under_sample.columns != "Class"]
y_under_sample = under_sample.values[:, under_sample.columns == "Class"]





# Train/test split -------------------------------------------------------------
from sklearn.model_selection import train_test_split

# train_test_split takes the feature matrix and the label matrix and returns
# four arrays; the train pieces and the test pieces stay row-aligned with each
# other. 30% of the rows are held out for testing, with a fixed random_state.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)
x_train_undersample, x_test_undersample, y_train_undersample, y_test_undersample = train_test_split(x_under_sample,
    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值