from imblearn.over_sampling import SMOTE
import pandas as pd
C:\ProgramData\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
C:\ProgramData\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
# Load the prepared dataset, then keep a small working sample: the first
# 20 rows and the columns at positions 1 through 9 (position 0 is skipped).
df = pd.read_csv('base_done.csv')
data = df.iloc[:20, 1:10]
data
| | sex | age | provider | level | verified | using_time | regist_type | card_a_cnt | card_b_cnt |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24853 | 0 | 1 | 0 | 24713 | 1 | 24719 | 24712 |
| 1 | 1 | 25011 | 0 | 1 | 0 | 24743 | 7 | 24712 | 24712 |
| 2 | 1 | 24877 | 0 | 2 | 0 | 24744 | 7 | 24719 | 24725 |
| 3 | 0 | 24925 | 0 | 2 | 0 | 24715 | 1 | 24712 | 24712 |
| 4 | 1 | 24877 | 2 | 1 | 0 | 24706 | 3 | 24712 | 24712 |
| 5 | 0 | 24944 | 0 | 2 | 0 | 24720 | 1 | 24719 | 24712 |
| 6 | 0 | 24840 | 0 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 7 | 0 | 24944 | 0 | 2 | 0 | 24709 | 1 | 24712 | 24712 |
| 8 | 0 | 24908 | 0 | 2 | 0 | 24730 | 1 | 24712 | 24712 |
| 9 | 1 | 24956 | 0 | 2 | 0 | 24741 | 7 | 24719 | 24719 |
| 10 | 0 | 24920 | 0 | 2 | 0 | 24741 | 7 | 24719 | 24719 |
| 11 | 0 | 24871 | 0 | 2 | 0 | 24722 | 2 | 24712 | 24712 |
| 12 | 1 | 24889 | 0 | 2 | 0 | 24725 | 1 | 24719 | 24719 |
| 13 | 1 | 24865 | 2 | 2 | 0 | 24710 | 3 | 24712 | 24712 |
| 14 | 0 | 24944 | 0 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 15 | 1 | 24931 | 0 | 2 | 0 | 24733 | 7 | 24712 | 24719 |
| 16 | 0 | 24963 | 0 | 1 | 0 | 24721 | 2 | 24712 | 24712 |
| 17 | 0 | 24877 | 0 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 18 | 0 | 24901 | 0 | 2 | 0 | 24733 | 7 | 24725 | 24719 |
| 19 | 0 | 24859 | 2 | 1 | 0 | 24713 | 3 | 24712 | 24712 |
# Split the sample into features (everything except `provider`) and the
# target (`provider`).  X is kept as a DataFrame rather than converted with
# `.values`: a bare ndarray would discard the column labels, so the
# resampled features could no longer be shown with their column names.
X = data.drop(columns='provider')
y = data['provider']
# Class balance of the target before resampling.
y.value_counts()
0 17
2 3
Name: provider, dtype: int64
# Oversample the minority class with SMOTE.
#   sampling_strategy: desired number of samples per class after resampling
#       (class 0 is left at 17, class 2 is grown from 3 to 15).
#   k_neighbors=2: class 2 has only 3 samples, so each minority sample has
#       at most 2 same-class neighbours to interpolate towards.
#   random_state: fixed so the synthetic rows are identical from run to run;
#       without it every execution produces different samples.
# NOTE(review): SMOTE interpolates every column as if it were continuous;
# `sex` and `regist_type` look categorical, in which case SMOTENC may be
# the better fit -- confirm against the data dictionary.
sm = SMOTE(sampling_strategy={0: 17, 2: 15}, k_neighbors=2, random_state=42)
X_res, y_res = sm.fit_resample(X, y)
# Class balance of the target after resampling.
y_res.value_counts()
0 17
2 15
Name: provider, dtype: int64
# Inspect the resampled feature matrix returned by fit_resample.
X_res
| | sex | age | level | verified | using_time | regist_type | card_a_cnt | card_b_cnt |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 24853 | 1 | 0 | 24713 | 1 | 24719 | 24712 |
| 1 | 1 | 25011 | 1 | 0 | 24743 | 7 | 24712 | 24712 |
| 2 | 1 | 24877 | 2 | 0 | 24744 | 7 | 24719 | 24725 |
| 3 | 0 | 24925 | 2 | 0 | 24715 | 1 | 24712 | 24712 |
| 4 | 1 | 24877 | 1 | 0 | 24706 | 3 | 24712 | 24712 |
| 5 | 0 | 24944 | 2 | 0 | 24720 | 1 | 24719 | 24712 |
| 6 | 0 | 24840 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 7 | 0 | 24944 | 2 | 0 | 24709 | 1 | 24712 | 24712 |
| 8 | 0 | 24908 | 2 | 0 | 24730 | 1 | 24712 | 24712 |
| 9 | 1 | 24956 | 2 | 0 | 24741 | 7 | 24719 | 24719 |
| 10 | 0 | 24920 | 2 | 0 | 24741 | 7 | 24719 | 24719 |
| 11 | 0 | 24871 | 2 | 0 | 24722 | 2 | 24712 | 24712 |
| 12 | 1 | 24889 | 2 | 0 | 24725 | 1 | 24719 | 24719 |
| 13 | 1 | 24865 | 2 | 0 | 24710 | 3 | 24712 | 24712 |
| 14 | 0 | 24944 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 15 | 1 | 24931 | 2 | 0 | 24733 | 7 | 24712 | 24719 |
| 16 | 0 | 24963 | 1 | 0 | 24721 | 2 | 24712 | 24712 |
| 17 | 0 | 24877 | 2 | 0 | 24727 | 1 | 24712 | 24712 |
| 18 | 0 | 24901 | 2 | 0 | 24733 | 7 | 24725 | 24719 |
| 19 | 0 | 24859 | 1 | 0 | 24713 | 3 | 24712 | 24712 |
| 20 | 0 | 24860 | 1 | 0 | 24712 | 3 | 24712 | 24712 |
| 21 | 0 | 24871 | 1 | 0 | 24708 | 3 | 24712 | 24712 |
| 22 | 0 | 24872 | 1 | 0 | 24707 | 3 | 24712 | 24712 |
| 23 | 1 | 24865 | 1 | 0 | 24709 | 3 | 24712 | 24712 |
| 24 | 0 | 24863 | 1 | 0 | 24710 | 3 | 24712 | 24712 |
| 25 | 0 | 24859 | 1 | 0 | 24712 | 3 | 24712 | 24712 |
| 26 | 1 | 24870 | 1 | 0 | 24708 | 3 | 24712 | 24712 |
| 27 | 1 | 24875 | 1 | 0 | 24706 | 3 | 24712 | 24712 |
| 28 | 1 | 24872 | 1 | 0 | 24707 | 3 | 24712 | 24712 |
| 29 | 0 | 24873 | 1 | 0 | 24707 | 3 | 24712 | 24712 |
| 30 | 0 | 24862 | 1 | 0 | 24711 | 3 | 24712 | 24712 |
| 31 | 0 | 24859 | 1 | 0 | 24712 | 3 | 24712 | 24712 |
| 32 | 0 | 24873 | 1 | 0 | 24707 | 3 | 24712 | 24712 |
| 33 | 1 | 24866 | 1 | 0 | 24709 | 3 | 24712 | 24712 |