Untitled

import pandas as pd
import numpy as np
# Read the training CSV, re-index the rows by their Loan_ID column and
# preview the first ten rows.
csv_path = 'C:/Users/user/Desktop/train.csv'
t = pd.read_csv(filepath_or_buffer=csv_path)
T = t.set_index(keys='Loan_ID')
T.head(n=10)
         Gender Married Dependents     Education Self_Employed  ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  Credit_History Property_Area Loan_Status
Loan_ID
LP001002   Male      No          0      Graduate            No             5849                0.0         NaN             360.0             1.0         Urban           Y
LP001003   Male     Yes          1      Graduate            No             4583             1508.0       128.0             360.0             1.0         Rural           N
LP001005   Male     Yes          0      Graduate           Yes             3000                0.0        66.0             360.0             1.0         Urban           Y
LP001006   Male     Yes          0  Not Graduate            No             2583             2358.0       120.0             360.0             1.0         Urban           Y
LP001008   Male      No          0      Graduate            No             6000                0.0       141.0             360.0             1.0         Urban           Y
LP001011   Male     Yes          2      Graduate           Yes             5417             4196.0       267.0             360.0             1.0         Urban           Y
LP001013   Male     Yes          0  Not Graduate            No             2333             1516.0        95.0             360.0             1.0         Urban           Y
LP001014   Male     Yes         3+      Graduate            No             3036             2504.0       158.0             360.0             0.0     Semiurban           N
LP001018   Male     Yes          2      Graduate            No             4006             1526.0       168.0             360.0             1.0         Urban           Y
LP001020   Male     Yes          1      Graduate            No            12841            10968.0       349.0             360.0             1.0     Semiurban           N
# Select the rows where Education is 'Not Graduate', Loan_Status is 'Y' and
# Gender is 'Female', and show only those three columns.
is_not_graduate = T['Education'].eq('Not Graduate')
has_status_y = T['Loan_Status'].eq('Y')
is_female = T['Gender'].eq('Female')
mask = is_not_graduate & has_status_y & is_female
T.loc[mask, ['Gender', 'Education', 'Loan_Status']]
          Gender     Education Loan_Status
Loan_ID
LP001155  Female  Not Graduate           Y
LP001669  Female  Not Graduate           Y
LP001692  Female  Not Graduate           Y
LP001908  Female  Not Graduate           Y
LP002300  Female  Not Graduate           Y
LP002314  Female  Not Graduate           Y
LP002407  Female  Not Graduate           Y
LP002489  Female  Not Graduate           Y
LP002502  Female  Not Graduate           Y
LP002534  Female  Not Graduate           Y
LP002582  Female  Not Graduate           Y
LP002731  Female  Not Graduate           Y
LP002757  Female  Not Graduate           Y
LP002917  Female  Not Graduate           Y
def num_missing(x):
    """Return the number of missing (null/NaN) entries in *x*.

    *x* is expected to be a pandas Series (or any object whose ``isnull()``
    result supports ``.sum()``), such as a single row or column passed in
    by ``DataFrame.apply``.
    """
    # Count with the vectorized Series.sum() instead of iterating the mask
    # element by element with the builtin sum(); int() keeps the plain
    # Python int return value.
    return int(x.isnull().sum())
# Count the missing values in every column of T.
T.apply(func=num_missing, axis='index')
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64
# Count the missing values in every row of T and show the first ten results.
T.apply(func=num_missing, axis='columns').iloc[:10]
Loan_ID
LP001002    1
LP001003    0
LP001005    0
LP001006    0
LP001008    0
LP001011    0
LP001013    0
LP001014    0
LP001018    0
LP001020    0
dtype: int64
# Replace missing values in these columns with each column's most frequent
# value (the first entry returned by mode()).
# The filled column is assigned back to T rather than calling
# fillna(inplace=True) on T[col]: the selected column can be a temporary
# object, so an in-place fill on it is not guaranteed to reach T (pandas
# warns about this chained pattern, and with Copy-on-Write it never
# updates T).
for col in ['Gender', 'Married', 'Self_Employed']:
    T[col] = T[col].fillna(T[col].mode().iloc[0])
# Cross-tabulate Credit_History (rows) against Loan_Status (columns),
# including the 'All' row/column totals.
pd.crosstab(index=T['Credit_History'], columns=T['Loan_Status'], margins=True)
Loan_Status       N    Y  All
Credit_History
0.0              82    7   89
1.0              97  378  475
All             179  385  564
# Small lookup frame: one 'rates' value per Property_Area label, with the
# label used as the index.
area_names = ['Rural', 'Semiurban', 'Urban']
prop_rates = pd.DataFrame({'rates': [1000, 5000, 12000]}, index=area_names)
prop_rates
           rates
Rural       1000
Semiurban   5000
Urban      12000
# Attach the matching 'rates' value to every row of T by joining the
# Property_Area column against the index of prop_rates.
T1 = pd.merge(
    T,
    prop_rates,
    how='inner',
    left_on='Property_Area',
    right_index=True,
    sort=False,
)
T1
GenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Statusrates
Loan_ID
LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY12000
LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY12000
LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY12000
LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY12000
LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanY12000
LP001013MaleYes0Not GraduateNo23331516.095.0360.01.0UrbanY12000
LP001018MaleYes2GraduateNo40061526.0168.0360.01.0UrbanY12000
LP001024MaleYes2GraduateNo3200700.070.0360.01.0UrbanY12000
LP001027MaleYes2GraduateNo25001840.0109.0360.01.0UrbanY12000
LP001028MaleYes2GraduateNo30738106.0200.0360.01.0UrbanY12000
LP001030MaleYes2GraduateNo12991086.017.0120.01.0UrbanY12000
LP001032MaleNo0GraduateNo49500.0125.0360.01.0UrbanY12000
LP001034MaleNo1Not GraduateNo35960.0100.0240.0NaNUrbanY12000
LP001036FemaleNo0GraduateNo35100.076.0360.00.0UrbanN12000
LP001041MaleYes0GraduateNo26003500.0115.0NaN1.0UrbanY12000
LP001043MaleYes0Not GraduateNo76600.0104.0360.00.0UrbanN12000
LP001046MaleYes1GraduateNo59555625.0315.0360.01.0UrbanY12000
LP001073MaleYes2Not GraduateNo42261040.0110.0360.01.0UrbanY12000
LP001086MaleNo0Not GraduateNo14420.035.0360.01.0UrbanN12000
LP001091MaleYes1GraduateNo41663369.0201.0360.0NaNUrbanN12000
LP001095MaleNo0GraduateNo31670.074.0360.01.0UrbanN12000
LP001106MaleYes0GraduateNo22752067.0NaN360.01.0UrbanY12000
LP001109MaleYes0GraduateNo18281330.0100.0NaN0.0UrbanN12000
LP001114MaleNo0GraduateNo41667210.0184.0360.01.0UrbanY12000
LP001119MaleNo0GraduateNo36000.080.0360.01.0UrbanN12000
LP001120MaleNo0GraduateNo18001213.047.0360.01.0UrbanY12000
LP001123MaleYes0GraduateNo24000.075.0360.0NaNUrbanY12000
LP001136MaleYes0Not GraduateYes46950.096.0NaN1.0UrbanY12000
LP001137FemaleNo0GraduateNo34100.088.0NaN1.0UrbanY12000
LP001138MaleYes1GraduateNo56490.044.0360.01.0UrbanY12000
..........................................
LP002729MaleNo1GraduateNo112500.0196.0360.0NaNSemiurbanN5000
LP002738MaleNo2GraduateNo36170.0107.0360.01.0SemiurbanY5000
LP002741FemaleYes1GraduateNo46082845.0140.0180.01.0SemiurbanY5000
LP002743FemaleNo0GraduateNo21380.099.0360.00.0SemiurbanN5000
LP002753FemaleNo1GraduateNo36520.095.0360.01.0SemiurbanY5000
LP002757FemaleYes0Not GraduateNo3017663.0102.0360.0NaNSemiurbanY5000
LP002768MaleNo0Not GraduateNo33580.080.036.01.0SemiurbanN5000
LP002776FemaleNo0GraduateNo50000.0103.0360.00.0SemiurbanN5000
LP002792MaleYes1GraduateNo54681032.026.0360.01.0SemiurbanY5000
LP002795MaleYes3+GraduateYes101390.0260.0360.01.0SemiurbanY5000
LP002798MaleYes0GraduateNo38872669.0162.0360.01.0SemiurbanY5000
LP002804FemaleYes0GraduateNo41802306.0182.0360.01.0SemiurbanY5000
LP002807MaleYes2Not GraduateNo3675242.0108.0360.01.0SemiurbanY5000
LP002813FemaleYes1GraduateYes194840.0600.0360.01.0SemiurbanY5000
LP002821MaleNo0Not GraduateYes58000.0132.0360.01.0SemiurbanY5000
LP002862MaleYes2Not GraduateNo61251625.0187.0480.01.0SemiurbanN5000
LP002863MaleYes3+GraduateNo64060.0150.0360.01.0SemiurbanN5000
LP002872MaleYes0GraduateNo30872210.0136.0360.00.0SemiurbanN5000
LP002892MaleYes2GraduateNo65400.0205.0360.01.0SemiurbanY5000
LP002894FemaleYes0GraduateNo31660.036.0360.01.0SemiurbanY5000
LP002917FemaleNo0Not GraduateNo21650.070.0360.01.0SemiurbanY5000
LP002925MaleNo0GraduateNo47500.094.0360.01.0SemiurbanY5000
LP002926MaleYes2GraduateYes27260.0106.0360.00.0SemiurbanN5000
LP002928MaleYes0GraduateNo30003416.056.0180.01.0SemiurbanY5000
LP002931MaleYes2GraduateYes60000.0205.0240.01.0SemiurbanN5000
LP002933MaleNo3+GraduateYes93570.0292.0360.01.0SemiurbanY5000
LP002943MaleNoNaNGraduateNo29870.088.0360.00.0SemiurbanN5000
LP002959FemaleYes1GraduateNo120000.0496.0360.01.0SemiurbanY5000
LP002961MaleYes1GraduateNo34002500.0173.0360.01.0SemiurbanY5000
LP002990FemaleNo0GraduateYes45830.0133.0360.00.0SemiurbanN5000

614 rows × 13 columns

# Mean Credit_History for every (Property_Area, rates) group. The string
# "mean" selects pandas' built-in aggregation.
impute_grps = T1.pivot_table(values=["Credit_History"], index=["Property_Area", "rates"], aggfunc="mean")
# Fill each missing Credit_History with the mean of the row's own group.
# Only rows where the value is missing are visited, and the assignment
# targets the single (row label, "Credit_History") cell. Assigning to
# T1.loc["Credit_History"] would instead write a whole row labelled
# "Credit_History".
# NOTE(review): assumes the Loan_ID index labels are unique - confirm.
for i, row in T1.loc[T1["Credit_History"].isnull(), :].iterrows():
    ind = (row["Property_Area"], row["rates"])
    T1.loc[i, "Credit_History"] = impute_grps.loc[ind, "Credit_History"]
# Group T1 by (Property_Area, rates) and show the mean Credit_History of
# each group.
grps = T1.groupby(by=["Property_Area", "rates"])
credit_by_group = grps["Credit_History"]
credit_by_group.mean()
Property_Area       rates       
0.8617511520737328  0.861751        0.861751
Rural               1000.000000     0.830303
Semiurban           5000.000000     0.861751
Urban               12000.000000    0.829670
Name: Credit_History, dtype: float64
# Count the non-missing Credit_History values per (Property_Area, rates)
# group, flatten the group keys back into columns, and display the result
# indexed by Property_Area.
credit_counts = T1.groupby(["Property_Area", "rates"])["Credit_History"].count()
T2 = credit_counts.reset_index()
T2.set_index(keys="Property_Area")
               rates  Credit_History
Property_Area
Rural           1000             165
Semiurban       5000             217
Urban          12000             182
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值