# Load the training and test sets from the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# How many columns there are of each dtype.
train.dtypes.value_counts()

# Show the category labels and their counts separately.
subcat1_counts = train['subcat1'].value_counts()
subcat1_counts.index.values.astype('str')  # labels, as strings
subcat1_counts.values                      # occurrences of each label

# Number of distinct values in the column.
train['subcat1'].nunique()
# data describe: train.describe() / train.info() / train.columns / train.shape
# data append: stack the two frames row-wise and renumber the index from 0.
# DataFrame.append() was removed in pandas 2.0; pd.concat() is the replacement
# and gives the same result when called with ignore_index=True.
# NOTE(review): data_train / data_test are not defined in this section (the
# frames loaded here are named train / test) - confirm which pair is intended.
full = pd.concat([data_train, data_test], ignore_index=True)
# Missing-value check: count nulls per column in each frame and place the two
# count Series side by side under the 'Train' and 'Test' headers.
NAs = pd.concat(
    [frame.isnull().sum() for frame in (train, test)],
    axis=1,
    keys=['Train', 'Test'],
)
# Keep only the columns that have at least one missing value in either frame.
NAs.loc[NAs.sum(axis=1) > 0]
# data type change: astype
# Build the result frame from the test Ids and the predictions cast to float64.
# Series.as_matrix() was removed in pandas 1.0; `.values` is its documented
# replacement and is passed as a plain array so the frame gets a fresh
# 0..n-1 index rather than inheriting the index of `test`.
result = pd.DataFrame({
    'Id': test['Id'].values,
    'SalePrice': predictions.astype(np.float64),
})