# Column-oriented sample records: each key is a column name and each list
# holds that column's seven values, in row order.
data = {
    'num': [10, 11, 12, 13, 14, 15, 16],
    'name': [
        'lili', 'wangyi', 'xiaoxiao', 'xiaoye',
        'yangyang', 'zhouyang', 'wangli',
    ],
    'age': [25, 20, 28, 23, 25, 30, 32],
    'address': ['北京', '上海', '北京', '上海', '北京', '上海', '北京'],
}
data  # echo the dict so the notebook displays it
{'num': [10, 11, 12, 13, 14, 15, 16],
'name': ['lili',
'wangyi',
'xiaoxiao',
'xiaoye',
'yangyang',
'zhouyang',
'wangli'],
'age': [25, 20, 28, 23, 25, 30, 32],
'address': ['北京', '上海', '北京', '上海', '北京', '上海', '北京']}
import pandas as pd
# Turn the column dict into a DataFrame (one column per key), then echo it.
df = pd.DataFrame(data)
df
num name age address 0 10 lili 25 北京 1 11 wangyi 20 上海 2 12 xiaoxiao 28 北京 3 13 xiaoye 23 上海 4 14 yangyang 25 北京 5 15 zhouyang 30 上海 6 16 wangli 32 北京
# Column-wise sum: numeric columns are added up, while the string columns
# ('name', 'address') come out as the concatenation of their values.
df.sum()
num 91
name liliwangyixiaoxiaoxiaoyeyangyangzhouyangwangli
age 183
address 北京上海北京上海北京上海北京
dtype: object
# Number of non-missing entries in each column (7 for every column here).
df.count()
num 7
name 7
age 7
address 7
dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max); only the
# numeric columns 'num' and 'age' appear in the result.
df.describe()
num age count 7.000000 7.000000 mean 13.000000 26.142857 std 2.160247 4.140393 min 10.000000 20.000000 25% 11.500000 24.000000 50% 13.000000 25.000000 75% 14.500000 29.000000 max 16.000000 32.000000
# Boolean frame that is True wherever a cell equals one of the listed values.
mask = df.isin(['xiaoye', 32, 25])
# Indexing with the boolean frame keeps matching cells and shows NaN elsewhere.
df[mask]
num name age address 0 NaN NaN 25.0 NaN 1 NaN NaN NaN NaN 2 NaN NaN NaN NaN 3 NaN xiaoye NaN NaN 4 NaN NaN 25.0 NaN 5 NaN NaN NaN NaN 6 NaN NaN 32.0 NaN
# Three independent loading examples; each assignment rebinds `df`.

# Load an Excel workbook.
df = pd.read_excel('./gzPrice.xlsx')

# Load only the first 5 data rows of the CSV.
df = pd.read_csv('./fangPrice.csv', engine='python', nrows=5)

# With chunksize set, read_csv returns an iterator that yields the file in
# pieces of up to 10 rows instead of a single DataFrame.
df = pd.read_csv('./fangPrice.csv', engine='python', chunksize=10)
# `df` is expected to be the chunked reader returned by
# read_csv(..., chunksize=10); iterating it yields one DataFrame per chunk.
# The loop needs a real statement as its body: a body consisting only of a
# comment raises "SyntaxError: unexpected EOF while parsing".
for chunk in df:
    print(chunk)
# Sample records laid out by column; every list has one entry per row.
data1 = {
    'num': [10, 11, 12, 13, 14, 15, 16],
    'name': [
        'lili', 'wangyi', 'xiaoxiao', 'xiaoye',
        'yangyang', 'zhouyang', 'wangli',
    ],
    'age': [25, 20, 28, 23, 25, 30, 32],
    'address': ['北京', '上海', '北京', '上海', '北京', '上海', '北京'],
}
data1  # echo the dict so the notebook displays it
{'num': [10, 11, 12, 13, 14, 15, 16],
'name': ['lili',
'wangyi',
'xiaoxiao',
'xiaoye',
'yangyang',
'zhouyang',
'wangli'],
'age': [25, 20, 28, 23, 25, 30, 32],
'address': ['北京', '上海', '北京', '上海', '北京', '上海', '北京']}
# Build the DataFrame that is written to CSV further down, then echo it.
df1 = pd.DataFrame(data1)
df1
num name age address 0 10 lili 25 北京 1 11 wangyi 20 上海 2 12 xiaoxiao 28 北京 3 13 xiaoye 23 上海 4 14 yangyang 25 北京 5 15 zhouyang 30 上海 6 16 wangli 32 北京
# Write df1 to a CSV file; index=False leaves the row index out of the file.
df1.to_csv('./newData.csv', index=False)
# Row-oriented sample data: a list of records sharing the keys a, b and c.
aa = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 4, "b": 5, "c": 6},
    {"a": 7, "b": 8, "c": 9},
]
aa  # echo the list so the notebook displays it
[{'a': 1, 'b': 2, 'c': 3}, {'a': 4, 'b': 5, 'c': 6}, {'a': 7, 'b': 8, 'c': 9}]
from numpy import nan as NA
# Series with two missing entries (NA is numpy's nan); because of the NaN
# values the whole Series is stored as float64.
s = pd.Series([1, NA, 3.5, NA, 7])
s
0 1.0
1 NaN
2 3.5
3 NaN
4 7.0
dtype: float64
# New Series without the NaN entries; the original index labels are kept.
s.dropna()
0 1.0
2 3.5
4 7.0
dtype: float64
# Boolean-mask selection of the non-missing entries; here it gives the same
# rows as s.dropna().
s[s.notnull()]
0 1.0
2 3.5
4 7.0
dtype: float64
# Boolean-mask selection of only the missing (NaN) entries.
s[s.isnull()]
1 NaN
3 NaN
dtype: float64
# Echo df1 again so the notebook displays its current contents.
df1
num name age address 0 10 lili 25 北京 1 11 wangyi 20 上海 2 12 xiaoxiao 28 北京 3 13 xiaoye 23 上海 4 14 yangyang 25 北京 5 15 zhouyang 30 上海 6 16 wangli 32 北京