# Change the working directory and show the effect.
import os

print(os.getcwd())  # current working directory before the change
print(os.listdir())  # entries of that directory
# The path must be an ordinary string literal with ASCII quotes;
# typographic quotes are not valid Python string delimiters.
os.chdir('C:/Users/Administrator')
print(os.getcwd())  # working directory after the change
print(os.listdir())  # entries of the new working directory
# Data file: C:\Users\Administrator\.kaggle\world-happiness-report-2019.csv
import pandas as pd

# Plain read: with no `header`/`names` given, read_csv takes the column
# names from the first line of the file. sep=',' is read_csv's default and
# is only spelled out for clarity.
df = pd.read_csv(
    'C:/Users/Administrator/.kaggle/world-happiness-report-2019.csv',
    sep=',',
)
# A bare `df.head()` is silently discarded when this runs as a script,
# so print the preview of the first rows explicitly.
print(df.head())
# Read the file as if it had no column names: header=None tells read_csv
# not to use any line as the header, so columns are numbered 0..N-1 and the
# file's first line is kept as an ordinary data row.
dfs = pd.read_csv(
    'C:/Users/Administrator/.kaggle/world-happiness-report-2019.csv',
    sep=',',
    header=None,
)
# A bare `dfs.tail()` is silently discarded when this runs as a script,
# so print the preview of the last rows explicitly.
print(dfs.tail())
# Supply the column names explicitly and read the file lazily.
# Because chunksize is set, read_csv returns an iterator (TextFileReader)
# that yields DataFrames of up to 10 rows each instead of one DataFrame.
# NOTE(review): `names` provides 10 labels ('a'..'j'); confirm this matches
# the number of columns in the CSV.
dfs1 = pd.read_csv(
    'C:/Users/Administrator/.kaggle/world-happiness-report-2019.csv',
    sep=',',
    header=None,
    names=list('abcdefghij'),
    chunksize=10,
)
for price in dfs1:  # each `price` is one chunk (a DataFrame)
    print(price)
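# Further read_csv / read_table options:
#   - use one column as the row index when reading: index_col="msg"
#   - use several columns as the row index: index_col=["a", "b"]
#   - handle irregular (whitespace) delimiters:
#       pd.read_table('data/ex3.csv', sep=r'\s+')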
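# Declare which values count as missing when reading: na_values=['NA', 'NULL']
# Declare missing-value markers per column (key = column name): na_values={'a': ['0', 'NA']}
# Read only a piece of the file (the first 10 rows): nrows=10
# Get an iterator that reads the file 10 rows at a time: chunksize=10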
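# Example: count how many times each key occurs, reading the file in chunks.
#
#     # tr is the iterator (TextFileReader) returned because chunksize is set
#     tr = pd.read_csv('data/ex6.csv', chunksize=1000)
#     key_count = pd.Series([], dtype='float64')
#     for pieces in tr:
#         # Series addition aligns on the index automatically
#         key_count = key_count.add(pieces['key'].value_counts(), fill_value=0)
#     key_count = key_count.sort_values(ascending=False)  # sort descending
#     key_count[:10]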