Python操作Excel
1.读取文件
1.1 pandas直接读取
1.1.1 读取excel
字典形式导入
src1 = {
'file': r'd:\user\医药销售09月薪资考核数据1010.xlsx',
'sht': 'Sheet1',
'cols': 'AE,AF,I,J,K,Z',
'names': ['客户名称', 'yj账号', 'yj网点', '上期逾期期末未回款金额', '工资月份', '大区'],
'colsorder': ['工资月份', '大区', '客户名称', 'yj账号', 'yj网点', '上期逾期期末未回款金额']
}
src1['data'] = pd.read_excel(src1['file'], sheet_name=src1['sht'], header=None, skiprows=1, usecols=src1['cols'], names=src1['names'])
1.1.2 读取csv
类似于导入excel,需注意encoding = ‘’utf-8’ 或 encoding = ‘’gbk’
df = pd.read_csv(file, encoding='utf-8', usecols=cols, low_memory=False)
1.1.3 读取文件夹
os.chdir(r'd:\user\桌面\BP-js12.7')
extension = 'csv'
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
#combine all files in the list
js = pd.concat([pd.read_csv(f, encoding='gbk', low_memory=False) for f in all_filenames ])
1.2 模块式读取,主程序调用
思路:现有2张表,读入过程用函数封装,以模块形式调用。
具体做法:
①建立2个py文件,分别定义函数get_jsdf, get_bcmddf;
②将这2个文件(模块)放入同一文件夹《hkhsm》下,分别命名为importm_***。
》 导入jsdf,命名为importm_js.py
import os
import pandas as pd
import numpy as np
def get_jsdf(jsdir):
files = getfiles(jsdir, '.csv')
print(files)
file = files[0]
df = getdf(file)
for f in files[1:]:
newdf = getdf(f)
df = pd.concat([df, newdf])
return df
def getdf(file):
print(file)
df = pd.read_csv(file, encoding='gbk', low_memory=False)
df = df[df['大区'].isin(['ws大区', 'ky部', 'Tx']) & ~df['地区名称'].isin(['kyab区', 'kycd区'])]
#一些数据处理过程
df['yj账号'] = pd.to_numeric(df['yj账号'], errors='coerce').fillna(df['js账号'])
df['yj账号'] = df['yj账号'].astype(np.int64)
df['未收(未付)-本位币'] = df['未收(未付)-本位币'].apply(lambda x: 0.0 if x < 0.0 else x)
return df
def getfiles(path, extension):
list = os.listdir(path)
result = []
for i in range(0, len(list)):
temp = os.path.join(path, list[i])
if os.path.isfile(temp) and os.path.splitext(temp)[1] == extension:
result.append(temp)
return result
if __name__ == '__main__':
jsdir = r'd:\user\桌面\hkhs原始数据1\BP-js'
df = get_jsdf(jsdir)
# df = pd.read_pickle('./js.pkl')
print(df.shape)
print(df)
print(df.info())
》 导入bcmd,命名为importm_bcmd.py
import pandas as pd
import os
def get_bcmddf(path, shortname):
file = os.path.join(path, shortname)
md = pd.read_excel(file, usecols='A')
return md
if __name__ == '__main__':
bcmd = get_bcmddf(r'd:\user\桌面\货款回收原始数据1', '补充名单.xlsx')
print(bcmd.info(

最低0.47元/天 解锁文章

被折叠的 条评论
为什么被折叠?



