1、读取文件
import pandas as pd
import logging
import os
def get_data_pd(path):
    """Load a tabular file into pandas, choosing the reader by file extension.

    Args:
        path: Path of the file to read. ``.xlsx`` files are read from the
            worksheet named "Sheet1"; ``.csv`` files are decoded as GBK.
            The extension is matched case-insensitively.

    Returns:
        Whatever ``pd.read_excel`` or ``pd.read_csv`` returns for the file.

    Raises:
        ValueError: If ``path`` ends in neither ``.xlsx`` nor ``.csv``.
    """
    # Inspect only the real extension: a substring search would also match
    # ".csv" / ".xlsx" appearing in a directory name or in mid-filename.
    extension = os.path.splitext(path)[1].lower()
    if extension == ".xlsx":
        # The sheet name is passed positionally so the call does not depend
        # on how the installed pandas spells the keyword.
        return pd.read_excel(path, "Sheet1")
    if extension == ".csv":
        return pd.read_csv(path, encoding="gbk")
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError(
        "Unsupported file type %r: expected a .xlsx or .csv file" % (path,)
    )
2、将数据保存为 csv 格式
def save_data_csv(data, save_path):
    """Write ``data`` to ``save_path`` as a GBK-encoded CSV file.

    ``data`` must provide a pandas-style ``to_csv`` method. Only the
    encoding is overridden; every other ``to_csv`` option keeps its default.
    """
    csv_options = {"encoding": "gbk"}
    data.to_csv(save_path, **csv_options)
3、检查数据
def check_data(data):
    """Print a diagnostic summary of ``data`` to standard output.

    Prints a running sequence number, the value, its type and its length
    (ints and floats are reported as having no length). For a pandas
    DataFrame it additionally prints ``describe()`` and the ``sum()`` of
    every column.

    The sequence number is the module-level global ``I``, which is printed
    and then incremented on every call.
    NOTE(review): ``I`` is not initialised in this function; it is assumed
    to be defined at module level before the first call.

    Args:
        data: Any object to inspect.
    """
    global I
    print("第%s个数据" % I)
    I += 1
    try:
        # Operands are wrapped in 1-tuples so that tuple-valued data is
        # formatted as a single value rather than unpacked by "%".
        print("数据: %s" % (data,))
        print("数据类型为: %s" % (type(data),))
        # Report the length of the data, where it has one.
        if isinstance(data, (int, float)):
            print("这是整数或浮点数, 没有长度")
        else:
            print("数据长度为: %s" % len(data))
    except Exception:
        # Best-effort report (e.g. objects without len()). Only Exception is
        # caught so KeyboardInterrupt / SystemExit still propagate.
        print("遇到未知错误!")
    if isinstance(data, pd.DataFrame):
        print(data.describe())
        # Iterating a DataFrame yields its column labels.
        for column in data:
            # 1-tuple wrapping keeps tuple labels (MultiIndex columns) from
            # breaking the "%" formatting.
            print("这是%s列" % (column,))
            print(data[column].sum())
    print("数据检查函数运行结束")
    print("-" * 70)