import pandas as pd
import os
def read_josn(fn):
    """Load a line-delimited JSON source into a DataFrame.

    Args:
        fn: Path or file-like object handed straight to ``pandas.read_json``;
            every line is parsed as one JSON record (``lines=True``).

    Returns:
        The ``pandas.DataFrame`` built by ``pandas.read_json``.
    """
    return pd.read_json(fn, lines=True)
# Core idea: integer-divide each index label by the chunk size to get a group id.
def split_df_to_files(df, chunk_size=1000, out_dir='./train_data/'):
    """Split ``df`` into numbered CSV files, one per index bucket.

    Rows are bucketed by ``index_label // chunk_size``; each bucket is written
    to ``<out_dir>/<NNNNN>.csv`` where ``NNNNN`` is the zero-padded ordinal of
    the bucket in sorted order (``00000.csv``, ``00001.csv``, ...).

    Note:
        The bucket id is stored in an ``'index'`` column that is added to
        (or overwritten on) ``df`` in place and is also written to every CSV.
        This matches the historical behaviour of this function.

    Args:
        df: DataFrame whose index labels support ``//`` with an integer
            (e.g. the default integer index).
        chunk_size: Width of each index bucket. Defaults to 1000.
        out_dir: Directory that receives the CSV files; created if missing.
            Defaults to ``'./train_data/'``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')

    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)

    df['index'] = [label // chunk_size for label in df.index]
    for i, (_, chunk) in enumerate(df.groupby('index')):
        target = os.path.join(out_dir, str(i).zfill(5) + '.csv')
        chunk.to_csv(target, index=False)
# Return the list of entries found in a directory.
def get_path_files(path):
    """Return the names of the entries in directory ``path``.

    The directory is listed exactly once, so the returned list is the same
    snapshot that was read (an empty directory yields ``[]``).

    Args:
        path: Directory to list.

    Returns:
        A list of entry names as produced by ``os.listdir`` (order is not
        guaranteed; sub-directories are included alongside files).

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. the path does not exist
            or is not a directory).
    """
    return os.listdir(path)
def run_feature(file):
    """Load one CSV from ``./train_data/`` and compute its features.

    Args:
        file: Name of the CSV file, appended verbatim to ``'./train_data/'``.

    Returns:
        Whatever ``get_all_features`` returns for the loaded DataFrame.
    """
    df = pd.read_csv('./train_data/' + file)
    return get_all_features(df)
def main():
    """Load ``1w.data`` as line-delimited JSON and split it into CSV chunk files."""
    # Split the input file into chunk files under the target directory.
    df = read_josn('1w.data')
    split_df_to_files(df)