# -*- coding: utf-8 -*-
"""
Created on Thu Jul 26 10:51:30 2018
@author: wenyun.wxw
"""
# Count the lines of data.txt. The total is used further down to derive the
# chunk size when the data is split into smaller CSV files.
# The file is opened in a `with` block so the handle is always closed; an
# empty file yields 0.
with open('data.txt', 'r', encoding='utf-8') as counted_file:
    # Iterating streams the file line by line, so it is never fully loaded.
    count = sum(1 for _ in counted_file)
import re
# Split data.txt into numbered CSV chunk files (1.csv, 2.csv, ...), aiming
# for about 30 small data sets.
#
# Relies on `count` (the number of lines in data.txt) computed earlier in
# this script and on the module-level `re` import.
k = 1  # number of the chunk file currently being written
# A chunk is rotated once it holds MORE than this many rows, so every full
# chunk file ends up with count // 30 + 2 rows; the last file may be shorter
# (or empty when the row total divides evenly).
max_rows = count // 30 + 1
rows_in_chunk = 0
file = open('1.csv', 'w')
try:
    with open('data.txt', 'r', encoding='utf-8') as f:
        for line in f:
            # Every single whitespace character is a separator, so runs of
            # whitespace produce empty fields and the trailing newline
            # produces an empty last field. Kept as-is because the column
            # index below depends on this exact splitting.
            line_s = re.split(r'\s', line)
            # Drop the 'over' marker when it occupies column 69. The length
            # check keeps short or blank lines from raising IndexError.
            if len(line_s) > 69 and line_s[69] == 'over':
                del line_s[69]
            file.write(','.join(line_s) + '\n')
            rows_in_chunk += 1
            if rows_in_chunk > max_rows:
                # Current chunk is full: close it and start the next file.
                rows_in_chunk = 0
                k = k + 1
                file.close()
                file = open(str(k) + '.csv', 'w')
finally:
    # Close the chunk that is still open, even if reading or writing failed.
    file.close()
# Load the chunk files back into memory, one list of rows per file.
#
# Each row is the stripped line split on commas. The rows of chunk j are
# available both as batches[j] and, as before, under the module-level name
# b<j> (b1, b2, ...).
batches = {}
# Chunk files are numbered 1..30, so the range has to end at 31;
# range(1, 30) would silently skip the last chunk.
for j in range(1, 31):
    rows = []
    try:
        with open(str(j) + '.csv', 'r') as f:
            rows = [line.strip().split(',') for line in f]
    except FileNotFoundError:
        # Fewer than 30 chunk files may exist for small inputs; a missing
        # chunk is represented by an empty list instead of aborting.
        pass
    batches[j] = rows
    # At module scope locals() is globals(); use the explicit spelling.
    globals()['b' + str(j)] = rows
#import pandas as pd
#x=pd.read_table('data.txt',sep='\s+')
import pickle
# Save an object with pickle.
# NOTE(review): `a` is not defined anywhere in the visible part of this
# script, so this raises NameError unless `a` is created beforehand —
# confirm which object is meant to be saved.
# The `with` block guarantees the file is closed even if pickling fails.
with open('data.pickle', 'wb') as file:
    pickle.dump(a, file)
# Load an object with pickle.
# NOTE(review): this reads 'dataload.pickle', not the 'data.pickle' written
# above — confirm that a different file is intended.
# Only unpickle files from a trusted source: pickle.load can execute
# arbitrary code embedded in the file.
with open('dataload.pickle', 'rb') as file:
    a_dict1 = pickle.load(file)
print(a_dict1)