import operator
import os
import re
import subprocess
import sys
def match_by_line(f, zhouqi=1, list=(1,), start_line=1, dead_line=0):
    """Pick lines at fixed positions out of every period of an open file.

    Lines ``start_line`` .. ``dead_line`` (1-based, inclusive) are cut into
    consecutive groups of ``zhouqi`` lines.  From every complete group the
    lines at the 1-based positions named in ``list`` are collected, in the
    order given.  A trailing incomplete group is dropped.

    Args:
        f: Iterable of lines with a ``close()`` method, normally an open
            text file.  It is always closed before this function exits,
            including when an exception is raised.
        zhouqi: Period, i.e. the number of lines per group.
        list: 1-based positions inside a group to keep.  The name shadows
            the builtin; it is kept because it is part of the signature.
        start_line: First line number that takes part in the grouping.
        dead_line: Last line number that takes part; ``0`` means "until the
            end of the input".

    Returns:
        The selected lines, unmodified (line endings are kept).  When
        ``dead_line`` is non-zero and the range ``start_line..dead_line`` is
        not a whole number of periods, an empty list is returned.

    Raises:
        IndexError: If a position in ``list`` is larger than ``zhouqi``.
    """
    selected = []
    group = []
    # Number of lines in the bounded range; only consulted when dead_line != 0.
    span = dead_line - start_line + 1
    try:
        for line_no, text in enumerate(f, 1):
            if dead_line != 0:
                # A bounded range must hold a whole number of periods,
                # otherwise nothing is selected at all.  Reading also stops
                # as soon as the range has been passed.
                if span % zhouqi != 0 or line_no > dead_line:
                    break
            if line_no < start_line:
                continue
            group.append(text)
            if len(group) == zhouqi:
                selected.extend(group[k - 1] for k in list)
                group = []
    finally:
        # Close on every path so the handle is not leaked on errors.
        f.close()
    return selected
'''
f=open("test.txt2","r")
zhouqi=2
list=[1]
start_line=3
dead_line=10
line=match_by_line(f,zhouqi,list,start_line)
for i in line:
print(i,end="")
'''
'''
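########## Select specific lines by string match ##########
Input parameter: the string (pattern) to match, str_match
Input parameter: the file handle
Output: the selected lines are stored in a list, which is returned: line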
'''
def match_by_str(f, str_match):
    """Return the lines of ``f`` in which a regular expression matches.

    Args:
        f: Iterable of text lines, e.g. an open text file.  It is only
            iterated; it is not closed here.
        str_match: Regular expression (anything ``re.compile`` accepts).
            It is searched anywhere in the line, not only at the start.

    Returns:
        The matching lines in input order, unmodified (line endings are
        kept).  A line is returned once, however many times it matches.
    """
    pattern = re.compile(str_match)
    return [text for text in f if pattern.search(text)]
'''
f=open("test.txt2","r",encoding='UTF-8')
zhouqi=2
list=[1]
start_line=3
dead_line=10
str_match="201\d\d\d\d\d"
line=match_by_str(f,str_match)
for i in line:
print(i,end="")#文件处理中未去掉换行符则需要如此输出
'''
def get_colunm_awk(f, list):
    """Extract columns from a file with ``awk`` and return the output lines.

    awk's output is written to ``lingshi.f`` in the current working
    directory (overwriting it) and then read back.

    Args:
        f: Path of the input file.  It is handed to awk as one argument
            without going through a shell, so spaces and shell
            metacharacters in the name are safe.
        list: awk field numbers (integers) to print, tab separated.

    Returns:
        The lines of ``lingshi.f`` as produced by awk.  awk's exit status
        is not checked, so a failing awk run yields whatever it wrote
        (typically an empty list).

    Raises:
        FileNotFoundError: If no ``awk`` executable can be found.
    """
    # Keep the historical output shape: the field list is padded with 0 up
    # to 20 entries.  Longer lists are used as they are.
    # NOTE(review): awk's $0 is the whole input record, so every padding
    # slot prints the complete line again - confirm that this is wanted.
    fields = [*list] + [0] * (20 - len(list))
    program = "{print " + "\"\t\"".join("$%d" % field for field in fields) + "}"
    with open("lingshi.f", "w") as out:
        subprocess.run(["awk", program, str(f)], stdout=out)
    with open("lingshi.f", "r") as result:
        return result.readlines()
'''
f=sys.argv[1]
list=[1,3]
line=get_colunm_awk(f,list)
for i in line:
print (i,end="")
'''
def get_colunm_pyre(f, list, split_flag=None):
    """Extract columns from text lines using ``str.split``.

    Args:
        f: Iterable of text lines, e.g. an open text file.
        list: 1-based column numbers to extract from every line.
        split_flag: Separator passed to ``str.split``; ``None`` splits on
            runs of whitespace.

    Returns:
        One flat list holding the requested fields of the first line, then
        those of the second line, and so on.

    Raises:
        IndexError: If a line has fewer fields than a requested column.
    """
    selected = []
    for raw in f:
        # Only the newline is stripped so that other surrounding characters
        # stay part of the first/last field when a separator is given.
        fields = raw.strip("\n").split(split_flag)
        selected.extend(fields[column - 1] for column in list)
    return selected
'''
t=sys.argv[1]
f=open(t,"r",encoding='UTF-8')
list=[1]
#split_flag="="
line=get_colunm_pyre(f,list)
p="hello"
flagi=0
for k in line:
if p != k:
if flagi==0:
print("%s\t1"%(k))
flagi+=1
if flagi%61==0:
print ("%s\t%d"%(k,flagi))
p=k
'''
'''
for i in f:
#print(type(i),i)
i=i.strip("\n")
i=i.strip("\t")
try:
i=float(i)
except ValueError:
print (type(i),i)
nian=i//10000;
yue=(i%10000)//100
r=i%100
print("%d\t%d\t%d年%d月"%(nian,yue,nian,yue))
'''
def out_first(inf, outf):
    """Copy ``inf`` to ``outf`` with all ASCII whitespace removed.

    Every line of the input is split on whitespace and the resulting tokens
    are written back to back, without separators or line breaks.  Both
    files are handled as bytes.

    NOTE(review): the name suggests that only the first column was meant to
    be written; the code writes every token - confirm the intent.

    Args:
        inf: Path of the file to read.
        outf: Path of the file to create or overwrite.
    """
    # ``with`` closes both handles even if reading or writing fails.
    with open(inf, "rb") as source, open(outf, "wb") as target:
        for raw in source:
            target.writelines(raw.split())
def out_after_compare(line, colum_num, compare_flag, becompared_value):
    """Keep the lines whose chosen column compares true against a value.

    Args:
        line: Iterable of text lines; columns are separated by whitespace.
        colum_num: 1-based number of the column to compare.
        compare_flag: One of ``">"``, ``">="``, ``"<"``, ``"<="``, ``"="``,
            ``"=="`` (same as ``"="``) or ``"!="``.
        becompared_value: Value to compare against.  Its exact type decides
            how the column text is converted before comparing: ``str``,
            ``int`` or ``float``.  For any other type a message is printed
            and the column is compared as plain text.

    Returns:
        The lines (unmodified) for which
        ``column <compare_flag> becompared_value`` holds, in input order.

    Raises:
        ValueError: If ``compare_flag`` is not a supported operator, or a
            column cannot be converted to ``int``/``float``.
        IndexError: If a line has fewer than ``colum_num`` columns.
    """
    comparators = {
        ">": operator.gt,
        ">=": operator.ge,
        "<": operator.lt,
        "<=": operator.le,
        "=": operator.eq,
        "==": operator.eq,
        "!=": operator.ne,
    }
    try:
        compare = comparators[compare_flag]
    except KeyError:
        # An unknown operator used to yield an empty result silently, which
        # hid typos such as "=>" from the caller.
        raise ValueError("unsupported compare_flag: %r" % (compare_flag,)) from None

    # Dispatch on the exact type (bool is deliberately not treated as int).
    convert = {str: str, int: int, float: float}.get(type(becompared_value))
    if convert is None:
        print ("unsupprot long or complex type, or your becompared_value is unrecognized")

        def convert(text):
            return text

    index = colum_num - 1
    return [
        row for row in line
        if compare(convert(row.split()[index]), becompared_value)
    ]
'''
#eg:
t=sys.argv[1]
f=open(t,"r")
line=match_by_line(f)
colum_num=3
compare_flag="="
becompared_value=1576045117499
backline=out_after_compare(line,colum_num,compare_flag,becompared_value)
for i in backline:
print (i,end="")
'''
'''
#big eg
t=sys.argv[1]
f=open(t,"r")
str_match="ST=\d+"
line=match_by_str(f,str_match)
o=open("lingshi.f","w")
for i in line:
o.write(i)
o.close()
o=open("lingshi.f","r")
list=[1]
split_flag="Gateway1,Node=Node1 ST="
backline=get_colunm_pyre(o,list,split_flag)
for i in backline:
print (i,end="")
'''
def split_colum(line, colum_num, split_flag):
    """Split one column of every line and insert the pieces before it.

    Each line is split on whitespace, column ``colum_num`` is split on
    ``split_flag``, and the pieces are inserted in order in front of that
    column.  The fields are then joined with single spaces.

    NOTE(review): the original, unsplit column is kept right after the
    inserted pieces - confirm that it is not meant to be removed.

    Args:
        line: Iterable of text lines.
        colum_num: 1-based number of the column to split.
        split_flag: Separator used to split that column.

    Returns:
        A list that alternates between a rebuilt line (without line ending)
        and a separate ``"\\n"`` element, so printing every element with
        ``end=""`` reproduces one line per input line.

    Raises:
        IndexError: If a line has fewer than ``colum_num`` columns.
    """
    index = colum_num - 1
    backline = []
    for raw in line:
        fields = raw.split()
        pieces = fields[index].split(split_flag)
        # Inserting back to front at the same index leaves the pieces in
        # their original order.
        for piece in reversed(pieces):
            fields.insert(index, piece)
        backline.append(' '.join(fields))
        backline.append("\n")
    return backline
'''
t=sys.argv[1]
f=open(t,"r")
colum_num=2
split_flag="="
line=match_by_line(f)
backline=split_colum(line,colum_num,split_flag)
for i in backline:
print (i,end="")
'''
def split_file_by_line(filename, linenum):
    """Split a text file into parts of at most ``linenum`` lines each.

    The parts are written to ``split_dir/`` (created if missing, relative
    to the current working directory) and are named
    ``<filename without extension>.part<N>.txt`` with ``N`` counting from
    0.  The name of every finished part is printed.  An empty input file
    produces a single empty part 0.

    Args:
        filename: Path of the text file to split.
        linenum: Maximum number of lines per part; must be at least 1.

    Raises:
        ValueError: If ``linenum`` is smaller than 1.
    """
    if linenum < 1:
        raise ValueError("linenum must be at least 1, got %r" % (linenum,))

    dir_put = 'split_dir/'
    filename_front = os.path.splitext(filename)[0]

    def part_name(index):
        return filename_front + '.part' + str(index) + '.txt'

    with open(filename, "r") as source:
        os.makedirs(dir_put, exist_ok=True)
        part = 0
        target = open(dir_put + part_name(part), 'w')
        try:
            lines_in_part = 0
            for text in source:
                # Start the next part only when another line really exists,
                # so an exact multiple of linenum leaves no empty trailing
                # part behind.
                if lines_in_part == linenum:
                    target.close()
                    print(part_name(part))
                    part += 1
                    target = open(dir_put + part_name(part), 'w')
                    lines_in_part = 0
                target.write(text)
                lines_in_part += 1
        finally:
            target.close()
        print(part_name(part))
'''
t=sys.argv[1]
linenum=int(input("enter size:"))
split_file_by_line(t,linenum)
'''
def split_file_by_KB(t, size):
    """Split a file into parts of at most ``size`` blocks of 1024 bytes.

    The file is read as bytes in 1024-byte blocks.  The parts are written
    to ``split_dir/`` (created if missing, relative to the current working
    directory) and are named ``<t without extension>.part<N>.txt`` with
    ``N`` counting from 0.  The name of every finished part is printed.
    An empty input file produces a single empty part 0.

    Args:
        t: Path of the file to split.
        size: Number of 1024-byte blocks per part.  A value that no block
            count ever equals (0, a negative number, a non-integer) leaves
            the whole file in part 0.
    """
    dir_put = 'split_dir/'
    filename_front = os.path.splitext(t)[0]

    def part_name(index):
        return filename_front + '.part' + str(index) + '.txt'

    with open(t, 'rb') as fp:
        os.makedirs(dir_put, exist_ok=True)
        part = 0
        target = open(dir_put + part_name(part), 'wb')
        try:
            blocks_in_part = 0
            while True:
                buf = fp.read(1024)
                if not buf:
                    # An empty read is the end of the file.
                    break
                # Start the next part only when another block really
                # exists, so an exact multiple of the part size leaves no
                # empty trailing part behind.
                if blocks_in_part and blocks_in_part == size:
                    target.close()
                    print (part_name(part))
                    part += 1
                    target = open(dir_put + part_name(part), 'wb')
                    blocks_in_part = 0
                target.write(buf)
                blocks_in_part += 1
        finally:
            target.close()
        print (part_name(part))
'''
if __name__=='__main__':
t=sys.argv[1]
size=int(input("enter size:")) #注意转换为int,否则无效
split_file_by_KB(t,size)
'''
'''
统计有多少个大于30的
f=open("Node2_ET_num.all","r")
t=0
a=0
b=0
for i in f:
k=i.split()
k[0]=float(k[0])
k[1]=float(k[1])
k[2]=float(k[2])
if k[0]>=20:
if k[1]!=a and k[2]!=b:
t+=1
a=k[1]
b=k[2]
print (t)
'''