# Inserting records into a 650,000-line data file: a plain linear scan and a binary search differ strikingly in efficiency.
# Path of the data file that the insert routines in this file read completely,
# parse as one integer per line, and then rewrite in place.
# NOTE(review): the insert routines presumably expect the lines to be sorted
# in ascending order - confirm against how the file is generated.
DATAS_txt=r'large100W.txt'
# 1. Prepare the file and read it at a specified line number
import linecache
import random
# text=linecache.getline(DATAS_txt,1)
import time
def write_wenjian111(random_min, random_max, times, path=None):
    """Insert ``times`` new, unique random integers into the data file (linear scan).

    The file is read completely, each candidate's position is located by
    walking the list from the front, and the whole file is rewritten once at
    the end.  The file is expected to hold one integer per line in ascending
    order; the computed insert position is only meaningful under that
    assumption.

    Args:
        random_min: Smallest value that may be drawn (inclusive).
        random_max: Largest value that may be drawn (inclusive).
        times: Number of new values to insert.
        path: File to update.  Defaults to the module-level ``DATAS_txt``.

    Raises:
        ValueError: If fewer than ``times`` unused integers remain in
            ``[random_min, random_max]`` (the search for a fresh value would
            otherwise never terminate), or if a line is not an integer.
    """
    if path is None:
        path = DATAS_txt

    with open(path, "r") as src:
        # Skip blank lines (e.g. a stray trailing newline); int("") would raise.
        words = [int(line) for line in src if line.strip()]

    # Candidates are drawn until an unused one turns up, so make sure enough
    # unused values exist before starting.
    taken = {value for value in words if random_min <= value <= random_max}
    free = max(random_max - random_min + 1 - len(taken), 0)
    if times > free:
        raise ValueError(
            "cannot insert %d unique values: only %d unused integers remain in [%d, %d]"
            % (times, free, random_min, random_max)
        )

    for _ in range(times):
        while True:
            a_word = random.randint(random_min, random_max)
            # Default position: the candidate is larger than every stored
            # value and therefore belongs at the end of the list.
            position = len(words)
            duplicate = False
            for index, value in enumerate(words):
                if value == a_word:
                    duplicate = True
                    break
                if value > a_word:
                    position = index
                    break
            if not duplicate:
                break
        words.insert(position, a_word)

    with open(path, "w") as dst:
        dst.writelines(str(value) + "\n" for value in words)
def txt_go111(random_min, random_max, times):
    """Run the linear-scan insertion by forwarding every argument to ``write_wenjian111``."""
    write_wenjian111(
        random_min=random_min,
        random_max=random_max,
        times=times,
    )
def write_wenjian(random_min, random_max, times, path=None):
    """Insert ``times`` new, unique random integers into the data file (binary search).

    The file is read completely, each candidate's position is located with a
    hand-written binary search, and the whole file is rewritten once at the
    end.  The file is expected to hold one integer per line in ascending
    order; binary search is only valid under that assumption.

    Args:
        random_min: Smallest value that may be drawn (inclusive).
        random_max: Largest value that may be drawn (inclusive).
        times: Number of new values to insert.
        path: File to update.  Defaults to the module-level ``DATAS_txt``.

    Raises:
        ValueError: If fewer than ``times`` unused integers remain in
            ``[random_min, random_max]`` (the search for a fresh value would
            otherwise never terminate), or if a line is not an integer.
    """
    if path is None:
        path = DATAS_txt

    with open(path, "r") as src:
        # Skip blank lines (e.g. a stray trailing newline); int("") would raise.
        words = [int(line) for line in src if line.strip()]

    # Candidates are drawn until an unused one turns up, so make sure enough
    # unused values exist before starting.
    taken = {value for value in words if random_min <= value <= random_max}
    free = max(random_max - random_min + 1 - len(taken), 0)
    if times > free:
        raise ValueError(
            "cannot insert %d unique values: only %d unused integers remain in [%d, %d]"
            % (times, free, random_min, random_max)
        )

    for _ in range(times):
        while True:
            a_word = random.randint(random_min, random_max)
            low = 0
            high = len(words) - 1
            found = False
            while low <= high:
                # Integer division keeps the midpoint exact for any list size.
                mid = (low + high) // 2
                if a_word > words[mid]:
                    low = mid + 1
                elif a_word < words[mid]:
                    high = mid - 1
                else:
                    found = True
                    break
            if not found:
                # The search window closed without a match: ``low`` is the
                # index of the first element greater than the candidate.
                break
        words.insert(low, a_word)

    with open(path, "w") as dst:
        dst.writelines(str(value) + "\n" for value in words)
# Generate a random number
# Search for its position and insert it
def txt_go(random_min, random_max, times):
    """Run the binary-search insertion by forwarding every argument to ``write_wenjian``."""
    write_wenjian(
        random_min=random_min,
        random_max=random_max,
        times=times,
    )
# Benchmark 1: linear-scan insertion of 1000 values in a single call.
# perf_counter() is used instead of time() because it is a high-resolution
# clock intended for measuring intervals and is unaffected by system clock
# adjustments.
time_start = time.perf_counter()
txt_go111(-1000000, 1000000, 1000)
time_end = time.perf_counter()
print('常规TIME:', time_end - time_start, "S")

# Benchmark 2: binary-search insertion, 5 calls of 30000 values each.
# Every call re-reads and rewrites the whole data file, so that I/O is part
# of the measured time.
time_start = time.perf_counter()
for _ in range(5):
    txt_go(-1000000, 1000000, 30000)
time_end = time.perf_counter()
print('二分TIME:', time_end - time_start, "S")
# Measured result, linear-scan insertion of 1,000 records: 38.2 s
# Measured result, binary-search insertion of 150,000 records: 30.4 s