import os
import pandas as pd
# Directory to search for input files
search_dir = './'
# Name of the merged output file. It also ends in ".txt", so it is excluded
# from the inputs below; otherwise a second run would merge the previous
# result back into itself.
merge_name = 'merge.txt'
# Paths of every .txt input file. os.listdir returns names in arbitrary
# order, so sort them to make the row order of the merged file reproducible.
list_files = [
    os.path.join(search_dir, f)
    for f in sorted(os.listdir(search_dir))
    if f.endswith('.txt') and f != merge_name
]
# Read every file first and concatenate once: calling pd.concat inside the
# loop copies all previously accumulated rows again on each iteration.
frames = [pd.read_csv(file_path, sep='\t') for file_path in list_files]
if frames:
    # Stack the rows of all input files on top of each other.
    df_merge = pd.concat(frames, axis=0)
    df_merge.to_csv(os.path.join(search_dir, merge_name), sep='\t', index=False)
else:
    # No input files: keep df_merge defined (empty) and write nothing,
    # instead of failing with a NameError.
    df_merge = pd.DataFrame()
# 8. 内置函数 map + filter 过滤数据
# Integers 0 through 10
number = list(range(0, 11))
# Square every value with map()
squared_numbers = list(map(lambda x: x ** 2, number))
print(squared_numbers)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# Keep only the even values with filter()
even_numbers = list(filter(lambda x: x % 2 == 0, number))
print(even_numbers)  # [0, 2, 4, 6, 8, 10]
# 9. 使用 concurrent.futures 模块实现循环的并发处理,提高计算效率
import concurrent.futures
def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2


# Run square() over every element of `number` on a thread pool.
# executor.map returns the results in the same order as the inputs.
with concurrent.futures.ThreadPoolExecutor() as executor:
    res = list(executor.map(square, number))
    print(res)
# 10. 使用 asyncio 模块实现异步处理,提高并发性能
import asyncio
import math
async def sqrt(num):
    """Return the square root of ``num``."""
    return math.sqrt(num)


async def calculate():
    """Compute the square root of every value in ``number`` and print the list."""
    # Create one coroutine per input value, then await them all together.
    run_tasks = [sqrt(num) for num in number]
    results = await asyncio.gather(*run_tasks)
    print(results)


asyncio.run(calculate())
# 11. 程序运行分析装饰器
import time
def analysis_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time in seconds, and returns the
        result of ``func`` unchanged.
    """
    # Imported locally so this block does not rely on a file-level import.
    from functools import wraps

    @wraps(func)  # keep func's __name__ and docstring on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is the clock intended for measuring short durations.
        start_time = time.perf_counter()
        # Keyword arguments must be unpacked with ** so they stay keywords;
        # a single * would pass only their names as extra positional values.
        res = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"{func.__name__} program run time: {end_time - start_time}s")
        return res

    return wrapper
# Parallel computation
import concurrent.futures


def square(num):
    """Return ``num`` raised to the power of two."""
    return num ** 2


@analysis_time
def calulate(number):
    """Square every item of ``number`` on a thread pool and return the results as a list."""
    with concurrent.futures.ThreadPoolExecutor() as executor:
        res = list(executor.map(square, number))
    return res


print(calulate(number))
# calulate program run time: 0.002947568893432617s
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# 12. 读取文本文件的 \t 分隔内容至列表
# Read the tab-separated contents of a text file into a list of rows
table = []
# The with-block closes the file even if reading fails part-way through.
with open('data.txt', 'r') as data_file:
    for line in data_file:
        # Remove only the line terminator. A bare strip() would also remove
        # leading/trailing tabs and silently drop empty first or last cells.
        table.append(line.rstrip('\r\n').split('\t'))
print(table)
# 13. 写入列表内容至文本文件
# Rows to write: a header row followed by numeric data rows
table = [
    ['protein', 'ext1', 'ext2', 'col3'],
    [0.16, 0.038, 0.044, 0.040],
    [0.33, 0.089, 0.095, 0.091],
    [0.66, 0.184, 0.191, 0.191],
    [1.00, 0.280, 0.292, 0.283],
    [1.32, 0.365, 0.367, 0.365],
    [1.66, 0.441, 0.443, 0.444],
]
# Turn each row into one tab-separated, newline-terminated line and join them
# in a single pass rather than growing a string with + inside a loop.
out = ''.join(
    '\t'.join(str(cell) for cell in row) + '\n'
    for row in table
)
# The with-block guarantees the data is flushed and the file is closed.
with open('output.txt', 'w') as output_file:
    output_file.write(out)
# 14. pandas 读取 VCF 文件
import pandas as pd
import os
# Skip the lines that start with "##" at the top of the file.
# Count them in Python instead of shelling out to `grep | wc`: that needs a
# Unix shell and breaks (or runs unintended commands) when the path contains
# spaces or shell metacharacters.
n_meta_lines = 0
with open(vcf_path) as vcf_handle:
    for vcf_line in vcf_handle:
        if not vcf_line.startswith('##'):
            # First line that is not a "##" line: stop counting here.
            break
        n_meta_lines += 1
df_vcf = pd.read_table(vcf_path, sep='\t', skiprows=n_meta_lines)
# Chromosome sort-key function.
# The lookup table is built once here rather than on every call, because the
# function is applied to each row of a DataFrame column.
_CHROM_RANK = {str(k): k for k in range(1, 23)}
_CHROM_RANK.update({"X": 23, "Y": 24})


def chrom_order(chrom):
    """Return a numeric sort rank for a chromosome label.

    Args:
        chrom: Chromosome label such as ``"chr1"``, ``"X"`` or ``7``. It is
            converted to ``str`` and every ``"chr"`` substring is removed
            before the lookup.

    Returns:
        1-22 for the numbered chromosomes, 23 for X, 24 for Y, and 25 for
        any other label so that unknown names sort last.
    """
    return _CHROM_RANK.get(str(chrom).replace("chr", ""), 25)


# Map the chromosomes in the Chromosome column to the numbers 1-24
# "Chrom" stores the value chrom_order returns for each row's "Chromosome".
# NOTE(review): `df` is not defined in the visible part of the file; confirm
# which DataFrame is meant here.
df["Chrom"] = df["Chromosome"].apply(chrom_order)
# Order the rows by that value, then replace the index with a fresh 0..n-1 one.
df = df.sort_values(by="Chrom")
df = df.reset_index(drop=True)