# Convert .fa (FASTA) files to .npy so they can be used as raw input data for deep learning.
# Build the train/valid/test .npy sets from the .fa files.
import os
import numpy as np
path = os.getcwd()

################# load the ENHANCER .fa file #################
# Every line of the enhancer file (looked up in the current working
# directory) becomes one entry of enh_list, with its newline removed.
with open(os.path.join(path, 'C_10K_GM12878.csv_enhancer.fa'), 'r') as f_enh:  # -***- enhancer.fa -***- #
    # Fix: the original called strip("/n"), which strips the characters '/'
    # and 'n' from both ends of the line and leaves the trailing newline in
    # place. "\n" is the newline escape that was intended.
    enh_list = [line.strip("\n") for line in f_enh]

# Keep only the first 1610 entries so the list length is a multiple of 10.
# NOTE(review): the original comment read "16106 - 6", which suggests the
# intended length may be 16100 rather than 1610 -- confirm before changing.
enh_list = enh_list[0:1610]
def Data_Set_enh(tr_enh_num, va_enh_num, te_enh_num):  # e.g. 0.8/0.1/0.1
    """Split the module-level ``enh_list`` into train/valid/test slices.

    Args:
        tr_enh_num: Multiplier applied to ``len(enh_list)`` to get the size
            of the training slice (e.g. ``0.8``).
        va_enh_num: Multiplier applied to ``len(enh_list)`` to get the size
            of the validation slice (e.g. ``0.1``).
        te_enh_num: Accepted for a symmetric signature; the test slice is
            always whatever remains after the train and validation slices.

    Returns:
        A tuple ``(enh_tr, enh_va, enh_te)`` of consecutive, non-overlapping
        slices of ``enh_list``.
    """
    total = len(enh_list)
    # Fix: slice bounds must be integers. Fractional ratios such as 0.8 give
    # float products, which made the original slicing raise TypeError.
    # round() (rather than plain truncation) guards against products such as
    # 0.29 * 100 == 28.999999999999996.
    enh_tr_num = int(round(tr_enh_num * total))
    enh_va_num = int(round(va_enh_num * total))
    va_end = enh_tr_num + enh_va_num

    enh_tr = enh_list[0:enh_tr_num]
    enh_va = enh_list[enh_tr_num:va_end]
    enh_te = enh_list[va_end:]
    return enh_tr, enh_va, enh_te
################ load the PROMOTER .fa file ################
# pro_list starts empty; presumably it is filled from f_pro the same way
# enh_list is filled from the enhancer file -- the code that does so (and
# closes f_pro) is outside this excerpt, so confirm against the full file.
pro_list = []
# Opened read-only; `path` is the current working directory (os.getcwd()).
f_pro = open(path+'/'+'C_10K_GM12878.csv_promoter.fa','r')

本文介绍如何使用Python脚本将DNA序列的.fasta文件格式转换为.npy格式,以便将其作为深度学习(DL)学习的原始输入数据。
最低0.47元/天 解锁文章
770

被折叠的 条评论
为什么被折叠?



