利用python处理dna序列_DNA序列比对,Needleman-Wunsch算法的python实现

此博客介绍了使用动态规划方法计算NW Distance(Needleman-Wunsch距离),用于比较两个生物序列的相似性。通过种子序列和候选序列的匹配、插入和删除操作来确定最短距离。算法详细步骤包括构建动态规划表格并根据规则更新。最后返回的是两个序列的编辑距离作为相似度指标。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

def NWDistance(seedSequence, candidateSequence):

s = -1 # a mismatch would deduce 1 point.

m = 1 # plus 1 point for one match.

g = -3 # deduce 2 point for one gap.

seedSequence = seedSequence.strip()

candidateSequence = candidateSequence.strip()

if len(seedSequence) == 0:

print "Error, seed sequence length equal zero."

sys.exit(1)

elif len(candidateSequence) == 0:

print "Error, candidate sequence length equal zero."

sys.exit(1)

sLen = len(seedSequence)

cLen = len(candidateSequence)

table = []

for m in range(0, len(seedSequence) + 1):

table.append([m * g])

table[0] = []

for n in range(0, len(candidateSequence) + 1):

table[0].append(n * g)

for i in range(sLen):

for j in range(cLen):

table[i + 1].append(

max(

table[i][j] + (m if seedSequence[i] == candidateSequence[j] else s),

table[i][j + 1] + g,

table[i + 1][j] + g,

)

)

#

i = sLen - 1

j = cLen - 1

NewSeed = seedSequence[i]

NewCandidate = candidateSequence[j]

if len(seedSequence) <= 1 or len(candidateSequence) <= 1:

print "Error, too short!"

sys.exit(1)

while True:

if i == 0 and j == 0:

break

if seedSequence[i] == candidateSequence[j]:

if table[i - 1][j - 1] + 1 > table[i - 1][j] - 2 and table[i - 1][j - 1] + 1 > table[i][j - 1] - 2:

i = i - 1

j = j - 1

NewSeed = u"%s%s" % (seedSequence[i], NewSeed)

NewCandidate = u"%s%s" % (candidateSequence[j], NewCandidate)

else:

if table[i][j + 1] > table[i + 1][j]:

i = i - 1

NewSeed = u"%s%s" % (seedSequence[i], NewSeed)

NewCandidate = u"%s%s" % ('-', NewCandidate)

else:

j = j - 1

NewSeed = u"%s%s" % ('-', NewSeed)

NewCandidate = u"%s%s" % (candidateSequence[j], NewCandidate)

else:

if table[i - 1][j - 1] + 1 > table[i - 1][j] - 2 and table[i - 1][j - 1] + 1 > table[i][j - 1] - 2:

i = i - 1

j = j - 1

NewSeed = u"%s%s" % (seedSequence[i], NewSeed)

NewCandidate = u"%s%s" % (candidateSequence[j], NewCandidate)

else:

if table[i][j + 1] > table[i + 1][j]:

i = i - 1

NewSeed = u"%s%s" % (seedSequence[i], NewSeed)

NewCandidate = u"%s%s" % ('-', NewCandidate)

else:

j = j - 1

NewSeed = u"%s%s" % ('-', NewSeed)

NewCandidate = u"%s%s" % (candidateSequence[j], NewCandidate)

print NewSeed

print NewCandidate

# distance

mismath = 0

math = 0

gap = 0

charZipList = zip(NewSeed, NewCandidate)

# delete the head gap

for n in range(len(charZipList)):

if "-" in charZipList[n]:

del charZipList[0]

else:

break

# delete the tail gap

while True:

lastTuple = charZipList.pop()

if "-" in lastTuple:

continue

else:

charZipList.append(lastTuple)

break

#

for n in range(len(charZipList)):

charTuple = charZipList[n]

if charTuple[0] == charTuple[1]:

math += 1

elif "-" in charTuple:

gapLoc = charTuple.index("-")

if charZipList[n + 1][gapLoc] == "-":

continue

else:

gap += 1

else:

mismath += 1

distance = round(1.0 - float(math) / float(mismath + math + gap), 4)

return distance

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值