【编程练习】RNA序列翻译成蛋白质_cacaauuacuca翻译成蛋白质-CSDN博客

该文章介绍了一个Python程序，用于将RNA序列文件中的编码转换为对应的氨基酸序列。程序首先定义了氨基酸密码子表，然后读取Fasta文件中的RNA序列，通过函数进行翻译，最后将结果保存为新的Fasta文件。主要涉及生物信息学和文件处理。

题目：

RNA序列翻译成蛋白质

给定：氨基酸密码子表，和一个保存多条RNA序列的Fasta文件
任务：将每条RNA序列翻译成对应的氨基酸序列，并保存为Fasta文件

解题：

import argparse

  

# RNA codon table: three-letter codon -> one-letter amino-acid symbol, with
# '*' assigned to UAA, UAG and UGA.  Codons are enumerated with every
# position cycling through U, C, A, G (third base changing fastest) and are
# paired with the symbols below, which are listed in that same order: one
# row of 16 symbols per first base.
Genetic_code = dict(zip(
    (first + second + third
     for first in 'UCAG' for second in 'UCAG' for third in 'UCAG'),
    'FFLLSSSSYY**CC*W'    # codons starting with U
    'LLLLPPPPHHQQRRRR'    # codons starting with C
    'IIIMTTTTNNKKSSRR'    # codons starting with A
    'VVVVAAAADDEEGGGG',   # codons starting with G
))

  

def load_rna_seq(rna_seq_fa):
    """Read a FASTA file into a dict mapping record id to sequence.

    A line starting with '>' opens a new record; the rest of that line
    (surrounding whitespace stripped) is used as the record id.  All
    following lines up to the next header are stripped and concatenated to
    form the record's sequence.  Blank lines are ignored wherever they
    appear.  If an id occurs more than once, the last record with that id
    replaces the earlier one.

    Args:
        rna_seq_fa: Path of the FASTA file to read.

    Returns:
        A dict of ``{record_id: sequence}`` in file order.

    Raises:
        ValueError: If sequence data appears before the first '>' header.
    """
    seq_parts = {}
    current_parts = None
    with open(rna_seq_fa) as f:
        for line in f:
            if line.startswith('>'):
                current_parts = seq_parts[line.strip()[1:]] = []
                continue
            chunk = line.strip()
            if not chunk:
                # Blank lines (including ones before the first header)
                # carry no sequence data.
                continue
            if current_parts is None:
                raise ValueError(
                    'sequence data found before the first FASTA header in '
                    + repr(rna_seq_fa)
                )
            current_parts.append(chunk)
    # Join once per record instead of growing a string line by line.
    return {seq_id: ''.join(parts) for seq_id, parts in seq_parts.items()}

  

def rna2protein(rna_seq, codon_table=None):
    """Translate an RNA sequence into a string of amino-acid symbols.

    The sequence is read in frame from its first base, three bases at a
    time.  Stop codons are not treated specially: the symbol the table
    assigns to them is emitted and translation continues to the end of
    the sequence.

    Args:
        rna_seq: RNA sequence.  Lower-case letters are accepted; the
            sequence is upper-cased before lookup.
        codon_table: Optional mapping from upper-case three-letter codon to
            amino-acid symbol.  Defaults to the module-level
            ``Genetic_code``.

    Returns:
        The translated protein sequence.  One or two bases left over after
        the last complete codon cannot be looked up and are ignored.

    Raises:
        KeyError: If a complete codon is not present in the table.
    """
    if codon_table is None:
        codon_table = Genetic_code
    rna_seq = rna_seq.upper()
    # Stop at the last complete codon so a 1-2 base tail does not trigger
    # a KeyError on a partial key.
    usable_length = len(rna_seq) - len(rna_seq) % 3
    return ''.join(
        codon_table[rna_seq[start:start + 3]]
        for start in range(0, usable_length, 3)
    )

  

def convert_rna2proteion(rna_seq_dir):
    """Translate every RNA sequence in a mapping.

    Args:
        rna_seq_dir: Mapping of record id to RNA sequence.

    Returns:
        A new dict with the same ids, in the same order, whose values are
        the results of ``rna2protein`` for the corresponding sequences.
    """
    return {
        seq_id: rna2protein(sequence)
        for seq_id, sequence in rna_seq_dir.items()
    }

  

def output_protein_seq(protein_seq_dir, output_protein_fa):
    """Write protein sequences to a FASTA file.

    Each record is written as a '>' header line holding the id, followed
    by a single line holding the whole sequence.  An existing file at the
    target path is overwritten.

    Args:
        protein_seq_dir: Mapping of record id to protein sequence.
        output_protein_fa: Path of the FASTA file to create.
    """
    records = (
        ''.join(('>', seq_id, '\n', sequence, '\n'))
        for seq_id, sequence in protein_seq_dir.items()
    )
    with open(output_protein_fa, 'w') as handle:
        handle.writelines(records)

  

if __name__ == '__main__':
    # Command-line entry point: read the RNA FASTA file given with -i,
    # translate every record, and write the proteins to the FASTA file
    # given with -o.
    parser = argparse.ArgumentParser(
        description='Translate RNA sequences in a FASTA file into protein sequences'
    )
    parser.add_argument('-i', '--input', required=True, help='RNA sequence file')
    parser.add_argument('-o', '--output', required=True, help='Protein sequence file')
    args = parser.parse_args()
    rna_seq_fa = args.input
    output_protein_fa = args.output

    rna_seq_dir = load_rna_seq(rna_seq_fa)
    protein_seq_dir = convert_rna2proteion(rna_seq_dir)
    output_protein_seq(protein_seq_dir, output_protein_fa)