根据注释软件结果从ICTV中获取物种完整注释(python代码)

比如,病毒注释软件只给出了下面这样一条不完整的谱系(脚本会取其中最后一个非空层级,此例为 Microviridae,到ICTV表中查找对应行,并输出从最高层级到该层级的完整谱系):

Viruses;Phixviricota;Malgrandaviricetes;Petitvirales;Microviridae;;

import os
import pandas as pd

# Complete truncated virus taxonomy annotations against the ICTV table.
#
# For every input file whose name contains "fetal_95", the last non-empty rank
# of each line's taxonomy string (last tab-separated field) is looked up in the
# ICTV table.  The output line holds the vOTU name (first field), the value of
# the table's last column, and the table's columns from the first one up to and
# including the matched column, joined with ";".

# Change working directory
os.chdir("C:/Users/fordata/Desktop/研究生/第二个想法(16s肠型+宏基因组功能)/第二篇病毒组/result/tax")
dir_ls = os.listdir()

# Load the ICTV reference file
ICTV_txt = "ICTV去冗余.txt"
df = pd.read_csv(ICTV_txt, sep='\t')

# Whitespace-trimmed string view of the table, built once and used only for
# matching.  Missing cells stay NaN so they can never compare equal to a taxon.
df_cmp = df.apply(lambda col: col.astype(str).str.strip().where(col.notna()))

OUT_SUFFIX = "_TAX.txt"

# Inputs are regular files containing "fetal_95".  Files produced by a previous
# run carry the same substring, so they are excluded explicitly; otherwise a
# re-run would treat its own outputs as inputs.
result_ls = [
    file for file in dir_ls
    if "fetal_95" in file
    and not file.endswith(OUT_SUFFIX)
    and os.path.isfile(file)
]

# Process each file that matches "fetal_95"
for file2 in result_ls:
    # "w" instead of "a": appending wrote a second header and duplicate rows
    # into the same output file every time the script was re-run.
    with open(file2, "r") as f2, open(f"{file2}{OUT_SUFFIX}", "w") as out:
        print(f"vOTU\tGenome_Composition\tRealm;Subrealm;Kingdom;Subkingdom;Phylum;Subphylum;Class;Subclass;Order;Suborder;Family;Subfamily;Genus;Subgenus", file=out)
        next(f2, None)  # Skip the header; tolerate a completely empty file
        for line in f2:
            if not line.strip():
                continue  # blank (e.g. trailing) line carries no record

            fields = line.rstrip("\n").split("\t")
            name = fields[0].strip()
            Tax = fields[-1]

            # Deepest annotated rank = last non-empty item of the taxonomy.
            ranks = [item.strip() for item in Tax.split(";") if item.strip()]
            last_Tax = ranks[-1] if ranks else None

            row_pos = None
            if last_Tax is not None:
                print(f"Searching for last_Tax: {last_Tax}")
                # Exact cell equality.  The earlier regex/substring search could
                # select a row that merely *contains* the name, after which the
                # exact-match column lookup failed with IndexError.
                hits = df_cmp.eq(last_Tax).to_numpy()
                row_hits = hits.any(axis=1)
                if row_hits.any():
                    row_pos = int(row_hits.argmax())  # first matching row

            if row_pos is None:
                # Nothing to look up (empty taxonomy) or taxon absent from ICTV.
                print("No match found.")
                print(f"{name}\tnot assigned\tnot assigned", file=out)
                continue

            matching_row = df.iloc[row_pos]
            print(f"Matching row found: {matching_row}")

            # First column of that row holding the taxon; keep everything from
            # the first column up to and including it.
            col_pos = int(hits[row_pos].argmax())
            columns_before_last_tax = matching_row.iloc[:col_pos + 1]
            Genome_Composition = matching_row.iloc[-1]

            # Output the relevant columns
            print(f"Output from start to last_Tax: {columns_before_last_tax.values}")
            # Strip newlines/spaces from each value; missing ranks are written
            # as "nan", unchanged from the previous output format.
            lineage = ";".join(str(value).strip() for value in columns_before_last_tax.values)
            print(f"{name}\t{Genome_Composition}\t{lineage}", file=out)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值