import pandas as pd
# Tab-separated input table; the code below reads its 'Name', 'level 1',
# 'taxid', 'count', 'kingdom', 'genus', 'species' and 'type' columns.
file1 = r"2021-10\K200005137_L01_126_priceseq_dedup_lzw_bowtie2_subsample_gsnapfilter_gasnap.m8_match_taxid_lineage_add_kgs_delrepeat1_out - 1.txt"
OUTPUT_FILE = r"2021-10\output8.txt"

# Columns written to the output file, in this order.
OUTPUT_COLUMNS = ['Name', 'level 1', 'taxid', 'count', 'kingdom', 'genus', 'species', 'type']


def sum_counts_by_name(df):
    """Collapse *df* to one row per ('Name', 'level 1') pair with summed counts.

    The first row of every ('Name', 'level 1') pair is kept and its 'count'
    is replaced by the sum of 'count' over all rows of *df* sharing that
    'Name'. The input frame is not modified.

    Totals are attached by matching on 'Name' rather than by row position, so
    a Name that appears with several 'level 1' values gets its total on each
    of its rows instead of leaving later rows at 0.

    Args:
        df: DataFrame with at least 'Name', 'level 1' and 'count' columns.

    Returns:
        A new DataFrame with a fresh 0..n-1 index; 'count' is the last column.
    """
    totals = df.groupby('Name')['count'].sum()
    result = (
        df.drop(columns=['count'])
        .drop_duplicates(subset=['Name', 'level 1'], keep='first')
        .reset_index(drop=True)
    )
    # Rows whose Name is missing have no group total; they get 0, and the
    # column is cast back to the dtype of the per-name totals.
    result['count'] = result['Name'].map(totals).fillna(0).astype(totals.dtype)
    return result


def main():
    """Read the input table, aggregate counts per Name and write the result."""
    df1 = pd.read_csv(file1, sep='\t')

    # Diagnostic dumps of the raw table.
    print(df1)
    print(df1['Name'])
    print(df1['count'])
    print(df1.columns)
    print(df1.drop(columns=['taxid', 'count']))

    df4 = sum_counts_by_name(df1)
    print(df4)

    df4[OUTPUT_COLUMNS].to_csv(OUTPUT_FILE, index=False, sep='\t')


if __name__ == "__main__":
    main()