- iterrows(): 将DataFrame迭代为(index, Series)对。
- itertuples(): 将DataFrame迭代为元组(namedtuple)。
- iteritems(): 将DataFrame迭代为(列名, Series)对(pandas 2.0 起已移除,请改用 items())。
def mutation_drug_and_no_drug(self):
    """Split mutations into drug-related and non-drug-related report rows.

    The frame returned by ``self.new_gene_mutation_class_df(gene_class='')``
    is grouped by (genesymbol, exonintron, mutationtype_ch, chgvs, phgvs,
    vaf). For every group three drug lists are built, each entry formatted
    as ``"<drug_ch> (<last token of level_grade1>)"``:

    * current tumor type, ``drugefficacy == '敏感'`` (sensitive);
    * current tumor type, ``drugefficacy == '耐药'`` (resistant);
    * potentially beneficial drugs: current tumor type ``'可能敏感'`` rows
      with level B/C/D and a non-preclinical evidence level, plus rows of
      other tumor types that are ``'敏感'`` with FDA/NMPA/NCCN/CSCO evidence.

    A group with none of these is treated as non-drug-related: its SOM
    rows whose ``mtype`` is ``snv`` or ``skipping`` are appended to
    ``no_drug``. Every other group yields one entry in ``have_drug``.

    Returns:
        tuple[list[dict], list[dict]]: ``(have_drug, no_drug)``. ``no_drug``
        is ordered by ``chinese_multi_sort`` on genesymbol, exon number and
        intron number.
    """

    def _sorted_drug_labels(frame):
        """Return the "drug (grade)" labels of *frame* in chinese_multi_sort order."""
        records = [
            {'level_grade_new': f"{drug_ch} ({level.split()[-1]})"}
            for drug_ch, level in zip(frame['drug_ch'], frame['level_grade1'])
        ]
        records = chinese_multi_sort(records, key=['level_grade_new'], chinese_first=True)
        return [record['level_grade_new'] for record in records]

    df = self.new_gene_mutation_class_df(gene_class='').copy()
    # Fill missing mutation types so they form a regular group key value.
    df['mutationtype_ch'] = df['mutationtype_ch'].fillna('--')

    have_drug = []
    no_drug = []
    group_keys = ['genesymbol', 'exonintron', 'mutationtype_ch', 'chgvs', 'phgvs', 'vaf']
    for by, group in df.groupby(group_keys):
        # Rows for the current tumor type / for other tumor types.
        current_mut = group[group['current_tumor']]
        no_current_mut = group[~group['current_tumor']]

        # Current tumor type: sensitive and resistant drugs.
        current_sensitive = current_mut[current_mut['drugefficacy'] == '敏感']
        current_negative = current_mut[current_mut['drugefficacy'] == '耐药']
        # Current tumor type: possibly sensitive, level B-D, not preclinical.
        current_maybe_sen = current_mut[
            (current_mut['drugefficacy'] == '可能敏感')
            & (current_mut['level_grade1'].isin(['Level B', 'Level C', 'Level D']))
            & (current_mut['Evidence_level'] != 'preclinical')
        ]
        # Other tumor types: sensitive with guideline/agency-level evidence.
        no_current_sen = no_current_mut[
            (no_current_mut['drugefficacy'] == '敏感')
            & (no_current_mut['Evidence_level'].isin(['FDA', 'NMPA', 'NCCN', 'CSCO']))
        ]
        # Potentially beneficial drugs.
        maybe_benefit = pd.concat([current_maybe_sen, no_current_sen])

        current_sensitive_list = _sorted_drug_labels(current_sensitive)
        current_negative_list = _sorted_drug_labels(current_negative)
        # Drop duplicates and anything already listed as sensitive while
        # keeping the sorted order (a plain set difference would scramble it).
        already_sensitive = set(current_sensitive_list)
        maybe_benefit_list = [
            label
            for label in dict.fromkeys(_sorted_drug_labels(maybe_benefit))
            if label not in already_sensitive
        ]

        if current_sensitive.empty and current_negative.empty and maybe_benefit.empty:
            # Non-drug-related somatic variants.
            no_drug_group = group[
                (group['somger'] == 'SOM') & group['mtype'].isin(['snv', 'skipping'])
            ]
            # NOTE(review): one entry is appended per row, although every row
            # of a group shares the same key values - confirm duplicates are
            # intended.
            for _, row in no_drug_group.iterrows():
                # Numeric suffix of "exonN" / "intronN" is used as a sort key;
                # 0 when the label is of the other kind.
                exonintron_sort_exon = 0
                exonintron_sort_intron = 0
                if 'exon' in row['exonintron']:
                    exonintron_sort_exon = int(row['exonintron'].split('exon')[-1])
                elif 'intron' in row['exonintron']:
                    exonintron_sort_intron = int(row['exonintron'].split('intron')[-1])
                no_drug.append({
                    'genesymbol': row['genesymbol'],
                    'exonintron': row['exonintron'],
                    'mutationtype_ch': row['mutationtype_ch'],
                    'chgvs': row['chgvs'],
                    'phgvs': row['phgvs'],
                    'vaf': row['vaf'],
                    'exonintron_sort_exon': exonintron_sort_exon,
                    'exonintron_sort_intron': exonintron_sort_intron,
                })
            continue

        fix_mutation_values = group['fix_mutation'].values
        have_drug.append({
            'genesymbol': by[0],
            'fix_mutation': fix_mutation_values[0] if len(fix_mutation_values) else '',
            # Current tumor type: sensitive.
            'current_sensitive': '\n'.join(current_sensitive_list) if current_sensitive_list else '无',
            # Current tumor type: resistant.
            'current_negative': '\n'.join(current_negative_list) if current_negative_list else '无',
            # Potentially beneficial drugs.
            'maybe_benefit': '\n'.join(maybe_benefit_list) if maybe_benefit_list else '无',
        })

    # Sort once after all groups are collected instead of re-sorting the
    # whole list on every group.
    if no_drug:
        no_drug = chinese_multi_sort(
            no_drug,
            key=['genesymbol', 'exonintron_sort_exon', 'exonintron_sort_intron'],
            chinese_first=True,
        )
    return have_drug, no_drug
def process_grouped_data(data):
"""分组处理基因格式如下:
[
{
'genesymbol_phgvs': 'BRAF(V600X野生型) BRAF(V600X野生型)',
'敏感': '',
'耐药': 'encorafenib',
'mtype': 'snv',
'genesymbol': 'BRAF(V600X野生型)',
'phgvs': 'BRAF(V600X野生型)',
'exonintron': 'exon22'
},
{
'genesymbol_phgvs': 'KRAS p.G12S',
'敏感': '贝伐珠单抗 呋喹替尼',
'耐药': '西妥昔单抗 panitumumab',
'mtype': 'snv',
'genesymbol': 'KRAS',
'phgvs': 'p.G12S',
'exonintron': 'exon3'
}
]"""
data_summary = []
# 处理复合变异
if 'mut_comb_index' in data.columns and not data['mut_comb_index'].empty and (
data['mut_comb_index'] != '--').all():
for k3, v3 in data.groupby(['n_mut_comb_index']):
combined_gene_phgvs = '、'.join(v3['gene_phgvs'].unique())
tmp = {
'genesymbol_phgvs': combined_gene_phgvs,
'敏感': '',
'耐药': '',
'mtype': '复合',
'genesymbol': '、'.join(v3['genesymbol'].unique()),
'phgvs': '',
'exonintron': '、'.join(v3['exonintron'].unique())
}
for k4, v4 in v3.groupby(['drugefficacy']):
if k4 == '敏感':
tmp['敏感'] = '、'.join(v4['drug_ch'].unique())
# 融合和非融合
# if v4['mtype'].iloc[0] == 'fusion':
# tmp['genesymbol_phgvs'] = '、'.join(v4['phgvs'].unique())
# else:
# tmp['genesymbol_phgvs'] = '、'.join(v4['gene_phgvs'].unique())
# 融合和非融合:遍历v4,判断每条数据是否为'fusion'
for index, row in v4.iterrows():
if row['mtype'] == 'fusion':
fusion_phgvs = '、'.join(v4[v4['mtype'] == 'fusion']['phgvs'].unique())
tmp['genesymbol_phgvs'] = fusion_phgvs
else:
non_fusion_phgvs = '、'.join(v4[v4['mtype'] != 'fusion']['gene_phgvs'].unique())
tmp['genesymbol_phgvs'] += '、' + non_fusion_phgvs
elif k4 == '耐药':
tmp['耐药'] = '、'.join(v4['drug_ch'].unique())
# # 融合和非融合
# if v4['mtype'].iloc[0] == 'fusion':
# tmp['genesymbol_phgvs'] = '、'.join(v4['phgvs'].unique())
# else:
# tmp['genesymbol_phgvs'] = '、'.join(v4['gene_phgvs'].unique())
# 融合和非融合:遍历v4,判断每条数据是否为'fusion'
for index, row in v4.iterrows():
if row['mtype'] == 'fusion':
fusion_phgvs = '、'.join(v4[v4['mtype'] == 'fusion']['phgvs'].unique())
tmp['genesymbol_phgvs'] = fusion_phgvs
else:
non_fusion_phgvs = '、'.join(v4[v4['mtype'] != 'fusion']['gene_phgvs'].unique())
tmp['genesymbol_phgvs'] += '、' + non_fusion_phgvs
data_summary.append(tmp)