Datawhale AI 夏令营 siRNA药物药效预测 task02

1.完整代码

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import lightgbm as lgb

# Data loading and merging.
# Read the training table from train_data.csv.
df_original = pd.read_csv("train_data.csv")
# Number of rows in the training table, captured before the tables are combined.
# NOTE(review): presumably used later to split `df` back into training and
# submission rows -- confirm against the rest of the script.
n_original = df_original.shape[0]
# Read the table stored in sample_submission.csv.
df_submit = pd.read_csv("sample_submission.csv")
# Stack the submission rows below the training rows (axis=0), then replace the
# index with a fresh 0..N-1 range, discarding the original index values.
df = pd.concat([df_original, df_submit], axis=0).reset_index(drop=True)

# 特征构建函数
def siRNA_feat_builder(s: pd.Series, anti: bool = False):
    name = "anti" if anti else "sense"
    df = s.to_frame()
    df[f"feat_siRNA_{
     
     name}_seq_len"] = s.str.len()
    
    nucleotides = "AUGC"
    for pos in [0, -1]:
        for c in nucleotides:
            df[f"feat_siRNA_{
     
     name}_seq_{
     
     c}_{
     
     'front' if pos == 0 else 'back'}"] = (s.str[pos] == c)
    
    patterns = [
        ("AA", "UU"), ("GA", "UU"), ("CA", "UU"), ("UA", "UU"),
        ("UU", "AA"), ("UU"
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值