# Two alternative recipes for subsampling cells per "batch" group.
# ad_all is presumably an AnnData object (it exposes .obs and is indexed by
# obs names) -- confirm against where it is created.

N = 1000  # draw 1000 cells from every batch
# Sample N rows per batch from the obs table (no replacement, fixed seed),
# then subset ad_all by the sampled obs index. copy() must be *called*:
# a bare `.copy` would bind the method object instead of the copied subset.
ad_tmp = ad_all[
    ad_all.obs.groupby("batch").sample(n=N, random_state=123, replace=False).index
].copy()

frac = 0.5  # draw 50% of the cells from every batch
# Same as above but with a per-batch fraction instead of a fixed count.
# This rebinds ad_tmp, replacing the fixed-count result.
ad_tmp = ad_all[
    ad_all.obs.groupby("batch").sample(frac=frac, random_state=123, replace=False).index
].copy()
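# Stratified sampling by the "batch" column: each batch contributes a different number of cells, proportional to its size.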
# Stratified subsample of roughly 1000 cells in total, where each batch
# contributes a share proportional to its number of cells. One subset is
# built per batch and the subsets are concatenated into ad_sub at the end.
adList = []
groups = ad_all.obs.groupby("batch").size()  # number of cells in each batch
_total = groups.sum()  # loop-invariant: total number of cells over all batches
for batch in groups.index:
    i = groups[batch]
    frc = i / _total  # this batch's share of all cells
    # Per-batch quota out of a 1000-cell target. Because of rounding, the
    # quotas may not add up to exactly 1000.
    N = int(round(frc * 1000, 0))
    # With fewer than 1000 cells overall the quota would exceed the batch
    # size, and sampling without replacement would raise; cap it instead.
    N = int(min(N, i))
    _index = ad_all.obs[ad_all.obs["batch"] == batch].sample(
        n=N, random_state=123, replace=False
    ).index
    ad_tmp = ad_all[_index].copy()
    adList.append(ad_tmp)
# NOTE(review): newer anndata releases deprecate AnnData.concatenate in favour
# of anndata.concat(adList, join='outer'); the two differ in how obs names and
# batch labels are handled, so verify before switching.
ad_sub = sc.AnnData.concatenate(*adList, join='outer')