昨天写了一段用来做分层随机抽样的代码,很粗糙,不过用公司的2万名导购名单试了一下,结果感人,我觉得此刻的我已经要上天了,哈哈哈哈哈哈
代码如下:
#分层随机抽样 stratified sampling
import xlrd, xlwt, time, random
xl = xlrd.open_workbook(r'C:\Users\Administrator\Desktop\分层抽样.xlsx')
xl_sht1 = xl.sheets()[0]
xl_sht1_nrows = xl_sht1.nrows
#表头
title = xl_sht1.row_values(0)
#把样本写进列表 sample
sample = []
for i in range(xl_sht1_nrows):
sample.append(xl_sht1.row_values(i))
#打乱样本
random.shuffle(sample)
#把层的内容写进列表 col
col = xl_sht1.col_values(0)
#对col中的内容进行计数,获得每一类的名称对应个数的字典
col_dict = {}
for i in col:
col_dict[i] = col_dict.get(i, 0) + 1
p = eval(input('每层抽取的比例(小数):'))
#获得每一类的名称对应抽取个数的字典
col_p = {}
k = 0
for i in col_dict.keys():
col_p[i] = int(round(col_dict[i] * p)) #round用来四舍五入,不加int结果会变成无数个p
#开始抽样,把抽取结果写进result_l列表
result_l = []
for i in