# for idx in range(train_size):
# train_data.append(InputExample(texts=[Search_list[idx], Title_list[idx]], label=1.0))
# # if idx in m_list:
# m_list = []
# for m in range(0,train_size):
# if Search_list[idx] == Search_list[m]:
# m_list.append(m)
# else:
# break
# j = randint(0, range(train_size))
# while j in m_list:
# j = randint(0, range(train_size))
# train_data.append(InputExample(texts=[Search_list[idx], Title_list[j]], label=0.0))
#更改为
# Build one positive and one negative training pair for every row of `f`.
# Rows are grouped by their 'search' value so that the title used for a
# negative pair is always drawn from outside the current search group.
# NOTE(review): the group's index labels are used directly as positions into
# Search_list / Title_list, which presumes `f` has a default 0..n-1 integer
# index -- confirm against where `f` is built.
train_data = []    # positives: (search, its own title), label 1.0
train_data_0 = []  # negatives: (search, title from another group), label 0.0
for _, group in f.groupby('search'):
    # Build the membership set once per group; testing against a set is O(1)
    # per random draw instead of querying the pandas Index every time.
    member_ids = set(group.index)
    # If every candidate position in [0, train_size) belongs to this group,
    # the rejection-sampling loop below could never terminate, so negatives
    # are skipped for such a group instead of hanging.
    has_outside_title = any(k not in member_ids for k in range(train_size))
    for m in group.index:
        train_data.append(InputExample(texts=[Search_list[m], Title_list[m]], label=1.0))
        if not has_outside_title:
            continue
        # random.randint includes its upper bound, so the last legal position
        # is train_size - 1 (presumably train_size == len(Title_list));
        # passing train_size itself could index one past the end.
        j = randint(0, train_size - 1)
        while j in member_ids:
            j = randint(0, train_size - 1)
        train_data_0.append(InputExample(texts=[Search_list[m], Title_list[j]], label=0.0))
这段代码基于 Search_list 和 Title_list 生成训练数据集:将每条样本自身的 search 与 title 配对作为正样本(标签为 1.0),将 search 与随机选取的、属于其他 search 分组的 title 配对作为负样本(标签为 0.0)。它使用 random.randint 随机选取不属于当前分组的索引,并利用 pandas 的 groupby 函数按 search 值对数据进行分组。
555

被折叠的 条评论
为什么被折叠?



