# 使用 PyTorch 构建自己的训练和测试数据集:涉及自定义数据处理类、将数据转换为 tensor、数据分割等,为后续的训练准备训练数据和测试数据。
import torch
from torch.utils.data import DataLoader, Dataset,TensorDataset,random_split
import sys
class label_featureDataSet(Dataset):
    """Map-style dataset backed by a text file with one sample per line.

    Each non-blank line must have the form ``<label><TAB><f1>,<f2>,...``
    where the label parses as an ``int`` and every comma-separated feature
    parses as a ``float``. Lines are held in memory as raw strings and are
    only parsed when a sample is requested.
    """

    def __init__(self, filename):
        """Read every non-blank line of *filename* into memory.

        Args:
            filename: Path of the sample file to load.
        """
        super().__init__()
        self.filename = filename
        with open(filename) as f:
            # Blank lines (e.g. a trailing empty line at the end of the file)
            # are skipped: they carry no sample, and keeping them would
            # inflate __len__ and make __getitem__ fail when unpacking.
            stripped_lines = (line.strip() for line in f)
            self.content_info = [line for line in stripped_lines if line]

    def __len__(self):
        """Return the number of samples (non-blank lines) that were loaded."""
        return len(self.content_info)

    def __getitem__(self, idx):
        """Parse and return the sample stored at position *idx*.

        Args:
            idx: Position of the sample in the loaded line list.

        Returns:
            A dict with two keys: ``"feature"`` (list of floats) and
            ``"label"`` (int).

        Raises:
            IndexError: If *idx* is outside the loaded line list.
            ValueError: If the line does not consist of exactly two
                tab-separated fields, or a field is not numeric.
        """
        label_text, feature_text = self.content_info[idx].split("\t")
        # Convert the label first so a malformed label is reported before
        # any malformed feature value.
        label = int(label_text)
        features = [float(value) for value in feature_text.split(",")]
        return {"feature": features, "label": label}
if __name__ == '__main__':
filename