from torch.utils.data import Dataset, DataLoader
import numpy as np


def readfile(path, embeding):
    """Read a UTF-8 text file and build a random per-character embedding table.

    The file is split into lines on "\\n". Every distinct character that
    occurs in those lines is assigned its own vector drawn from
    ``np.random.normal``; a reserved "UNK" entry is always created first.

    Args:
        path: Path of the text file to read (decoded as UTF-8).
        embeding: Length of each embedding vector.

    Returns:
        A ``(all_data, word_embeding)`` tuple where ``all_data`` is the list
        of lines (without newline characters) and ``word_embeding`` maps
        "UNK" and each seen character to a NumPy array of shape
        ``(embeding,)``.
    """
    with open(path, "r", encoding="utf-8") as file:
        all_data = file.read().split("\n")

    # split("\n") leaves one trailing "" when the text ends with a newline
    # (and for an empty file). That is an artifact of the split, not a real
    # sample, so drop it. Blank lines inside the file are kept.
    if all_data and all_data[-1] == "":
        all_data.pop()

    # "UNK" is drawn first so the order of random draws is: UNK, then each
    # character in order of first appearance.
    word_embeding = {"UNK": np.random.normal(size=(embeding,))}
    for data in all_data:
        for word in data:  # iterating a str yields single characters
            if word not in word_embeding:
                word_embeding[word] = np.random.normal(size=(embeding,))
    return all_data, word_embeding


class MyDataset(Dataset):
    """Minimal ``Dataset`` wrapper around an indexable collection of samples."""

    def __init__(self, data):
        """Store ``data``; it must support ``len()`` and integer indexing."""
        self.data = data

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, item):
        """Return the sample at index ``item`` unchanged."""
        return self.data[item]


if __name__ == "__main__":
    path = "D:前50行.txt"
    embeding = 50

    all_data, word_embeding = readfile(path, embeding)
    dataset = MyDataset(all_data)
    dataloader = DataLoader(dataset)

    # Walk every batch, every line in the batch and every character in the
    # line, printing that character's embedding vector.
    for data in dataloader:
        for words in data:
            for word in words:
                print(word_embeding[word])