Building a word-to-embedding mapping

from torch.utils.data import Dataset, DataLoader
import numpy as np

def readfile(path, embedding_dim):
    # Read the corpus and split it into lines.
    with open(path, "r", encoding="utf-8") as file:
        all_data = file.read().split("\n")

    # Build a character-to-vector table: every character gets a random
    # vector of length embedding_dim, and "UNK" covers unseen characters.
    word_embedding = {"UNK": np.random.normal(size=(embedding_dim,))}
    for data in all_data:
        for word in data:
            if word not in word_embedding:
                word_embedding[word] = np.random.normal(size=(embedding_dim,))

    return all_data, word_embedding

class MyDataset(Dataset):
    """Wraps the list of text lines so a DataLoader can iterate over it."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        return self.data[item]



if __name__ == "__main__":
    path = "D:前50行.txt"
    embedding_dim = 50
    all_data, word_embedding = readfile(path, embedding_dim)

    dataset = MyDataset(all_data)
    dataloader = DataLoader(dataset)
    for data in dataloader:      # each batch is a list of strings
        for words in data:       # one line of text
            for word in words:   # one character
                # Fall back to the "UNK" vector for characters missing from the table.
                print(word_embedding.get(word, word_embedding["UNK"]))
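
After the lookup table is built, the per-character vectors of one line can be stacked into a single matrix before being fed to a model. The snippet below is a minimal sketch on top of the script above; the helper name sentence_to_matrix is made up for illustration and assumes a non-empty line.

def sentence_to_matrix(sentence, word_embedding):
    # Look up every character, falling back to the "UNK" vector for unseen ones.
    vectors = [word_embedding.get(ch, word_embedding["UNK"]) for ch in sentence]
    # Result shape: (sequence_length, embedding_dim)
    return np.stack(vectors)

# Example: embed the first line of the corpus.
matrix = sentence_to_matrix(all_data[0], word_embedding)
print(matrix.shape)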