Walking to Work

Winter is coming and the weather is getting colder and colder, so I decided to stop running every day and walk to work instead, because on the way to work I can read English and eat some food.
Yesterday morning was the first time I walked to work. I passed through my old school, Dalian Maritime University, and had breakfast in the third dining hall. I graduated three years ago, and since then I have often gone back to my old school, but I only played basketball and never went to the dining hall, let alone ate anything there. Yesterday I chose a piece of cake, a big dumpling, and a bowl of gruel. They were delicious, and I will eat at this dining hall again.

Because we had a health check this morning, I walked straight to work without eating anything. Fortunately, after the health check I got some food: an egg, a piece of bread, and a bag of milk.
import numpy as np
import torch
from torch import nn
import torch.utils.data as Data
# Counter is used to build word-frequency counts
from collections import Counter

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"current device:{device}")

# sentence data
sentences = [
    # animal behaviour
    "dogs chase cats", "cats drink milk", "dogs love bones", "cats hate water",
    "dogs bark loudly", "cats climb trees", "dogs play fetch", "cats purr softly",
    "dogs guard houses", "cats hunt mice", "dogs wag tails", "cats nap all day",
    "birds fly high", "fish swim fast", "birds sing in morning", "tigers roar at night",
    "elephants walk slowly", "monkeys swing on trees",
    # food properties
    "apples are sweet", "bananas taste creamy", "apples grow on trees", "bananas have peels",
    "apples make juice", "bananas are yellow", "apples can be red", "bananas rich in potassium",
    "oranges are juicy", "grapes are sour", "strawberries are red", "bread is soft",
    "cheese tastes salty", "eggs are nutritious", "rice is sticky", "noodles are long",
    "chocolate is sweet",
    # daily life
    "kids eat apples", "monkeys love bananas", "dogs enjoy meat", "cats prefer fish",
    "people drink milk", "children like fruit", "breakfast has milk", "dogs drink water",
    "cats avoid dogs", "people walk to work", "buses run on time", "cars drive fast",
    "students study hard", "teachers explain clearly", "doctors help patients",
    "cooks prepare meals", "babies cry at night", "seniors rest peacefully",
    # feelings and actions
    "he feels happy", "she looks tired", "they laugh together", "he cries softly",
    "she dances gracefully", "they argue loudly", "he hugs his dog", "she kisses her baby",
    "they shake hands", "he smiles kindly", "she yells in anger", "they cheer with joy",
    # nature and weather
    "sun rises early", "moon shines at night", "stars twinkle above", "clouds float in sky",
    "rain falls gently", "wind blows strongly", "snow covers mountains", "ice melts in sun",
    "lightning flashes bright", "thunder rumbles loudly", "fog covers the fields", "dew drops sparkle",
    # people and occupations
    "doctors heal patients", "teachers teach students", "engineers build bridges",
    "writers write stories", "artists draw pictures", "singers sing songs",
    "actors perform plays", "pilots fly planes", "farmers grow crops", "chefs cook meals",
    "drivers drive cars", "scientists make discoveries",
    # more combinations
    "apple pie is delicious", "banana smoothie is healthy", "dog food contains nutrients",
    "cat food has tuna flavor", "apple and banana salad", "dog and cat live together",
    "milk with banana shake", "fish for cat dinner", "bread with cheese", "rice with chicken",
    "noodles with beef", "coffee with milk", "tea with sugar", "juice with ice"
]

# tokenise
word_list = "".join(sentences).split()
# convert to a set to deduplicate
vocab = set(list(word_list))
vocab_size = len(vocab)
# print(vocab)

# build the index mappings
v_to_idx = {c: i for i, c in enumerate(vocab)}
# print(f"v_to_idx={v_to_idx}")
idx_to_v = {i: c for i, c in enumerate(vocab)}

# build the word-frequency table
f_word = Counter(vocab)
# print(f"f(w)={f_word}")
f_total = len(word_list)
# print(f"sum f(w)={f_total}")

# compute the negative-sampling probabilities
prob_neg_samples = {w: (cnt / f_total) ** 0.75 for w, cnt in f_word.items()}
# print(f"prob_neg_samples = {prob_neg_samples}")
# convert to an np.array holding the negative-sampling probabilities
prob_neg_samples = np.array([prob_neg_samples[w] for w in vocab])
# print(f"prob_neg_samples = {prob_neg_samples}")
# normalise
prob_neg_samples /= prob_neg_samples.sum()

# hyperparameters
window_size = 2      # context window size
neg_count = 4        # 4 negative samples per positive sample
batch_size = 128     # samples per training batch
embedding_size = 20  # word-embedding dimension

# generate the negative-sampling training samples
def generate_neg_sample():
    skip_gram = []  # holds (centre, word, flag); flag=True marks a positive pair
    for idx in range(window_size, len(word_list) - window_size):
        centre = v_to_idx[word_list[idx]]  # centre word
        # context words
        contexts = [v_to_idx[word_list[i]]
                    for i in range(idx - window_size, idx + window_size + 1) if i != idx]
        for c in contexts:
            # add each centre-context pair as a positive sample
            skip_gram.append((centre, c, True))
            # indices of the negative-sample words
            negs = []
            # randomly draw negative samples
            while len(negs) < neg_count:
                neg_idx = np.random.choice(len(vocab), p=prob_neg_samples)
                if neg_idx != c:
                    negs.append(neg_idx)
            # add the negative samples
            for ni in negs:
                skip_gram.append((centre, ni, False))
    return skip_gram

# convert the training samples to torch tensors
data = generate_neg_sample()
centres = torch.tensor([d[0] for d in data], dtype=torch.long).to(device)
print(f"centres={centres}")
contexts = torch.tensor([d[1] for d in data], dtype=torch.long).to(device)
print(f"contexts ={contexts}")
labels = torch.tensor([d[2] for d in data], dtype=torch.float32).to(device)
print(f"labels={labels}")

# TensorDataset combines the three tensors into one dataset object;
# each sample is a triple (centre, context, label)
dataset = Data.TensorDataset(centres, contexts, labels)
# load the dataset in batches
dataloader = Data.DataLoader(dataset, batch_size, shuffle=True)

class NegSkipGram(nn.Module):
    def __init__(self, vocab_size, embedding_size):
        super().__init__()
        # embedding table for centre words
        self.centre_embedding = nn.Embedding(vocab_size, embedding_size)
        # embedding table for context words
        self.context_embedding = nn.Embedding(vocab_size, embedding_size)
        # initialise both embedding weight matrices in place from a normal
        # distribution (modifies the original tensors directly)
        nn.init.normal_(
            self.centre_embedding.weight,  # centre-word embedding weight matrix
            mean=0,   # mean 0
            std=0.01  # standard deviation
        )
        nn.init.normal_(
            self.context_embedding.weight,  # context-word embedding weight matrix
            mean=0,
            std=0.01
        )

    def forward(self, centres, contexts):
        # centre-word vectors
        vec_centre = self.centre_embedding(centres)
        # print(f"vec_centre={vec_centre}")
        # context-word vectors
        vec_context = self.context_embedding(contexts)
        # print(f"vec_context={vec_context}")
        # cosine similarity -- this is the line the traceback points at
        return nn.CosineSimilarity(vec_centre, vec_context, dim=1)

model = NegSkipGram(vocab_size, embedding_size).to(device)
criterion = nn.BCEWithLogitsLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.005)

epochs = 1000
for i in range(epochs):
    for batch_centre, batch_context, batch_label in dataloader:
        cos_score = model(batch_centre, batch_context)
        loss = criterion(cos_score, batch_label)
        optimiser.zero_grad()
        loss.backward()

Why does it report this error?

D:\anaconda3\envs\pytorch\python.exe D:\PycharmProect\Word2Vec\skip-gram.py
current device:cuda
centres=tensor([143, 143, 143,  ..., 122, 122, 122], device='cuda:0')
contexts =tensor([ 76, 151,   9,  ..., 170, 156,  90], device='cuda:0')
labels=tensor([1., 0., 0.,  ..., 0., 0., 0.], device='cuda:0')
Traceback (most recent call last):
  File "D:\PycharmProect\Word2Vec\skip-gram.py", line 189, in <module>
    cos_score = model(batch_centre, batch_context)
  File "D:\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "D:\anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "D:\PycharmProect\Word2Vec\skip-gram.py", line 178, in forward
    return nn.CosineSimilarity(vec_centre, vec_context, dim=1)
TypeError: __init__() got multiple values for argument 'dim'

Process finished with exit code 1
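The TypeError comes from the last line of forward. nn.CosineSimilarity is a module class, and its constructor takes only configuration arguments (dim and eps), not the tensors to compare. In nn.CosineSimilarity(vec_centre, vec_context, dim=1), vec_centre is bound positionally to dim, vec_context to eps, and the explicit keyword dim=1 then collides with the positional value, which is exactly "got multiple values for argument 'dim'". The module must be instantiated first and then called on the tensors, or the functional form can be used directly. A minimal standalone sketch of both fixes (the tensor shapes are arbitrary stand-ins for a batch of 20-dimensional embeddings):

import torch
from torch import nn
import torch.nn.functional as F

vec_centre = torch.randn(4, 20)   # stand-in for self.centre_embedding(centres)
vec_context = torch.randn(4, 20)  # stand-in for self.context_embedding(contexts)

# Fix 1: the functional form takes the tensors directly
score = F.cosine_similarity(vec_centre, vec_context, dim=1)  # shape: (4,)

# Fix 2: construct the module with configuration only, then call the instance
cos = nn.CosineSimilarity(dim=1)
score_2 = cos(vec_centre, vec_context)

print(torch.allclose(score, score_2))  # True

In the model itself that means either creating self.cos = nn.CosineSimilarity(dim=1) in __init__ and returning self.cos(vec_centre, vec_context) from forward, or returning F.cosine_similarity(vec_centre, vec_context, dim=1). A few other things in the posted script are worth checking once this error is gone: "".join(sentences) glues the last word of each sentence onto the first word of the next, so " ".join(sentences) was probably intended; Counter(vocab) counts the deduplicated set, so every frequency is 1 and the f(w)^0.75 negative-sampling distribution degenerates to uniform, whereas Counter(word_list) counts the actual tokens; and the training loop never calls optimiser.step(), so the weights would never update even after the fix. Finally, cosine similarity is bounded to [-1, 1], which makes a weak logit for BCEWithLogitsLoss; classic skip-gram with negative sampling scores a pair with the raw dot product, e.g. (vec_centre * vec_context).sum(dim=1).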