学习记录1

本人自己的学习记录

import torch

import torch.nn as nn

import numpy as np

import random

import json

import matplotlib.pyplot as plt

class Mu(nn.Module):
    """Average-pooling text classifier: embed -> mean over sequence -> linear -> sigmoid."""

    def __init__(self, dim, leng, vocab):
        """
        dim: embedding dimension.
        leng: sample-length parameter; inputs are expected to carry leng-1 tokens.
        vocab: token -> index mapping; its size fixes the embedding table rows.
        """
        super(Mu, self).__init__()
        self.embedding = nn.Embedding(len(vocab), dim)
        # kernel spans the whole sequence (leng-1 tokens) => mean over time
        self.pool = nn.AvgPool1d(leng - 1)
        self.classfy = nn.Linear(dim, 1)
        self.activation = torch.sigmoid
        self.loss = nn.functional.mse_loss

    def forward(self, x, y=None):
        """x: LongTensor (batch, leng-1). Returns MSE loss if y is given, else probs (batch, 1)."""
        x = self.embedding(x)   # (batch, leng-1, dim)
        x = x.transpose(1, 2)   # (batch, dim, leng-1) — AvgPool1d pools the last axis
        x = self.pool(x)        # (batch, dim, 1)
        # Squeeze only the pooled time axis. A bare .squeeze() also drops the
        # batch dimension when batch_size == 1, breaking the linear layer's input.
        x = x.squeeze(-1)       # (batch, dim)
        x = self.classfy(x)     # (batch, 1)
        y_pred = self.activation(x)
        if y is not None:
            return self.loss(y_pred, y)
        else:
            return y_pred

# Build the vocabulary: 'pad' (0) + the 26 lowercase letters (1..26) + 'unk' (27)

def build_vocab():
    """Return a char->index vocab: 'pad'=0, 'a'..'z'=1..26, 'unk'=27."""
    letters = "abcdefghijklmnopqrstuvwxyz"
    vocab = {"pad": 0}
    vocab.update({ch: idx for idx, ch in enumerate(letters, start=1)})
    vocab["unk"] = len(vocab)  # next free index after pad + letters
    return vocab

def build_sample(vocab, leng):
    """Draw leng-1 random tokens from the vocab keys.

    Label is 1 if any of the characters 'a', 'b', 'c' was drawn, else 0.
    Returns (list of token ids, label).
    """
    keys = list(vocab.keys())
    tokens = [random.choice(keys) for _ in range(leng - 1)]
    # positive sample iff at least one of a/b/c appears among the drawn tokens
    label = 1 if set("abc") & set(tokens) else 0
    ids = [vocab.get(tok, vocab["unk"]) for tok in tokens]
    return ids, label

def build_dataset(s_leng, vocab, leng):
    """Generate s_leng samples via build_sample.

    Returns (LongTensor of token ids, FloatTensor of labels with shape (s_leng, 1)).
    """
    samples = [build_sample(vocab, leng) for _ in range(s_leng)]
    xs = [x for x, _ in samples]
    ys = [[y] for _, y in samples]  # keep a trailing dim so labels match (batch, 1) preds
    return torch.LongTensor(xs), torch.FloatTensor(ys)

def build_Module(vocab, char_dim, s_leng):
    """Factory: construct a Mu model with the given embedding dim and length parameter."""
    return Mu(char_dim, s_leng, vocab)

# Measure model accuracy after each training epoch on a fresh sample set

def evaluate(model, vocab, leng):
    """Score the model on 200 freshly generated samples; prints and returns accuracy."""
    model.eval()
    x, y = build_dataset(200, vocab, leng)
    print("本次预测集中共有%d个正样本,%d个负样本" % (sum(y), 200 - sum(y)))
    correct, wrong = 0, 0
    with torch.no_grad():
        preds = model(x)
        for pred, truth in zip(preds, y):
            # a prediction is correct when the probability falls on the same
            # side of 0.5 as the true label
            hit = (float(pred) < 0.5 and int(truth) == 0) or (
                float(pred) > 0.5 and int(truth) == 1
            )
            if hit:
                correct += 1
            else:
                wrong += 1
    print("正确预测个数:%d, 正确率:%f" % (correct, correct / (correct + wrong)))
    return correct / (correct + wrong)

def main():
    """Train the classifier, plot per-epoch acc/loss, then save weights and vocab."""
    epoch_num = 20        # training epochs
    batch_size = 20       # samples per gradient step
    train_sample = 500    # samples generated per epoch
    char_dim = 20         # embedding dimension
    s_leng = 5            # length parameter (samples carry s_leng-1 tokens)
    lr = 0.003
    vocab = build_vocab()
    model = build_Module(vocab, char_dim, s_leng)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    log = []
    for epoch in range(epoch_num):
        model.train()
        watch_loss = []
        for _ in range(int(train_sample / batch_size)):
            x, y = build_dataset(batch_size, vocab, s_leng)
            optimizer.zero_grad()
            loss = model(x, y)  # forward returns the loss when labels are given
            loss.backward()
            optimizer.step()
            watch_loss.append(loss.item())
        print("==============\n第%d轮平均loss值为:%f" % (epoch + 1, np.mean(watch_loss)))
        acc = evaluate(model, vocab, s_leng)
        log.append([acc, np.mean(watch_loss)])
    plt.plot(range(len(log)), [entry[0] for entry in log], label="acc")   # accuracy curve
    plt.plot(range(len(log)), [entry[1] for entry in log], label="loss")  # loss curve
    plt.legend()
    plt.show()
    torch.save(model.state_dict(), "model.pth")
    with open("vocab.json", "w", encoding="utf8") as writer:
        writer.write(json.dumps(vocab, ensure_ascii=False, indent=2))
    return

def predict(mode_path, vocab_path, input_strings):
    """Load a trained model and vocab from disk, then print one prediction per string.

    Args:
        mode_path: path to the saved state_dict (e.g. "model.pth").
        vocab_path: path to the JSON vocab file (e.g. "vocab.json").
        input_strings: iterable of strings, each expected to be s_leng-1 (=4) chars.
    """
    char_dim = 20  # must match char_dim used in main(); 5 here made load_state_dict fail
    s_leng = 5
    # close the vocab file deterministically instead of leaking the handle
    with open(vocab_path, "r", encoding="utf8") as f:
        vocab = json.load(f)
    model = build_Module(vocab, char_dim, s_leng)
    model.load_state_dict(torch.load(mode_path))
    # map unseen characters to 'unk' instead of raising KeyError,
    # consistent with build_sample's encoding
    x = [[vocab.get(ch, vocab["unk"]) for ch in s] for s in input_strings]
    model.eval()
    with torch.no_grad():
        result = model(torch.LongTensor(x))
    for i, prob in enumerate(result):
        # print the string matching this prediction, not the stale loop
        # variable left over from building x (which was always the last string)
        print("输入:%s, 预测类别:%d, 概率值:%f" % (input_strings[i], round(float(prob)), float(prob)))

if __name__ == "__main__":
    main()

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值