Today has been utterly depressing!

 

Another typhoon is due tomorrow or the day after

So today

it rained

And my heart

has been raining too

Except that what it rains is blood

 

This morning

when I got back to the office

the whole building had lost power

How funny!

At first everyone just sat around chatting

and it felt pretty nice

But then the boss called me and another colleague into her office

It turned out she wanted to pump us for information

to find out what the other colleagues were thinking

When she asked about one colleague in particular

I was still determined not to "sell her out"

But then she came out with this:

"Don't go telling me you don't know. I know you and she are close."

Ha!

Once she had said that

could I really say nothing?

So I ended up letting a little slip

What I remember most is her saying:

"Maybe she (the other colleague) has some issue with me, but

that is terribly childish. A boss isn't going to do this or that to you!"

After leaving her office, I was actually well-behaved enough

to go and have a chat with that colleague

Thinking about it now, how stupid of me

Later she called me in again

and said she wanted me to help put together the bid documents

She told me to arrange the work for my two team members

I had already briefed them

And then

that assistant actually started ordering me around

That is what made me angriest

She really does take herself for a second deputy general manager

 

Back at work in the afternoon

there was still no power

The boss called another colleague in

After he came out

he told me to submit a list of two schools for each district

I did as I was told

Later, when the boss asked about those schools

she asked only that colleague

and not me

I could really feel that she

does not trust me

Earlier

in order to meet our targets

we had basically all faked the numbers

I suppose she still suspects that my team

is faking them???

 

Hahaha

I really am depressed and miserable

What I hate most is not being trusted

because this is not the first time

 

So am I actually a team leader????

No!

In pay and treatment, I have never been given a raise

yet whenever there is work, it keeps getting handed to me

 

It is raining

and I am crying too

Why

does my heart ache so much?

Why does it hurt like this???

 
