Using sys.all_parameters to Get Stored Procedure Parameters

This article describes how to use T-SQL to query the SQL Server system catalog view sys.all_parameters and obtain a stored procedure's parameter information. Example code shows how to specify a stored procedure name and retrieve the details of all of its parameters.
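The original example code was not preserved in this copy, so the following is a minimal sketch of such a query. The procedure name dbo.usp_GetOrders is a hypothetical placeholder; substitute the schema-qualified name of your own stored procedure.

```sql
-- List every parameter of a stored procedure via sys.all_parameters.
-- 'dbo.usp_GetOrders' is a placeholder name used only for illustration.
SELECT
    p.parameter_id,                      -- ordinal position (0 = return value, if any)
    p.name                AS parameter_name,
    TYPE_NAME(p.user_type_id) AS data_type,
    p.max_length,                        -- storage length in bytes (-1 = MAX types)
    p.precision,
    p.scale,
    p.is_output,                         -- 1 = OUTPUT parameter
    p.has_default_value                  -- populated only for CLR objects
FROM sys.all_parameters AS p
WHERE p.object_id = OBJECT_ID(N'dbo.usp_GetOrders')
ORDER BY p.parameter_id;
```

Note that sys.all_parameters also includes parameters of system objects; if you only care about user-defined procedures, sys.parameters exposes the same columns.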