Implementing a dynamic "loading......" effect with Python's print(flush=True)


https://blog.youkuaiyun.com/Granthoo/article/details/82880562
