Write Videos & Images

1. Write Videos

Capture frames from the default camera (device 0) and write them to an MJPG-encoded AVI file named after the current Unix timestamp; press ESC to stop recording.

#include <iostream>
#include <string>
#include <ctime>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;
using namespace std;

int main(int argc, char **argv)
{
  time_t t = time(0);
  string x = to_string(t);           // current Unix timestamp, used as the output file name
  VideoCapture capture(0);           // open the default camera
  double rate = 25.0;                // frame rate of the output video
  Size videoSize(640, 480);          // frame size the writer expects
  VideoWriter writer(x + ".avi", CV_FOURCC('M', 'J', 'P', 'G'), rate, videoSize);
  Mat frame;

  while (capture.isOpened())
  {
    capture >> frame;
    if (frame.empty())               // stop if the camera stops delivering frames
      break;
    writer << frame;
    imshow("video", frame);
    if (waitKey(20) == 27)           // 27 is the ASCII code of the ESC key
    {
      break;
    }
  }
  return 0;
}
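The listing above targets the OpenCV 2.x API (the CV_FOURCC macro, with VideoCapture/VideoWriter pulled in via highgui). On OpenCV 3 or 4 the fourcc helper moved to cv::VideoWriter::fourcc and the video classes live in the videoio module. A minimal sketch of the same loop under that assumption (the file name out.avi is hard-coded only to keep the sketch short):

#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>   // imshow / waitKey
#include <opencv2/videoio.hpp>   // VideoCapture / VideoWriter in OpenCV 3+

int main()
{
    cv::VideoCapture capture(0);                               // default camera
    int fourcc = cv::VideoWriter::fourcc('M', 'J', 'P', 'G');  // replaces CV_FOURCC
    cv::VideoWriter writer("out.avi", fourcc, 25.0, cv::Size(640, 480));
    cv::Mat frame;
    while (capture.read(frame))                                // read() returns false when no frame arrives
    {
        writer.write(frame);
        cv::imshow("video", frame);
        if (cv::waitKey(20) == 27)                             // ESC stops recording
            break;
    }
    return 0;
}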

2. Write Images

Grab frames from the camera and save each one as a JPEG named after the current time in milliseconds; press ESC to stop.

#include <iostream>
#include <string>
#include <sys/time.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;
using namespace std;

// Current wall-clock time in milliseconds since the Unix epoch (POSIX gettimeofday).
long long getCurrentTime()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000LL + tv.tv_usec / 1000;
}
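// Portable alternative (an added sketch, not part of the original post): gettimeofday()
// is POSIX-only, so the helper above will not build on Windows. The same millisecond
// timestamp can be obtained with C++11 <chrono>:
#include <chrono>
long long getCurrentTimeMs()
{
    using namespace std::chrono;
    return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
}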

int main(int argc, char **argv)
{
  VideoCapture cap(0);                 // open the default camera
  Mat src;

  // The original destination directory ("~/Desktop/YZJPrograme/ImageCapture/1/") was never
  // used, and imwrite() does not expand '~'; the images below go to the current directory.
  string c = ".jpg";

  while (true)
  {
    cap >> src;
    if (src.empty())                   // stop if the camera returns no frame
      break;
    long long ts = getCurrentTime();   // millisecond timestamp used as the file name
    cout << ts << endl;
    string s = to_string(ts) + c;      // e.g. 1700000000000.jpg
    imwrite(s, src);
    imshow("capture", src);
    if (waitKey(100) == 27)            // ESC stops capturing
      break;
  }
  return 0;
}
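imwrite also accepts per-format encoder parameters. A short sketch for saving with an explicit JPEG quality; the flag is CV_IMWRITE_JPEG_QUALITY in OpenCV 2.x and cv::IMWRITE_JPEG_QUALITY in 3+, and the default value of 90 here is just an example (valid range 0-100):

#include <vector>
#include <string>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>   // imwrite and CV_IMWRITE_JPEG_QUALITY in OpenCV 2.x

// Save a frame as JPEG with an explicit quality setting (higher = larger file, better quality).
void saveJpeg(const cv::Mat &frame, const std::string &path, int quality = 90)
{
    std::vector<int> params;
    params.push_back(CV_IMWRITE_JPEG_QUALITY);   // use cv::IMWRITE_JPEG_QUALITY on OpenCV 3+
    params.push_back(quality);
    cv::imwrite(path, frame, params);
}

Inside the capture loop above this could be called as, for example, saveJpeg(src, s, 85); instead of the plain imwrite(s, src).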





