PE_BuildTime.html

<!-- PE_BuildTime.html -->
<html>
<script>
/**
 * Read the hexadecimal PE TimeDateStamp typed into the #TimeDateStamp input
 * and show the corresponding build time in an alert box.
 *
 * The result is rendered in the browser's local time zone as
 * "[YYYY/M/D HH:MM:SS]". Invalid input is reported instead of being decoded.
 */
function show()
{
    // A JavaScript Date counts milliseconds from 1970-01-01 00:00:00 UTC.
    // The PE header TimeDateStamp counts seconds from that same instant
    // (it only reads as 1969-12-31 16:00:00 on a UTC-8 clock), so multiplying
    // by 1000 is the only conversion needed.
    // The Date getters used below report local time, e.g. UTC+8 for Beijing time.
    var raw = document.getElementById("TimeDateStamp").value.replace(/^\s+|\s+$/g, "");

    // TimeDateStamp is a 32-bit field: accept 1-8 hex digits, optional 0x prefix.
    // parseInt alone would silently accept "12xyz" and turn "" into NaN.
    if (!/^(0x)?[0-9a-f]{1,8}$/i.test(raw))
    {
        alert("请输入有效的十六进制 TimeDateStamp (最多 8 位)");
        return;
    }

    var s = parseInt(raw, 16);
    var t = new Date(s * 1000);

    // Zero-pad clock fields so 12:05:03 is not displayed as 12:5:3.
    function pad2(n)
    {
        return (n < 10 ? "0" : "") + n;
    }

    alert("[" + t.getFullYear() + "/" + (t.getMonth() + 1) + "/" + t.getDate() + " "
        + pad2(t.getHours()) + ":" + pad2(t.getMinutes()) + ":" + pad2(t.getSeconds()) + "]");
}
</script>
<input type="text" id="TimeDateStamp" value=""/>
<input type="button" value="PE文件编译时间" onclick="show()"/>
</html>

###############################################################################
#  Copyright (C) 2024 LiveTalking@lipku https://github.com/lipku/LiveTalking
#  email: lipku@foxmail.com
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
###############################################################################

# server.py
from flask import Flask, render_template,send_from_directory,request, jsonify
from flask_sockets import Sockets
import base64
import json
#import gevent
#from gevent import pywsgi
#from geventwebsocket.handler import WebSocketHandler
import re
import numpy as np
from threading import Thread,Event
#import multiprocessing
import torch.multiprocessing as mp

from aiohttp import web
import aiohttp
import aiohttp_cors
from aiortc import RTCPeerConnection, RTCSessionDescription
from aiortc.rtcrtpsender import RTCRtpSender
from webrtc import HumanPlayer
from basereal import BaseReal
from llm import llm_response

import argparse
import random
import shutil
import asyncio
import torch
from typing import Dict
from logger import logger
import time
import os  # added next to the other top-of-file imports


app = Flask(__name__)
#sockets = Sockets(app)
nerfreals:Dict[int, BaseReal] = {} #sessionid:BaseReal
opt = None
model = None
avatar = None


#####webrtc###############################
pcs = set()

def randN(N)->int:
    """Return a random integer with exactly N decimal digits (N=6 -> 100000..999999)."""
    # Local names avoid shadowing the min()/max() builtins.
    lower = pow(10, N - 1)
    upper = pow(10, N)
    return random.randint(lower, upper - 1)

def build_nerfreal(sessionid:int)->BaseReal:
    """Create the renderer selected by ``opt.model`` for one session.

    Writes ``sessionid`` into the shared ``opt`` namespace, then instantiates
    the matching ``*Real`` class with the module-level ``model`` and ``avatar``.

    Raises:
        ValueError: if ``opt.model`` is not one of the supported backends
            (previously this surfaced as an UnboundLocalError).
    """
    opt.sessionid=sessionid
    if opt.model == 'wav2lip':
        from lipreal import LipReal
        nerfreal = LipReal(opt,model,avatar)
    elif opt.model == 'musetalk':
        from musereal import MuseReal
        nerfreal = MuseReal(opt,model,avatar)
    elif opt.model == 'ernerf':
        from nerfreal import NeRFReal
        nerfreal = NeRFReal(opt,model,avatar)
    elif opt.model == 'ultralight':
        from lightreal import LightReal
        nerfreal = LightReal(opt,model,avatar)
    else:
        raise ValueError(f"unsupported model: {opt.model!r}")
    return nerfreal

#@app.route('/offer', methods=['POST'])
async def offer(request):
    """Handle a WebRTC SDP offer: create a session and reply with the SDP answer.

    Expects a JSON body with ``sdp`` and ``type``. Replies with HTTP 503 and
    ``{"code": -1, ...}`` once ``opt.max_session`` sessions exist; otherwise
    replies with ``sdp``, ``type`` and the new ``sessionid``.
    """
    params = await request.json()
    remote_offer = RTCSessionDescription(sdp=params["sdp"], type=params["type"])

    if len(nerfreals) >= opt.max_session:
        logger.info('reach max session')
        return web.Response(
            content_type="application/json",
            text=json.dumps({"code": -1, "msg": "Maximum sessions reached"}),
            status=503  # HTTP 503 Service Unavailable
        )
    sessionid = randN(6)
    logger.info('sessionid=%d', sessionid)
    # Reserve the slot before the (slow) build so concurrent offers count it.
    nerfreals[sessionid] = None
    try:
        nerfreal = await asyncio.get_event_loop().run_in_executor(None, build_nerfreal, sessionid)
    except Exception:
        # Do not leak the reserved slot when the renderer cannot be built.
        nerfreals.pop(sessionid, None)
        raise
    nerfreals[sessionid] = nerfreal

    pc = RTCPeerConnection()
    pcs.add(pc)

    @pc.on("connectionstatechange")
    async def on_connectionstatechange():
        logger.info("Connection state is %s" % pc.connectionState)
        # pc.close() in the "failed" branch moves the connection to "closed",
        # so this handler can run twice for one session. pop() keeps the
        # second cleanup from raising KeyError as `del` did.
        if pc.connectionState == "failed":
            await pc.close()
            pcs.discard(pc)
            nerfreals.pop(sessionid, None)
        if pc.connectionState == "closed":
            pcs.discard(pc)
            nerfreals.pop(sessionid, None)

    player = HumanPlayer(nerfreals[sessionid])
    audio_sender = pc.addTrack(player.audio)
    video_sender = pc.addTrack(player.video)
    capabilities = RTCRtpSender.getCapabilities("video")
    # Codec preference order: H264, then VP8, then rtx.
    preferences = list(filter(lambda x: x.name == "H264", capabilities.codecs))
    preferences += list(filter(lambda x: x.name == "VP8", capabilities.codecs))
    preferences += list(filter(lambda x: x.name == "rtx", capabilities.codecs))
    # Index 1 is the second track added above, i.e. the video track.
    transceiver = pc.getTransceivers()[1]
    transceiver.setCodecPreferences(preferences)

    await pc.setRemoteDescription(remote_offer)

    answer = await pc.createAnswer()
    await pc.setLocalDescription(answer)

    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type, "sessionid": sessionid}
        ),
    )

async def human(request):
    """Feed text to a session: ``type`` 'echo' speaks it, 'chat' sends it to the LLM.

    JSON body: ``sessionid`` (default 0), ``type``, ``text`` and an optional
    ``interrupt`` flag that flushes the current speech first.
    """
    params = await request.json()

    sessionid = params.get('sessionid',0)
    if params.get('interrupt'):
        nerfreals[sessionid].flush_talk()

    if params['type']=='echo':
        nerfreals[sessionid].put_msg_txt(params['text'])
    elif params['type']=='chat':
        # llm_response is run in the default executor so it does not block the loop.
        res=await asyncio.get_event_loop().run_in_executor(None, llm_response, params['text'],nerfreals[sessionid])
        #nerfreals[sessionid].put_msg_txt(res)

    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"code": 0, "data":"ok"}
        ),
    )

async def humanaudio(request):
    """Accept an uploaded audio file (form field ``file``) and queue it for a session.

    Any failure is reported as ``{"code": -1, "msg": "err", "data": <detail>}``.
    """
    try:
        form= await request.post()
        sessionid = int(form.get('sessionid',0))
        fileobj = form["file"]
        filebytes=fileobj.file.read()
        nerfreals[sessionid].put_audio_file(filebytes)

        return web.Response(
            content_type="application/json",
            text=json.dumps(
                {"code": 0, "msg":"ok"}
            ),
        )
    except Exception as e:
        # e.args may be empty or hold a non-string (e.g. KeyError(123456));
        # concatenating it to a str would raise inside the error handler itself.
        detail = str(e.args[0]) if e.args else str(e)
        return web.Response(
            content_type="application/json",
            text=json.dumps(
                {"code": -1, "msg":"err","data": detail}
            ),
        )

async def set_audiotype(request):
    """Forward ``audiotype`` and ``reinit`` from the JSON body to the session's custom state."""
    params = await request.json()

    sessionid = params.get('sessionid',0)
    nerfreals[sessionid].set_custom_state(params['audiotype'],params['reinit'])

    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"code": 0, "data":"ok"}
        ),
    )

async def record(request):
    """Start or stop recording for a session according to ``type`` in the JSON body."""
    params = await request.json()

    sessionid = params.get('sessionid',0)
    if params['type']=='start_record':
        # nerfreals[sessionid].put_msg_txt(params['text'])
        nerfreals[sessionid].start_recording()
    elif params['type']=='end_record':
        nerfreals[sessionid].stop_recording()
    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"code": 0, "data":"ok"}
        ),
    )

async def is_speaking(request):
    """Report the value of the session's ``is_speaking()`` in the ``data`` field."""
    params = await request.json()

    sessionid = params.get('sessionid',0)
    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"code": 0, "data": nerfreals[sessionid].is_speaking()}
        ),
    )


async def on_shutdown(app):
    """Close every tracked peer connection when the aiohttp app shuts down."""
    # close peer connections
    coros = [pc.close() for pc in pcs]
    await asyncio.gather(*coros)
    pcs.clear()

async def post(url,data):
    """POST ``data`` to ``url`` and return the response text.

    Returns None (after logging) when an aiohttp.ClientError occurs.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(url,data=data) as response:
                return await response.text()
    except aiohttp.ClientError as e:
        logger.info(f'Error: {e}')

async def run(push_url,sessionid):
    """Build a session and push its audio/video to ``push_url`` (rtcpush transport).

    Creates a local SDP offer, posts it to ``push_url`` and applies the
    returned text as the remote answer.
    """
    nerfreal = await asyncio.get_event_loop().run_in_executor(None, build_nerfreal,sessionid)
    nerfreals[sessionid] = nerfreal

    pc = RTCPeerConnection()
    pcs.add(pc)

    @pc.on("connectionstatechange")
    async def on_connectionstatechange():
        logger.info("Connection state is %s" % pc.connectionState)
        if pc.connectionState == "failed":
            await pc.close()
            pcs.discard(pc)

    player = HumanPlayer(nerfreals[sessionid])
    audio_sender = pc.addTrack(player.audio)
    video_sender = pc.addTrack(player.video)

    await pc.setLocalDescription(await pc.createOffer())
    # NOTE(review): post() returns None on a client error, which would make
    # RTCSessionDescription fail here - confirm whether a retry is wanted.
    answer = await post(push_url,pc.localDescription.sdp)
    await pc.setRemoteDescription(RTCSessionDescription(sdp=answer,type='answer'))
##########################################
# os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'
# os.environ['MULTIPROCESSING_METHOD'] = 'forkserver'
if __name__ == '__main__':
    torch.cuda.set_device(0)  # use the first GPU
    torch.set_default_tensor_type('torch.cuda.FloatTensor')  # default tensors live on the GPU
    ###device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ###torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor)
    mp.set_start_method('spawn')
    parser = argparse.ArgumentParser()
    parser.add_argument('--pose', type=str, default="data/data_kf.json", help="transforms.json, pose source")
    parser.add_argument('--au', type=str, default="data/au.csv", help="eye blink area")
    parser.add_argument('--torso_imgs', type=str, default="", help="torso images path")

    parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --exp_eye")

    parser.add_argument('--data_range', type=int, nargs='*', default=[0, -1], help="data range to use")
    parser.add_argument('--workspace', type=str, default='data/video')
    parser.add_argument('--seed', type=int, default=0)

    ### training options
    parser.add_argument('--ckpt', type=str, default='data/pretrained/ngp_kf.pth')

    # Defaults lowered here in the argument-parsing section (around line 150):
    parser.add_argument('--num_rays', type=int, default=4096, help="减少每批光线数量")  # originally 65536
    parser.add_argument('--batch_size', type=int, default=8, help="降低批大小")  # originally 16
    parser.add_argument('--max_ray_batch', type=int, default=2048, help="避免推理时 OOM")  # originally 4096
    ###parser.add_argument('--num_rays', type=int, default=4096 * 16, help="num rays sampled per image for each training step")
    parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
    parser.add_argument('--max_steps', type=int, default=16, help="max num steps sampled per ray (only valid when using --cuda_ray)")
    parser.add_argument('--num_steps', type=int, default=16, help="num steps sampled per ray (only valid when NOT using --cuda_ray)")
    parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when NOT using --cuda_ray)")
    parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
    ###parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when NOT using --cuda_ray)")

    ### loss set
    parser.add_argument('--warmup_step', type=int, default=10000, help="warm up steps")
    parser.add_argument('--amb_aud_loss', type=int, default=1, help="use ambient aud loss")
    parser.add_argument('--amb_eye_loss', type=int, default=1, help="use ambient eye loss")
    parser.add_argument('--unc_loss', type=int, default=1, help="use uncertainty loss")
    parser.add_argument('--lambda_amb', type=float, default=1e-4, help="lambda for ambient loss")

    ### network backbone options
    parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")

    parser.add_argument('--bg_img', type=str, default='white', help="background image")
    parser.add_argument('--fbg', action='store_true', help="frame-wise bg")
    parser.add_argument('--exp_eye', action='store_true', help="explicitly control the eyes")
    parser.add_argument('--fix_eye', type=float, default=-1, help="fixed eye area, negative to disable, set to 0-0.3 for a reasonable eye")
    parser.add_argument('--smooth_eye', action='store_true', help="smooth the eye area sequence")

    parser.add_argument('--torso_shrink', type=float, default=0.8, help="shrink bg coords to allow more flexibility in deform")

    ### dataset options
    parser.add_argument('--color_space', type=str, default='srgb', help="Color space, supports (linear, srgb)")
    parser.add_argument('--preload', type=int, default=0, help="0 means load data from disk on-the-fly, 1 means preload to CPU, 2 means GPU.")
    # (the default value is for the fox dataset)
    parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box[-bound, bound]^3, if > 1, will invoke adaptive ray marching.")
    parser.add_argument('--scale', type=float, default=4, help="scale camera location into box[-bound, bound]^3")
    parser.add_argument('--offset', type=float, nargs='*', default=[0, 0, 0], help="offset of camera location")
    parser.add_argument('--dt_gamma', type=float, default=1/256, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
    parser.add_argument('--min_near', type=float, default=0.05, help="minimum near distance for camera")
    parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied (sigma)")
    parser.add_argument('--density_thresh_torso', type=float, default=0.01, help="threshold for density grid to be occupied (alpha)")
    parser.add_argument('--patch_size', type=int, default=1, help="[experimental] render patches in training, so as to apply LPIPS loss. 1 means disabled, use [64, 32, 16] to enable")

    parser.add_argument('--init_lips', action='store_true', help="init lips region")
    parser.add_argument('--finetune_lips', action='store_true', help="use LPIPS and landmarks to fine tune lips region")
    parser.add_argument('--smooth_lips', action='store_true', help="smooth the enc_a in a exponential decay way...")

    parser.add_argument('--torso', action='store_true', help="fix head and train torso")
    parser.add_argument('--head_ckpt', type=str, default='', help="head model")

    ### GUI options
    parser.add_argument('--gui', action='store_true', help="start a GUI")
    parser.add_argument('--W', type=int, default=450, help="GUI width")
    parser.add_argument('--H', type=int, default=450, help="GUI height")
    parser.add_argument('--radius', type=float, default=3.35, help="default GUI camera radius from center")
    parser.add_argument('--fovy', type=float, default=21.24, help="default GUI camera fovy")
    parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")

    ### else
    parser.add_argument('--att', type=int, default=2, help="audio attention mode (0 = turn off, 1 = left-direction, 2 = bi-direction)")
    parser.add_argument('--aud', type=str, default='', help="audio source (empty will load the default, else should be a path to a npy file)")
    parser.add_argument('--emb', action='store_true', help="use audio class + embedding instead of logits")

    parser.add_argument('--ind_dim', type=int, default=4, help="individual code dim, 0 to turn off")
    parser.add_argument('--ind_num', type=int, default=10000, help="number of individual codes, should be larger than training dataset size")

    parser.add_argument('--ind_dim_torso', type=int, default=8, help="individual code dim, 0 to turn off")

    parser.add_argument('--amb_dim', type=int, default=2, help="ambient dimension")
    parser.add_argument('--part', action='store_true', help="use partial training data (1/10)")
    parser.add_argument('--part2', action='store_true', help="use partial training data (first 15s)")

    parser.add_argument('--train_camera', action='store_true', help="optimize camera pose")
    parser.add_argument('--smooth_path', action='store_true', help="brute-force smooth camera pose trajectory with a window size")
    parser.add_argument('--smooth_path_window', type=int, default=7, help="smoothing window size")

    # asr
    parser.add_argument('--asr', action='store_true', help="load asr for real-time app")
    parser.add_argument('--asr_wav', type=str, default='', help="load the wav and use as input")
    parser.add_argument('--asr_play', action='store_true', help="play out the audio")

    #parser.add_argument('--asr_model', type=str, default='deepspeech')
    parser.add_argument('--asr_model', type=str, default='cpierse/wav2vec2-large-xlsr-53-esperanto') #
    # parser.add_argument('--asr_model', type=str, default='facebook/wav2vec2-large-960h-lv60-self')
    # parser.add_argument('--asr_model', type=str, default='facebook/hubert-large-ls960-ft')

    parser.add_argument('--asr_save_feats', action='store_true')
    # audio FPS
    parser.add_argument('--fps', type=int, default=50)
    # sliding window left-middle-right length (unit: 20ms)
    parser.add_argument('-l', type=int, default=10)
    parser.add_argument('-m', type=int, default=8)
    parser.add_argument('-r', type=int, default=10)

    parser.add_argument('--fullbody', action='store_true', help="fullbody human")
    parser.add_argument('--fullbody_img', type=str, default='data/fullbody/img')
    parser.add_argument('--fullbody_width', type=int, default=580)
    parser.add_argument('--fullbody_height', type=int, default=1080)
    parser.add_argument('--fullbody_offset_x', type=int, default=0)
    parser.add_argument('--fullbody_offset_y', type=int, default=0)

    #musetalk opt
    parser.add_argument('--avatar_id', type=str, default='avator_1')
    parser.add_argument('--bbox_shift', type=int, default=5)
    ###parser.add_argument('--batch_size', type=int, default=16)

    # parser.add_argument('--customvideo', action='store_true', help="custom video")
    # parser.add_argument('--customvideo_img', type=str, default='data/customvideo/img')
    # parser.add_argument('--customvideo_imgnum', type=int, default=1)

    parser.add_argument('--customvideo_config', type=str, default='')

    parser.add_argument('--tts', type=str, default='edgetts') #xtts gpt-sovits cosyvoice
    parser.add_argument('--REF_FILE', type=str, default=None)
    parser.add_argument('--REF_TEXT', type=str, default=None)
    parser.add_argument('--TTS_SERVER', type=str, default='http://127.0.0.1:9880') # http://localhost:9000
    # parser.add_argument('--CHARACTER', type=str, default='test')
    # parser.add_argument('--EMOTION', type=str, default='default')

    parser.add_argument('--model', type=str, default='ernerf') #musetalk wav2lip

    parser.add_argument('--transport', type=str, default='rtcpush') #rtmp webrtc rtcpush
    parser.add_argument('--push_url', type=str, default='http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream') #rtmp://localhost/live/livestream

    parser.add_argument('--max_session', type=int, default=100)  #multi session count
    parser.add_argument('--listenport', type=int, default=8010)

    opt = parser.parse_args()
    #app.config.from_object(opt)
    #print(app.config)
    opt.customopt = []
    if opt.customvideo_config!='':
        with open(opt.customvideo_config,'r') as file:
            opt.customopt = json.load(file)

    # Load the shared model/avatar once; every session built later reuses them.
    if opt.model == 'ernerf':
        from nerfreal import NeRFReal,load_model,load_avatar
        model = load_model(opt)
        avatar = load_avatar(opt)

        # we still need test_loader to provide audio features for testing.
        # for k in range(opt.max_session):
        #     opt.sessionid=k
        #     nerfreal = NeRFReal(opt, trainer, test_loader,audio_processor,audio_model)
        #     nerfreals.append(nerfreal)
    elif opt.model == 'musetalk':
        from musereal import MuseReal,load_model,load_avatar,warm_up
        logger.info(opt)
        model = load_model()
        avatar = load_avatar(opt.avatar_id)
        warm_up(opt.batch_size,model)
        # for k in range(opt.max_session):
        #     opt.sessionid=k
        #     nerfreal = MuseReal(opt,audio_processor,vae, unet, pe,timesteps)
        #     nerfreals.append(nerfreal)
    elif opt.model == 'wav2lip':
        from lipreal import LipReal,load_model,load_avatar,warm_up
        logger.info(opt)
        ###model = load_model("./models/wav2lip.pth")
        model = load_model("./models/wav2lip.pth").to('cuda')  # force the model onto the GPU
        ###model = load_model("./models/wav2lip.pth").to(device)  # pick GPU/CPU dynamically
        avatar = load_avatar(opt.avatar_id)
        warm_up(opt.batch_size,model,256)
        # for k in range(opt.max_session):
        #     opt.sessionid=k
        #     nerfreal = LipReal(opt,model)
        #     nerfreals.append(nerfreal)
    elif opt.model == 'ultralight':
        from lightreal import LightReal,load_model,load_avatar,warm_up
        logger.info(opt)
        model = load_model(opt)
        avatar = load_avatar(opt.avatar_id)
        warm_up(opt.batch_size,avatar,160)

    if opt.transport=='rtmp':
        thread_quit = Event()
        nerfreals[0] = build_nerfreal(0)
        rendthrd = Thread(target=nerfreals[0].render,args=(thread_quit,))
        rendthrd.start()

    #############################################################################
    appasync = web.Application()
    from aiohttp import WSMsgType

    async def websocket_handler(request):
        """Attach a websocket to the session named by the ``sessionid`` query parameter."""
        ws = web.WebSocketResponse()
        await ws.prepare(request)

        # Query values are strings while nerfreals is keyed by int; without the
        # conversion the membership tests below could never succeed.
        try:
            sessionid = int(request.query.get('sessionid', 0))
        except ValueError:
            sessionid = None  # malformed id: matches no session
        if sessionid in nerfreals:
            nerfreals[sessionid].set_websocket(ws)

        async for msg in ws:
            if msg.type == WSMsgType.TEXT:
                try:
                    # Placeholder for future websocket messages; only validates JSON for now.
                    data = json.loads(msg.data)
                except json.JSONDecodeError:
                    logger.error("无效的WebSocket消息格式")
            elif msg.type == WSMsgType.ERROR:
                logger.error(f"WebSocket错误: {ws.exception()}")

        if sessionid in nerfreals:
            nerfreals[sessionid].set_websocket(None)
        return ws

    async def get_system_reply(request):
        """Return the stripped contents of systemReply.txt as ``{"text": ...}``.

        A missing file yields an empty text; other failures yield HTTP 500.
        """
        try:
            file_path = 'systemReply.txt'
            if not os.path.exists(file_path):
                logger.info('systemReply.txt 文件不存在')
                return web.Response(
                    content_type="application/json",
                    text=json.dumps({"text": ""})
                )

            # Read only; the file is not cleared here.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            logger.info(f'从 systemReply.txt 读取内容: {content[:100]}...')  # log only the first 100 chars

            return web.Response(
                content_type="application/json",
                text=json.dumps({"text": content})
            )
        except Exception as e:
            logger.error(f'读取 systemReply.txt 出错: {str(e)}')
            return web.Response(
                content_type="application/json",
                text=json.dumps({"error": str(e)}),
                status=500
            )

    async def clear_reply(request):
        """Truncate systemReply.txt; ``sessionid`` from the JSON body is only logged."""
        try:
            params = await request.json()
            sessionid = params.get('sessionid', 0)
            file_path = 'systemReply.txt'

            # Empty the file.
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write('')

            logger.info(f'已清空 systemReply.txt (会话ID: {sessionid})')
            return web.Response(
                content_type="application/json",
                text=json.dumps({"code": 0, "msg": "回复已清空"})
            )
        except Exception as e:
            logger.error(f'清空回复出错: {str(e)}')
            return web.Response(
                content_type="application/json",
                text=json.dumps({"code": -1, "error": str(e)}),
                status=500
            )

    async def get_system_reply_array(request):
        """Return systemReplyArray.txt with newlines replaced by "||".

        The file is created empty when missing. The response carries a
        ``timestamp`` field so clients do not reuse a cached reply.
        """
        try:
            file_path = 'systemReplyArray.txt'
            if not os.path.exists(file_path):
                # Create an empty file when it does not exist yet.
                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write('')

            # Plain synchronous read.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            # Join the lines with "||".
            content = content.replace('\n', '||')

            return web.Response(
                content_type="application/json",
                text=json.dumps({
                    "status": "success",
                    "text": content,
                    "timestamp": int(time.time())  # timestamp to defeat caching
                })
            )
        except Exception as e:
            logger.error(f'读取systemReplyArray.txt出错: {str(e)}')
            return web.Response(
                content_type="application/json",
                status=500,
                text=json.dumps({
                    "status": "error",
                    "error": str(e),
                    "text": ""
                })
            )

    # WebSocket route
    appasync.router.add_get("/ws", websocket_handler)
    appasync.router.add_post("/clear_reply", clear_reply)

    appasync.on_shutdown.append(on_shutdown)
    appasync.router.add_post("/offer", offer)
    appasync.router.add_post("/human", human)
    appasync.router.add_post("/humanaudio", humanaudio)
    appasync.router.add_post("/set_audiotype", set_audiotype)
    appasync.router.add_post("/record", record)
    appasync.router.add_post("/is_speaking", is_speaking)
    # These GET routes are registered before the catch-all static route at '/':
    # on aiohttp versions that resolve resources in registration order the
    # static handler would otherwise answer GET /get_system_reply* first.
    appasync.router.add_get("/get_system_reply", get_system_reply)
    appasync.router.add_get("/get_system_reply_array", get_system_reply_array)
    appasync.router.add_static('/',path='web')

    # Configure default CORS settings.
    cors = aiohttp_cors.setup(appasync, defaults={
            "*": aiohttp_cors.ResourceOptions(
                allow_credentials=True,
                expose_headers="*",
                allow_headers="*",
            )
        })
    # Configure CORS on all routes.
    for route in list(appasync.router.routes()):
        cors.add(route)

    pagename='webrtcapi.html'
    if opt.transport=='rtmp':
        pagename='echoapi.html'
    elif opt.transport=='rtcpush':
        pagename='rtcpushapi.html'
    logger.info('start http server; http://<serverip>:'+str(opt.listenport)+'/'+pagename)
    logger.info('如果使用webrtc,推荐访问webrtc集成前端: http://127.0.0.1:'+str(opt.listenport)+'/ffnerchat.html')

    # NOTE(review): assumes `model` exposes .parameters(); confirm this holds
    # for every backend's load_model() return value.
    logger.info(f"模型使用的设备: {next(model.parameters()).device}")
    logger.info(f"当前 GPU 显存占用: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")

    def run_server(runner):
        """Start the aiohttp site on a fresh event loop and serve until interrupted.

        Also logs the reachable URLs, tries to open the page in the default
        browser and, for the rtcpush transport, starts one pusher per session.
        """
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(runner.setup())
        # NOTE(review): bound to loopback only, so the LAN URL logged below is
        # not reachable from other hosts - confirm the intended bind address.
        site = web.TCPSite(runner, '127.0.0.1', opt.listenport)
        loop.run_until_complete(site.start())

        # Log the URLs the service can be reached at.
        import socket
        import webbrowser

        hostname = socket.gethostname()
        try:
            local_ip = socket.gethostbyname(hostname)
        except OSError:
            # An unresolvable hostname must not stop a server that is already listening.
            local_ip = '127.0.0.1'
        logger.info(f"服务已启动,可通过以下地址访问:")
        logger.info(f"本地: http://127.0.0.1:{opt.listenport}/ffnerchatm.html")
        logger.info(f"局域网: http://{local_ip}:{opt.listenport}/ffnerchatm.html")
        url = f"http://127.0.0.1:{opt.listenport}/ffnerchatm.html"

        # Best effort: try to open the page in the default browser.
        try:
            webbrowser.open(url)
            logger.info("已尝试在默认浏览器中打开页面")
        except Exception as e:
            logger.error(f"无法打开浏览器: {e}")

        if opt.transport=='rtcpush':
            for k in range(opt.max_session):
                # Session 0 pushes to push_url itself; session k appends k to it.
                push_url = opt.push_url
                if k!=0:
                    push_url = opt.push_url+str(k)
                loop.run_until_complete(run(push_url,k))
        loop.run_forever()
    #Thread(target=run_server, args=(web.AppRunner(appasync),)).start()
    run_server(web.AppRunner(appasync))

    #app.on_shutdown.append(on_shutdown)
    #app.router.add_post("/offer", offer)

    # print('start websocket server')
    # server = pywsgi.WSGIServer(('0.0.0.0', 8000), app, handler_class=WebSocketHandler)
    # server.serve_forever()

# NOTE(review): the scraped page appends a reader question directly after the
# code; it is not part of server.py and is kept here verbatim:
#   "这里有语音转文字吗" ("Is there speech-to-text here?")
最新发布
10-12
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license import contextlib import csv import urllib from copy import copy from pathlib import Path import cv2 import numpy as np import pytest import torch from PIL import Image from tests import CFG, MODEL, MODELS, SOURCE, SOURCES_LIST, TASK_MODEL_DATA, TMP from ultralytics import RTDETR, YOLO from ultralytics.cfg import TASK2DATA, TASKS from ultralytics.data.build import load_inference_source from ultralytics.data.utils import check_det_dataset from ultralytics.utils import ( ARM64, ASSETS, DEFAULT_CFG, DEFAULT_CFG_PATH, LINUX, LOGGER, ONLINE, ROOT, WEIGHTS_DIR, WINDOWS, YAML, checks, is_dir_writeable, is_github_action_running, ) from ultralytics.utils.downloads import download from ultralytics.utils.torch_utils import TORCH_1_9 IS_TMP_WRITEABLE = is_dir_writeable(TMP) # WARNING: must be run once tests start as TMP does not exist on tests/init def test_model_forward(): """Test the forward pass of the YOLO model.""" model = YOLO(CFG) model(source=None, imgsz=32, augment=True) # also test no source and augment def test_model_methods(): """Test various methods and properties of the YOLO model to ensure correct functionality.""" model = YOLO(MODEL) # Model methods model.info(verbose=True, detailed=True) model = model.reset_weights() model = model.load(MODEL) model.to("cpu") model.fuse() model.clear_callback("on_train_start") model.reset_callbacks() # Model properties _ = model.names _ = model.device _ = model.transforms _ = model.task_map def test_model_profile(): """Test profiling of the YOLO model with `profile=True` to assess performance and resource usage.""" from ultralytics.nn.tasks import DetectionModel model = DetectionModel() # build model im = torch.randn(1, 3, 64, 64) # requires min imgsz=64 _ = model.predict(im, profile=True) @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason="directory is not writeable") def test_predict_txt(): """Test YOLO predictions with file, directory, and pattern sources 
listed in a text file.""" file = TMP / "sources_multi_row.txt" with open(file, "w") as f: for src in SOURCES_LIST: f.write(f"{src}\n") results = YOLO(MODEL)(source=file, imgsz=32) assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images @pytest.mark.skipif(True, reason="disabled for testing") @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason="directory is not writeable") def test_predict_csv_multi_row(): """Test YOLO predictions with sources listed in multiple rows of a CSV file.""" file = TMP / "sources_multi_row.csv" with open(file, "w", newline="") as f: writer = csv.writer(f) writer.writerow(["source"]) writer.writerows([[src] for src in SOURCES_LIST]) results = YOLO(MODEL)(source=file, imgsz=32) assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images @pytest.mark.skipif(True, reason="disabled for testing") @pytest.mark.skipif(not IS_TMP_WRITEABLE, reason="directory is not writeable") def test_predict_csv_single_row(): """Test YOLO predictions with sources listed in a single row of a CSV file.""" file = TMP / "sources_single_row.csv" with open(file, "w", newline="") as f: writer = csv.writer(f) writer.writerow(SOURCES_LIST) results = YOLO(MODEL)(source=file, imgsz=32) assert len(results) == 7 # 1 + 2 + 2 + 2 = 7 images @pytest.mark.parametrize("model_name", MODELS) def test_predict_img(model_name): """Test YOLO model predictions on various image input types and sources, including online images.""" channels = 1 if model_name == "yolo11n-grayscale.pt" else 3 model = YOLO(WEIGHTS_DIR / model_name) im = cv2.imread(str(SOURCE), flags=cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR) # uint8 numpy array assert len(model(source=Image.open(SOURCE), save=True, verbose=True, imgsz=32)) == 1 # PIL assert len(model(source=im, save=True, save_txt=True, imgsz=32)) == 1 # ndarray assert len(model(torch.rand((2, channels, 32, 32)), imgsz=32)) == 2 # batch-size 2 Tensor, FP32 0.0-1.0 RGB order assert len(model(source=[im, im], save=True, save_txt=True, imgsz=32)) == 2 # batch 
assert len(list(model(source=[im, im], save=True, stream=True, imgsz=32))) == 2 # stream assert len(model(torch.zeros(320, 640, channels).numpy().astype(np.uint8), imgsz=32)) == 1 # tensor to numpy batch = [ str(SOURCE), # filename Path(SOURCE), # Path "https://github.com/ultralytics/assets/releases/download/v0.0.0/zidane.jpg" if ONLINE else SOURCE, # URI im, # OpenCV Image.open(SOURCE), # PIL np.zeros((320, 640, channels), dtype=np.uint8), # numpy ] assert len(model(batch, imgsz=32, classes=0)) == len(batch) # multiple sources in a batch
# NOTE(review): the line above is the tail of a definition whose header lies before this section
# (it reads `model`, `im` and `channels`, none of which are defined here). It is left untouched.
# Everything below was restored from a whitespace-collapsed paste: one statement per line, with
# nesting reconstructed from syntax. Confirm loop/branch nesting against the upstream test module.


@pytest.mark.parametrize("model", MODELS)
def test_predict_visualize(model):
    """Test model prediction methods with 'visualize=True' to generate and display prediction visualizations."""
    YOLO(WEIGHTS_DIR / model)(SOURCE, imgsz=32, visualize=True)


def test_predict_grey_and_4ch():
    """Test YOLO prediction on SOURCE converted to greyscale and 4-channel images with various filenames."""
    im = Image.open(SOURCE)
    directory = TMP / "im4"
    directory.mkdir(parents=True, exist_ok=True)

    source_greyscale = directory / "greyscale.jpg"
    source_rgba = directory / "4ch.png"
    source_non_utf = directory / "non_UTF_测试文件_tést_image.jpg"
    source_spaces = directory / "image with spaces.jpg"

    im.convert("L").save(source_greyscale)  # greyscale
    im.convert("RGBA").save(source_rgba)  # 4-ch PNG with alpha
    im.save(source_non_utf)  # non-UTF characters in filename
    im.save(source_spaces)  # spaces in filename

    # Inference
    model = YOLO(MODEL)
    for f in source_rgba, source_greyscale, source_non_utf, source_spaces:
        # Each file is fed as a PIL image, an OpenCV array and a path
        for source in Image.open(f), cv2.imread(str(f)), f:
            results = model(source, save=True, verbose=True, imgsz=32)
            assert len(results) == 1  # verify that an image was run
        f.unlink()  # cleanup


@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
@pytest.mark.skipif(is_github_action_running(), reason="No auth https://github.com/JuanBindez/pytubefix/issues/166")
def test_youtube():
    """Test YOLO model on a YouTube video stream, handling potential network-related errors."""
    model = YOLO(MODEL)
    try:
        model.predict("https://youtu.be/G17sBkb38XQ", imgsz=96, save=True)
    # Handle internet connection errors and 'urllib.error.HTTPError: HTTP Error 429: Too Many Requests'
    except (urllib.error.HTTPError, ConnectionError) as e:
        # Deliberate best-effort: network failures are logged, not raised
        LOGGER.error(f"YouTube Test Error: {e}")


@pytest.mark.skipif(not ONLINE, reason="environment is offline")
@pytest.mark.skipif(not IS_TMP_WRITEABLE, reason="directory is not writeable")
@pytest.mark.parametrize("model", MODELS)
def test_track_stream(model):
    """
    Test streaming tracking on a short 10 frame video using ByteTrack tracker and different GMC methods.

    Note imgsz=160 required for tracking for higher confidence and better matches.
    """
    if model == "yolo11n-cls.pt":  # classification model not supported for tracking
        return
    video_url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/decelera_portrait_min.mov"
    model = YOLO(model)
    model.track(video_url, imgsz=160, tracker="bytetrack.yaml")
    model.track(video_url, imgsz=160, tracker="botsort.yaml", save_frames=True)  # test frame saving also

    # Test Global Motion Compensation (GMC) methods and ReID
    for gmc, reidm in zip(["orb", "sift", "ecc"], ["auto", "auto", "yolo11n-cls.pt"]):
        default_args = YAML.load(ROOT / "cfg/trackers/botsort.yaml")
        custom_yaml = TMP / f"botsort-{gmc}.yaml"
        YAML.save(custom_yaml, {**default_args, "gmc_method": gmc, "with_reid": True, "model": reidm})
        model.track(video_url, imgsz=160, tracker=custom_yaml)


@pytest.mark.parametrize("task,weight,data", TASK_MODEL_DATA)
def test_val(task: str, weight: str, data: str) -> None:
    """Test the validation mode of the YOLO model."""
    model = YOLO(weight)
    for plots in {True, False}:  # Test both cases i.e. plots=True and plots=False
        metrics = model.val(data=data, imgsz=32, plots=plots)
        metrics.to_df()
        metrics.to_csv()
        metrics.to_xml()
        metrics.to_html()
        metrics.to_json()
        metrics.to_sql()
        metrics.confusion_matrix.to_df()  # Tests for confusion matrix export
        metrics.confusion_matrix.to_csv()
        metrics.confusion_matrix.to_xml()
        metrics.confusion_matrix.to_html()
        metrics.confusion_matrix.to_json()
        metrics.confusion_matrix.to_sql()


def test_train_scratch():
    """Test training the YOLO model from scratch using the provided configuration."""
    model = YOLO(CFG)
    model.train(data="coco8.yaml", epochs=2, imgsz=32, cache="disk", batch=-1, close_mosaic=1, name="model")
    model(SOURCE)


@pytest.mark.parametrize("scls", [False, True])
def test_train_pretrained(scls):
    """Test training of the YOLO model starting from a pre-trained checkpoint."""
    model = YOLO(WEIGHTS_DIR / "yolo11n-seg.pt")
    model.train(
        data="coco8-seg.yaml", epochs=1, imgsz=32, cache="ram", copy_paste=0.5, mixup=0.5, name=0, single_cls=scls
    )
    model(SOURCE)


def test_all_model_yamls():
    """Test YOLO model creation for all available YAML configurations in the `cfg/models` directory."""
    for m in (ROOT / "cfg" / "models").rglob("*.yaml"):
        if "rtdetr" in m.name:
            if TORCH_1_9:  # torch<=1.8 issue - TypeError: __init__() got an unexpected keyword argument 'batch_first'
                _ = RTDETR(m.name)(SOURCE, imgsz=640)  # must be 640
        else:
            YOLO(m.name)


@pytest.mark.skipif(WINDOWS, reason="Windows slow CI export bug https://github.com/ultralytics/ultralytics/pull/16003")
def test_workflow():
    """Test the complete workflow including training, validation, prediction, and exporting."""
    model = YOLO(MODEL)
    model.train(data="coco8.yaml", epochs=1, imgsz=32, optimizer="SGD")
    model.val(imgsz=32)
    model.predict(SOURCE, imgsz=32)
    model.export(format="torchscript")  # WARNING: Windows slow CI export bug


def test_predict_callback_and_setup():
    """Test callback functionality during YOLO prediction setup and execution."""

    def on_predict_batch_end(predictor):
        """Callback function that handles operations at the end of a prediction batch."""
        path, im0s, _ = predictor.batch
        im0s = im0s if isinstance(im0s, list) else [im0s]
        bs = [predictor.dataset.bs for _ in range(len(path))]
        predictor.results = zip(predictor.results, im0s, bs)  # results is List[batch_size]

    model = YOLO(MODEL)
    model.add_callback("on_predict_batch_end", on_predict_batch_end)

    dataset = load_inference_source(source=SOURCE)
    bs = dataset.bs  # noqa access predictor properties
    results = model.predict(dataset, stream=True, imgsz=160)  # source already setup
    # The callback above turned each result into a (result, image, batch size) triple
    for r, im0, bs in results:
        print("test_callback", im0.shape)
        print("test_callback", bs)
        boxes = r.boxes  # Boxes object for bbox outputs
        print(boxes)


@pytest.mark.parametrize("model", MODELS)
def test_results(model: str):
    """Test YOLO model results processing and output in various formats."""
    temp_s = "https://ultralytics.com/images/boats.jpg" if model == "yolo11n-obb.pt" else SOURCE
    results = YOLO(WEIGHTS_DIR / model)([temp_s, temp_s], imgsz=160)
    for r in results:
        assert len(r), f"'{model}' results should not be empty!"
        r = r.cpu().numpy()
        print(r, len(r), r.path)  # print numpy attributes
        r = r.to(device="cpu", dtype=torch.float32)
        r.save_txt(txt_file=TMP / "runs/tests/label.txt", save_conf=True)
        r.save_crop(save_dir=TMP / "runs/tests/crops/")
        r.to_df(decimals=3)  # Align to_ methods: https://docs.ultralytics.com/modes/predict/#working-with-results
        r.to_csv()
        r.to_xml()
        r.to_html()
        r.to_json(normalize=True)
        r.to_sql()
        r.plot(pil=True, save=True, filename=TMP / "results_plot_save.jpg")
        r.plot(conf=True, boxes=True)
        print(r, len(r), r.path)  # print after methods


def test_labels_and_crops():
    """Test output from prediction args for saving YOLO detection labels and crops."""
    imgs = [SOURCE, ASSETS / "zidane.jpg"]
    results = YOLO(WEIGHTS_DIR / "yolo11n.pt")(imgs, imgsz=160, save_txt=True, save_crop=True)
    save_path = Path(results[0].save_dir)
    for r in results:
        im_name = Path(r.path).stem
        cls_idxs = r.boxes.cls.int().tolist()
        # Check correct detections
        assert cls_idxs == ([0, 7, 0, 0] if r.path.endswith("bus.jpg") else [0, 0, 0])  # bus.jpg and zidane.jpg classes
        # Check label path
        labels = save_path / f"labels/{im_name}.txt"
        assert labels.exists()
        # Check detections match label count
        assert len(r.boxes.data) == len([line for line in labels.read_text().splitlines() if line])
        # Check crops path and files
        crop_dirs = list((save_path / "crops").iterdir())
        crop_files = [f for p in crop_dirs for f in p.glob("*")]
        # Crop directories match detections
        assert all(r.names.get(c) in {d.name for d in crop_dirs} for c in cls_idxs)
        # Same number of crops as detections
        assert len([f for f in crop_files if im_name in f.name]) == len(r.boxes.data)


@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_data_utils():
    """Test utility functions in ultralytics/data/utils.py, including dataset stats and auto-splitting."""
    from ultralytics.data.split import autosplit
    from ultralytics.data.utils import HUBDatasetStats
    from ultralytics.utils.downloads import zip_directory

    # from ultralytics.utils.files import WorkingDirectory
    # with WorkingDirectory(ROOT.parent / 'tests'):

    for task in TASKS:
        file = Path(TASK2DATA[task]).with_suffix(".zip")  # i.e. coco8.zip
        download(f"https://github.com/ultralytics/hub/raw/main/example_datasets/{file}", unzip=False, dir=TMP)
        stats = HUBDatasetStats(TMP / file, task=task)
        stats.get_json(save=True)
        stats.process_images()

    autosplit(TMP / "coco8")
    zip_directory(TMP / "coco8/images/val")  # zip


@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_data_converter():
    """Test dataset conversion functions from COCO to YOLO format and class mappings."""
    from ultralytics.data.converter import coco80_to_coco91_class, convert_coco

    file = "instances_val2017.json"
    download(f"https://github.com/ultralytics/assets/releases/download/v0.0.0/{file}", dir=TMP)
    convert_coco(labels_dir=TMP, save_dir=TMP / "yolo_labels", use_segments=True, use_keypoints=False, cls91to80=True)
    coco80_to_coco91_class()


def test_data_annotator():
    """Test automatic annotation of data using detection and segmentation models."""
    from ultralytics.data.annotator import auto_annotate

    auto_annotate(
        ASSETS,
        det_model=WEIGHTS_DIR / "yolo11n.pt",
        sam_model=WEIGHTS_DIR / "mobile_sam.pt",
        output_dir=TMP / "auto_annotate_labels",
    )


def test_events():
    """Test event sending functionality."""
    from ultralytics.hub.utils import Events

    events = Events()
    events.enabled = True
    cfg = copy(DEFAULT_CFG)  # does not require deepcopy
    cfg.mode = "test"
    events(cfg)


def test_cfg_init():
    """Test configuration initialization utilities from the 'ultralytics.cfg' module."""
    from ultralytics.cfg import check_dict_alignment, copy_default_cfg, smart_value

    with contextlib.suppress(SyntaxError):
        check_dict_alignment({"a": 1}, {"b": 2})
    copy_default_cfg()
    # missing_ok=False: unlink raises if the copy is absent, so this also checks that it was created
    (Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")).unlink(missing_ok=False)
    # Plain loop: the calls are made only for their side effects, no list is needed
    for value in ("none", "true", "false"):
        smart_value(value)


def test_utils_init():
    """Test initialization utilities in the Ultralytics library."""
    from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_action_running

    get_ubuntu_version()
    is_github_action_running()
    get_git_origin_url()
    get_git_branch()


def test_utils_checks():
    """Test various utility checks for filenames, git status, requirements, image sizes, and versions."""
    checks.check_yolov5u_filename("yolov5n.pt")
    checks.git_describe(ROOT)
    checks.check_requirements()  # check requirements.txt
    checks.check_imgsz([600, 600], max_dim=1)
    checks.check_imshow(warn=True)
    checks.check_version("ultralytics", "8.0.0")
    checks.print_args()


@pytest.mark.skipif(WINDOWS, reason="Windows profiling is extremely slow (cause unknown)")
def test_utils_benchmarks():
    """Benchmark model performance using 'ProfileModels' from 'ultralytics.utils.benchmarks'."""
    from ultralytics.utils.benchmarks import ProfileModels

    ProfileModels(["yolo11n.yaml"], imgsz=32, min_time=1, num_timed_runs=3, num_warmup_runs=1).run()


def test_utils_torchutils():
    """Test Torch utility functions including profiling and FLOP calculations."""
    from ultralytics.nn.modules.conv import Conv
    from ultralytics.utils.torch_utils import get_flops_with_torch_profiler, profile_ops, time_sync

    x = torch.randn(1, 64, 20, 20)
    m = Conv(64, 64, k=1, s=2)

    profile_ops(x, [m], n=3)
    get_flops_with_torch_profiler(m)
    time_sync()


def test_utils_ops():
    """Test utility operations for coordinate transformations and normalizations."""
    from ultralytics.utils.ops import (
        ltwh2xywh,
        ltwh2xyxy,
        make_divisible,
        xywh2ltwh,
        xywh2xyxy,
        xywhn2xyxy,
        xywhr2xyxyxyxy,
        xyxy2ltwh,
        xyxy2xywh,
        xyxy2xywhn,
        xyxyxyxy2xywhr,
    )

    make_divisible(17, torch.tensor([8]))

    # NOTE(review): the torch.allclose results below are discarded, so these calls only smoke-test
    # that the conversions run. Confirm whether the round-trips were meant to be asserted.
    boxes = torch.rand(10, 4)  # xywh
    torch.allclose(boxes, xyxy2xywh(xywh2xyxy(boxes)))
    torch.allclose(boxes, xyxy2xywhn(xywhn2xyxy(boxes)))
    torch.allclose(boxes, ltwh2xywh(xywh2ltwh(boxes)))
    torch.allclose(boxes, xyxy2ltwh(ltwh2xyxy(boxes)))

    boxes = torch.rand(10, 5)  # xywhr for OBB
    boxes[:, 4] = torch.randn(10) * 30
    torch.allclose(boxes, xyxyxyxy2xywhr(xywhr2xyxyxyxy(boxes)), rtol=1e-3)


def test_utils_files():
    """Test file handling utilities including file age, date, and paths with spaces."""
    from ultralytics.utils.files import file_age, file_date, get_latest_run, spaces_in_path

    file_age(SOURCE)
    file_date(SOURCE)
    get_latest_run(ROOT / "runs")

    path = TMP / "path/with spaces"
    path.mkdir(parents=True, exist_ok=True)
    with spaces_in_path(path) as new_path:
        print(new_path)


@pytest.mark.slow
def test_utils_patches_torch_save():
    """Test torch_save backoff when _torch_save raises RuntimeError."""
    from unittest.mock import MagicMock, patch

    from ultralytics.utils.patches import torch_save

    mock = MagicMock(side_effect=RuntimeError)

    with patch("ultralytics.utils.patches._torch_save", new=mock):
        with pytest.raises(RuntimeError):
            torch_save(torch.zeros(1), TMP / "test.pt")

    assert mock.call_count == 4, "torch_save was not attempted the expected number of times"


def test_nn_modules_conv():
    """Test Convolutional Neural Network modules including CBAM, Conv2, and ConvTranspose."""
    from ultralytics.nn.modules.conv import CBAM, Conv2, ConvTranspose, DWConvTranspose2d, Focus

    c1, c2 = 8, 16  # input and output channels
    x = torch.zeros(4, c1, 10, 10)  # BCHW

    # Run all modules not otherwise covered in tests
    DWConvTranspose2d(c1, c2)(x)
    ConvTranspose(c1, c2)(x)
    Focus(c1, c2)(x)
    CBAM(c1)(x)

    # Fuse ops
    m = Conv2(c1, c2)
    m.fuse_convs()
    m(x)


def test_nn_modules_block():
    """Test various neural network block modules."""
    from ultralytics.nn.modules.block import C1, C3TR, BottleneckCSP, C3Ghost, C3x

    c1, c2 = 8, 16  # input and output channels
    x = torch.zeros(4, c1, 10, 10)  # BCHW

    # Run all modules not otherwise covered in tests
    C1(c1, c2)(x)
    C3x(c1, c2)(x)
    C3TR(c1, c2)(x)
    C3Ghost(c1, c2)(x)
    BottleneckCSP(c1, c2)(x)


@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_hub():
    """Test Ultralytics HUB functionalities."""
    from ultralytics.hub import export_fmts_hub, logout
    from ultralytics.hub.utils import smart_request

    export_fmts_hub()
    logout()
    smart_request("GET", "https://github.com", progress=True)


@pytest.fixture
def image():
    """Load and return an image from a predefined source."""
    return cv2.imread(str(SOURCE))


@pytest.mark.parametrize(
    "auto_augment, erasing, force_color_jitter",
    [
        (None, 0.0, False),
        ("randaugment", 0.5, True),
        ("augmix", 0.2, False),
        ("autoaugment", 0.0, True),
    ],
)
def test_classify_transforms_train(image, auto_augment, erasing, force_color_jitter):
    """Test classification transforms during training with various augmentations."""
    from ultralytics.data.augment import classify_augmentations

    transform = classify_augmentations(
        size=224,
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
        scale=(0.08, 1.0),
        ratio=(3.0 / 4.0, 4.0 / 3.0),
        hflip=0.5,
        vflip=0.5,
        auto_augment=auto_augment,
        hsv_h=0.015,
        hsv_s=0.4,
        hsv_v=0.4,
        force_color_jitter=force_color_jitter,
        erasing=erasing,
    )

    # The `image` fixture is loaded with cv2.imread, so convert BGR -> RGB before wrapping it as PIL
    transformed_image = transform(Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))

    assert transformed_image.shape == (3, 224, 224)
    assert torch.is_tensor(transformed_image)
    assert transformed_image.dtype == torch.float32


@pytest.mark.slow
@pytest.mark.skipif(not ONLINE, reason="environment is offline")
def test_model_tune():
    """Tune YOLO model for performance improvement."""
    YOLO("yolo11n-pose.pt").tune(data="coco8-pose.yaml", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu")
    YOLO("yolo11n-cls.pt").tune(data="imagenet10", plots=False, imgsz=32, epochs=1, iterations=2, device="cpu")


def test_model_embeddings():
    """Test YOLO model embeddings extraction functionality."""
    model_detect = YOLO(MODEL)
    model_segment = YOLO(WEIGHTS_DIR / "yolo11n-seg.pt")

    for batch in [SOURCE], [SOURCE, SOURCE]:  # test batch size 1 and 2
        assert len(model_detect.embed(source=batch, imgsz=32)) == len(batch)
        assert len(model_segment.embed(source=batch, imgsz=32)) == len(batch)


@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="YOLOWorld with CLIP is not supported in Python 3.12")
@pytest.mark.skipif(
    checks.IS_PYTHON_3_8 and LINUX and ARM64,
    reason="YOLOWorld with CLIP is not supported in Python 3.8 and aarch64 Linux",
)
def test_yolo_world():
    """Test YOLO world models with CLIP support."""
    model = YOLO(WEIGHTS_DIR / "yolov8s-world.pt")  # no YOLO11n-world model yet
    model.set_classes(["tree", "window"])
    model(SOURCE, conf=0.01)

    model = YOLO(WEIGHTS_DIR / "yolov8s-worldv2.pt")  # no YOLO11n-world model yet
    # Training from a pretrained model. Eval is included at the final stage of training.
    # Use dota8.yaml which has fewer categories to reduce the inference time of CLIP model
    model.train(
        data="dota8.yaml",
        epochs=1,
        imgsz=32,
        cache="disk",
        close_mosaic=1,
    )

    # test WorldTrainerFromScratch
    from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

    model = YOLO("yolov8s-worldv2.yaml")  # no YOLO11n-world model yet
    model.train(
        data={"train": {"yolo_data": ["dota8.yaml"]}, "val": {"yolo_data": ["dota8.yaml"]}},
        epochs=1,
        imgsz=32,
        cache="disk",
        close_mosaic=1,
        trainer=WorldTrainerFromScratch,
    )


@pytest.mark.skipif(checks.IS_PYTHON_3_12 or not TORCH_1_9, reason="YOLOE with CLIP is not supported in Python 3.12")
@pytest.mark.skipif(
    checks.IS_PYTHON_3_8 and LINUX and ARM64,
    reason="YOLOE with CLIP is not supported in Python 3.8 and aarch64 Linux",
)
def test_yoloe():
    """Test YOLOE models with MobileClip support."""
    # Predict
    # text-prompts
    model = YOLO(WEIGHTS_DIR / "yoloe-11s-seg.pt")
    names = ["person", "bus"]
    model.set_classes(names, model.get_text_pe(names))
    model(SOURCE, conf=0.01)

    import numpy as np

    from ultralytics import YOLOE
    from ultralytics.models.yolo.yoloe import YOLOEVPSegPredictor

    # visual-prompts
    visuals = dict(
        bboxes=np.array(
            [[221.52, 405.8, 344.98, 857.54], [120, 425, 160, 445]],
        ),
        cls=np.array([0, 1]),
    )
    model.predict(
        SOURCE,
        visual_prompts=visuals,
        predictor=YOLOEVPSegPredictor,
    )

    # Val
    model = YOLOE(WEIGHTS_DIR / "yoloe-11s-seg.pt")
    # text prompts
    model.val(data="coco128-seg.yaml", imgsz=32)
    # visual prompts
    model.val(data="coco128-seg.yaml", load_vp=True, imgsz=32)

    # Train, fine-tune
    from ultralytics.models.yolo.yoloe import YOLOEPESegTrainer

    model = YOLOE("yoloe-11s-seg.pt")
    model.train(
        data="coco128-seg.yaml",
        epochs=1,
        close_mosaic=1,
        trainer=YOLOEPESegTrainer,
        imgsz=32,
    )

    # prompt-free
    # predict
    model = YOLOE(WEIGHTS_DIR / "yoloe-11s-seg-pf.pt")
    model.predict(SOURCE)
    # val
    model = YOLOE("yoloe-11s-seg.pt")  # or select yoloe-m/l-seg.pt for different sizes
    model.val(data="coco128-seg.yaml", imgsz=32)


def test_yolov10():
    """Test YOLOv10 model training, validation, and prediction functionality."""
    model = YOLO("yolov10n.yaml")
    # train/val/predict
    model.train(data="coco8.yaml", epochs=1, imgsz=32, close_mosaic=1, cache="disk")
    model.val(data="coco8.yaml", imgsz=32)
    model.predict(imgsz=32, save_txt=True, save_crop=True, augment=True)
    model(SOURCE)


def test_multichannel():
    """Test YOLO model multi-channel training, validation, and prediction functionality."""
    model = YOLO("yolo11n.pt")
    model.train(data="coco8-multispectral.yaml", epochs=1, imgsz=32, close_mosaic=1, cache="disk")
    model.val(data="coco8-multispectral.yaml")
    im = np.zeros((32, 32, 10), dtype=np.uint8)
    model.predict(source=im, imgsz=32, save_txt=True, save_crop=True, augment=True)
    model.export(format="onnx")


@pytest.mark.parametrize("task,model,data", TASK_MODEL_DATA)
def test_grayscale(task: str, model: str, data: str) -> None:
    """Test YOLO model grayscale training, validation, and prediction functionality."""
    if task == "classify":  # not support grayscale classification yet
        return
    grayscale_data = Path(TMP) / f"{Path(data).stem}-grayscale.yaml"
    data = check_det_dataset(data)
    data["channels"] = 1  # add additional channels key for grayscale
    YAML.save(grayscale_data, data)
    # remove npy files in train/val splits if exists, might be created by previous tests
    for split in {"train", "val"}:
        for npy_file in (Path(data["path"]) / data[split]).glob("*.npy"):
            npy_file.unlink()

    model = YOLO(model)
    model.train(data=grayscale_data, epochs=1, imgsz=32, close_mosaic=1)
    model.val(data=grayscale_data)
    im = np.zeros((32, 32, 1), dtype=np.uint8)
    model.predict(source=im, imgsz=32, save_txt=True, save_crop=True, augment=True)
    export_model = model.export(format="onnx")

    # Reload the exported ONNX model and run it on the same single-channel image
    model = YOLO(export_model, task=task)
    model.predict(source=im, imgsz=32)


# NOTE(review): the original line ended with the stray page text "代码分析" ("code analysis").
# It is not Python and is kept here only as a comment.
08-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值