Export-Onnx
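Train or load a YOLOv8-OBB model with the Ultralytics Python API and export it to ONNX. The training and validation calls are optional if a trained checkpoint is already available: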
from ultralytics import YOLO

model = YOLO("yolov8n-obb.yaml")  # build a new model from YAML
model = YOLO("yolov8n-obb.pt")    # or load a pretrained model (recommended)

# Optional: fine-tune and validate on the DOTA8 sample dataset
results = model.train(data="dota8.yaml", epochs=3, imgsz=640)
metrics = model.val(data="dota8.yaml")

results = model("boats.jpg")  # quick sanity check on a sample image
model.export(format="onnx")   # writes yolov8n-obb.onnx next to the weights
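Before writing the inference code, it can help to confirm the exported network's input and output shapes, since the decoders below assume a [1, 3, 640, 640] input and a [1, 20, 8400] output (4 box values, 15 DOTA class scores and 1 angle per candidate). A minimal check with onnxruntime, assuming the file was written as yolov8n-obb.onnx in the working directory:

import onnxruntime as ort

session = ort.InferenceSession("yolov8n-obb.onnx", providers=["CPUExecutionProvider"])
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape)   # expected: images [1, 3, 640, 640]
for out in session.get_outputs():
    print("output:", out.name, out.shape)   # expected: output0 [1, 20, 8400]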
Python
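The exported model can be run directly with ONNX Runtime. The script below loads an image, pads and resizes it to the model input size, decodes the raw output, applies rotated NMS, and draws the surviving oriented boxes with their class labels: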
import cv2
import numpy as np
import onnxruntime as ort
import time
PI = 3.1415926
CLASS_NAMES = [
"plane", "ship", "storage tank", "baseball diamond", "tennis court",
"basketball court", "ground track field", "harbor", "bridge", "large vehicle",
"small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool"
]
MODEL_PATH = "yolov8n-obb.onnx"
IMAGE_PATH = "boats.jpg"
CONF_THRESHOLD = 0.25
NMS_THRESHOLD = 0.4
image = cv2.imread(IMAGE_PATH)
image_height, image_width = image.shape[:2]
start_time = time.time()
session = ort.InferenceSession(MODEL_PATH, providers=['CPUExecutionProvider'])
model_inputs = session.get_inputs()
input_names = [model_inputs[i].name for i in range(len(model_inputs))]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [model_output[i].name for i in range(len(model_output))]
output_shape = model_output[0].shape
# Letterbox: pad the image to a square canvas, then resize it to the model input size
h, w = image.shape[:2]
max_dim = max(h, w)
image_resized = np.zeros((max_dim, max_dim, 3), dtype=np.uint8)
image_resized[:h, :w] = image
image_resized = cv2.resize(image_resized, (input_shape[3], input_shape[2]))
input_h, input_w = input_shape[2], input_shape[3]
# Scale factors to map model-space boxes back to the original image
x_factor = max_dim / input_w
y_factor = max_dim / input_h
# BGR -> RGB (as in YOLO training), normalize to [0, 1], HWC -> CHW, add batch dimension
input_image = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB) / 255.0
input_image = input_image.transpose(2, 0, 1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = session.run(output_names, {input_names[0]: input_tensor})[0]
inference_time = (time.time() - start_time) * 1000
# Squeeze to (20, 8400): rows 0-3 are cx, cy, w, h; rows 4-18 are the 15 class scores; row 19 is the angle
output = np.squeeze(outputs)
boxes = []
class_ids = []
confidences = []
for i in range(output.shape[1]):
    class_scores = output[4:19, i]
    class_id = np.argmax(class_scores)
    score = class_scores[class_id]
    if score > CONF_THRESHOLD:
        cx, cy, ow, oh = output[0:4, i]
        # The OBB head emits angles in radians, roughly in [-pi/4, 3pi/4); wrap the upper part down by pi
        angle = output[19, i]
        if 0.5 * PI <= angle <= 0.75 * PI:
            angle -= PI
        # OpenCV rotated rect: ((cx, cy), (w, h), angle in degrees), scaled back to the original image
        box = (
            (cx * x_factor, cy * y_factor),
            (ow * x_factor, oh * y_factor),
            angle * 180.0 / PI,
        )
        boxes.append(box)
        class_ids.append(class_id)
        confidences.append(float(score))
# Rotated NMS keeps the best box among overlapping rotated candidates
indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
for i in indices:
    i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
    box = boxes[i]
    points = cv2.boxPoints(box).astype(int)
    for j in range(4):
        cv2.line(image, tuple(points[j]), tuple(points[(j + 1) % 4]), (0, 0, 255), 2)
    # Anchor the label near the topmost corner of the box
    top_point = min(points, key=lambda p: p[1])
    label = f"{CLASS_NAMES[class_ids[i]]}:{confidences[i]:.2f}"
    text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
    text_origin = (int(top_point[0]), int(top_point[1]) + 20)
    cv2.rectangle(image, text_origin, (text_origin[0] + text_size[0], text_origin[1] + text_size[1] + 5),
                  (0, 255, 255), cv2.FILLED)
    cv2.putText(image, label, (text_origin[0], text_origin[1] + text_size[1]),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
fps = 1000 / inference_time
cv2.putText(image, f"FPS: {fps:.2f}", (20, 40), cv2.FONT_HERSHEY_PLAIN, 2.0, (255, 0, 0), 2)
print(f"Inference time: {inference_time:.2f} ms; Detections: {len(indices)}")
cv2.imshow("Detection Results", image)
cv2.waitKey(0)
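If the detections need to be stored rather than only displayed, the kept rotated boxes can be converted to 4-corner polygons with cv2.boxPoints and written to a file. A short sketch that continues the script above (the results.txt name is just an example):

# Continuation of the script above: save kept detections as 4-corner polygons
with open("results.txt", "w") as f:
    for i in indices:
        i = i[0] if isinstance(i, (list, tuple, np.ndarray)) else i
        corners = cv2.boxPoints(boxes[i]).reshape(-1)  # x1 y1 x2 y2 x3 y3 x4 y4
        coords = " ".join(f"{v:.1f}" for v in corners)
        f.write(f"{CLASS_NAMES[class_ids[i]]} {confidences[i]:.3f} {coords}\n")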
Cpp
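The C++ version follows the same pre- and post-processing steps, using OpenVINO to run the exported ONNX model and OpenCV for image handling and rotated NMS: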
#include <fstream>
#include <opencv2/opencv.hpp>
#include <openvino/openvino.hpp>
using namespace std;
using namespace cv;
using namespace ov;
constexpr float PI = 3.1415926f;
const std::vector<std::string> class_names = {
"plane", "ship", "storage tank", "baseball diamond", "tennis court",
"basketball court", "ground track field", "harbor", "bridge", "large vehicle",
"small vehicle", "helicopter", "roundabout", "soccer ball field", "swimming pool",
};
int main(int argc, char** argv)
{
    Mat image = imread("boats.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string model_path = "yolov8n-obb.onnx";
    float conf_threshold = 0.25f;
    float nms_threshold = 0.4f;
    float score_threshold = 0.25f;

    // Load the exported ONNX model with OpenVINO and compile it for CPU
    Core core;
    auto compiled_model = core.compile_model(model_path, "CPU");
    auto input_port = compiled_model.input();
    auto input_shape = input_port.get_shape();
    InferRequest infer_request = compiled_model.create_infer_request();

    int64 start = getTickCount();
    // Letterbox: copy the image into the top-left corner of a square canvas
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    // NCHW input: input_shape = {1, 3, H, W}
    int input_h = input_shape[2];
    int input_w = input_shape[3];
    // Scale factors to map model-space boxes back to the original image
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    // Normalize to [0, 1], resize to the model input size and swap BGR -> RGB
    Mat blob = dnn::blobFromImage(image_, 1.0 / 255.0, Size(input_w, input_h), Scalar(), true);
    Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), blob.ptr(0));
    infer_request.set_input_tensor(input_tensor);
    infer_request.infer();

    // Output is [1, 20, 8400]: 4 box params, 15 DOTA class scores, 1 angle per candidate
    auto output = infer_request.get_output_tensor(0);
    auto output_shape = output.get_shape();
    float* data = output.data<float>();
    Mat det_output(output_shape[1], output_shape[2], CV_32F, data);

    std::vector<cv::RotatedRect> boxes;
    std::vector<int> class_ids;
    std::vector<float> confidences;
    for (int i = 0; i < det_output.cols; ++i) {
        const cv::Mat classes_scores = det_output.col(i).rowRange(4, 19);
        cv::Point class_id_point;
        double score;
        cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
        if (score > score_threshold) {
            const float cx = det_output.at<float>(0, i);
            const float cy = det_output.at<float>(1, i);
            const float ow = det_output.at<float>(2, i);
            const float oh = det_output.at<float>(3, i);
            // The OBB head emits angles in radians, roughly in [-pi/4, 3pi/4); wrap the upper part down by pi
            float angle = det_output.at<float>(19, i);
            if (angle >= 0.5f * PI && angle <= 0.75f * PI) {
                angle -= PI;
            }
            const cv::RotatedRect box{
                cv::Point2f{cx * x_factor, cy * y_factor},
                cv::Size2f{ow * x_factor, oh * y_factor},
                angle * 180.0f / PI
            };
            boxes.push_back(box);
            class_ids.push_back(class_id_point.y);
            confidences.push_back(static_cast<float>(score));
        }
    }
    // The RotatedRect overload of NMSBoxes performs rotated NMS
    std::vector<int> indexes;
    cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        const int index = indexes[i];
        const cv::RotatedRect& box = boxes[index];
        cv::Point2f points[4];
        box.points(points);
        for (int j = 0; j < 4; ++j) {
            cv::line(image, points[j], points[(j + 1) % 4], cv::Scalar(0, 0, 255), 2, 8);
        }
        // Anchor the label near the topmost corner of the box
        cv::Point2f topPoint = points[0];
        for (int j = 1; j < 4; ++j) {
            if (points[j].y < topPoint.y) {
                topPoint = points[j];
            }
        }
        const std::string label = class_names[class_ids[index]] + ":" + std::to_string(confidences[index]).substr(0, 4);
        const cv::Size textSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, nullptr);
        const cv::Rect textBox(topPoint.x, topPoint.y + 20, textSize.width, textSize.height + 5);
        cv::rectangle(image, textBox, cv::Scalar(0, 255, 255), cv::FILLED);
        cv::putText(image, label, cv::Point(topPoint.x, topPoint.y + 20 + textSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.5,
                    cv::Scalar(0, 0, 0));
    }
    const float t = (cv::getTickCount() - start) / static_cast<float>(cv::getTickFrequency());
    std::cout << "Infer time: " << t * 1000 << " ms; Detections: " << indexes.size() << std::endl;
    cv::putText(image, cv::format("FPS: %.2f", 1.0 / t), cv::Point(20, 40), cv::FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(255, 0, 0), 2, 8);
    cv::imshow("Detection Results", image);
    cv::waitKey(0);
    return 0;
}
CMakeLists.txt
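A minimal CMake project for building the C++ example against OpenCV and the OpenVINO runtime. The paths below are placeholders for a Windows setup and must be adjusted to the local installations: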
cmake_minimum_required(VERSION 3.18)
project(Yolov8-obb)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Adjust these paths to your local installs (OpenVINO_DIR must contain OpenVINOConfig.cmake)
set(OpenCV_DIR "E:\\Opencv\\opencv_vs\\build")
set(OpenVINO_DIR "E:\\OpenVINO\\runtime\\cmake")

find_package(OpenCV REQUIRED)
find_package(OpenVINO REQUIRED)

add_executable(Yolov8-obb main.cpp)
target_compile_features(Yolov8-obb PRIVATE cxx_std_14)
target_include_directories(Yolov8-obb PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(Yolov8-obb PRIVATE ${OpenCV_LIBS} openvino::runtime)
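With OpenCV and OpenVINO discoverable by CMake, a standard out-of-source build should work, for example (generator and configuration depend on the local toolchain):

cmake -S . -B build
cmake --build build --config Release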