【指针检测项目】yolo detect_image函数返回检测结果裁剪后的图片

最新推荐文章于 2025-07-05 14:13:43 发布

原创最新推荐文章于 2025-07-05 14:13:43 发布 · 4.8k 阅读

12 ·

CC 4.0 BY-SA版权

工程项目同时被 2 个专栏收录

20 篇文章

订阅专栏

图像处理

3 篇文章

订阅专栏

修改后的代码如下

def detect_image(self, image):
    """Detect objects in ``image``, annotate them, and return a cropped image.

    The image is letterboxed (to ``self.model_image_size`` when that is set,
    otherwise to its own size rounded down to a multiple of 32), divided by
    255 and fed through ``self.sess``. Every returned box is outlined and
    labelled on ``image``; the image is then cropped to the box that the
    loop handled last. Because the loop walks the detections in reverse,
    that is the box at index 0 of the returned boxes.

    Args:
        image: Input image. It is used through ``width``, ``height``,
            ``size``, ``crop`` and ``ImageDraw.Draw``, so presumably a
            ``PIL.Image.Image`` -- confirm against the caller.

    Returns:
        The annotated image cropped to the last processed box, or ``image``
        itself (uncropped) when the model returns no boxes.

    Raises:
        AssertionError: If a fixed ``self.model_image_size`` has a dimension
            that is not a multiple of 32.
    """
    start = timer()

    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
        # model_image_size is reversed before being handed to the helper;
        # presumably (height, width) -> (width, height) -- TODO confirm.
        boxed_image = letterbox_image(
            image, tuple(reversed(self.model_image_size)))
    else:
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        boxed_image = letterbox_image(image, new_image_size)

    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    out_boxes, out_scores, out_classes = self.sess.run(
        [self.boxes, self.scores, self.classes],
        feed_dict={
            self.yolo_model.input: image_data,
            self.input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

    font = ImageFont.truetype(
        font='font/FiraMono-Medium.otf',
        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    # Stays None when there are no detections, so the crop below can be
    # skipped instead of failing on unassigned coordinates.
    crop_box = None

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = self.class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        # NOTE(review): ImageDraw.textsize was removed in Pillow 10 --
        # confirm the Pillow version this project pins.
        label_size = draw.textsize(label, font)

        # Round each coordinate to the nearest integer and clamp it to the
        # image bounds so the later crop cannot extend outside the image.
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when it fits, otherwise just inside.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested one-pixel rectangles emulate a thicker outline. A separate
        # loop variable keeps the detection index ``i`` intact.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset,
                 right - offset, bottom - offset],
                outline=255)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=255)
        draw.text(text_origin, label, fill=0, font=font)
        del draw

        # crop() expects (left, upper, right, lower).
        crop_box = (left, top, right, bottom)

    if crop_box is not None:
        image = image.crop(crop_box)

    end = timer()
    print(end - start)
    return image

def close_session(self):
    """Close the session object stored in ``self.sess``."""
    session = self.sess
    session.close()

关键是这一句：image = image.crop((left, top, right, bottom))。一开始没搞明白 crop 的用法，运行时报错 SystemError: tile cannot extend outside image；后来百度得知，crop 接收一个四元组 (left, top, right, bottom)，其中 left、top 是要裁剪区域左上角的坐标，right、bottom 是要裁剪区域右下角的坐标。