用作者提供的net1->net2生成MTCNN的训练样本(positive,negative,part,landmark)

本文介绍了一种基于多任务级联卷积网络(MTCNN)的人脸检测方法,并详细展示了如何通过该方法生成用于训练的正样本、负样本及部分样本。此外,还提供了生成包含面部关键点位置信息样本的具体实现。



本代码基于作者提供的python版本代码修改,参考:

https://github.com/DuinoDu/mtcnn/blob/master/demo.py  (作者提供)

https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py


1,生成positive,negative,part三种样本:用作者提供的net1(PNet,det1)->net2(RNet,det2)生成bbox,再根据预测的bbox和ground truth计算IOU进行划分:

positive: IOU >= 0.65;

negative: IOU < 0.3;

part: 0.4 <= IOU < 0.65(0.3 <= IOU < 0.4 的bbox不属于任何一类,直接丢弃)


代码如下:


  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import _init_paths
  4. import caffe
  5. import cv2
  6. import numpy as np
  7. #from python_wrapper import *
  8. import os
  9. def bbreg(boundingbox, reg):
  10. reg = reg.T
  11. # calibrate bouding boxes
  12. if reg.shape[1] == 1:
  13. print "reshape of reg"
  14. pass # reshape of reg
  15. w = boundingbox[:,2] - boundingbox[:,0] + 1
  16. h = boundingbox[:,3] - boundingbox[:,1] + 1
  17. bb0 = boundingbox[:,0] + reg[:,0]*w
  18. bb1 = boundingbox[:,1] + reg[:,1]*h
  19. bb2 = boundingbox[:,2] + reg[:,2]*w
  20. bb3 = boundingbox[:,3] + reg[:,3]*h
  21. boundingbox[:,0:4] = np.array([bb0, bb1, bb2, bb3]).T
  22. #print "bb", boundingbox
  23. return boundingbox
  24. def pad(boxesA, w, h):
  25. boxes = boxesA.copy() # shit, value parameter!!!
  26. tmph = boxes[:,3] - boxes[:,1] + 1
  27. tmpw = boxes[:,2] - boxes[:,0] + 1
  28. numbox = boxes.shape[0]
  29. dx = np.ones(numbox)
  30. dy = np.ones(numbox)
  31. edx = tmpw
  32. edy = tmph
  33. x = boxes[:,0:1][:,0]
  34. y = boxes[:,1:2][:,0]
  35. ex = boxes[:,2:3][:,0]
  36. ey = boxes[:,3:4][:,0]
  37. tmp = np.where(ex > w)[0]
  38. if tmp.shape[0] != 0:
  39. edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp]
  40. ex[tmp] = w-1
  41. tmp = np.where(ey > h)[0]
  42. if tmp.shape[0] != 0:
  43. edy[tmp] = -ey[tmp] + h-1 + tmph[tmp]
  44. ey[tmp] = h-1
  45. tmp = np.where(x < 1)[0]
  46. if tmp.shape[0] != 0:
  47. dx[tmp] = 2 - x[tmp]
  48. x[tmp] = np.ones_like(x[tmp])
  49. tmp = np.where(y < 1)[0]
  50. if tmp.shape[0] != 0:
  51. dy[tmp] = 2 - y[tmp]
  52. y[tmp] = np.ones_like(y[tmp])
  53. # for python index from 0, while matlab from 1
  54. dy = np.maximum(0, dy-1)
  55. dx = np.maximum(0, dx-1)
  56. y = np.maximum(0, y-1)
  57. x = np.maximum(0, x-1)
  58. edy = np.maximum(0, edy-1)
  59. edx = np.maximum(0, edx-1)
  60. ey = np.maximum(0, ey-1)
  61. ex = np.maximum(0, ex-1)
  62. return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
  63. def IoU(box, boxes):
  64. """Compute IoU between detect box and gt boxes
  65. Parameters:
  66. ----------
  67. box: numpy array , shape (5, ): x1, y1, x2, y2, score
  68. input box
  69. boxes: numpy array, shape (n, 4): x1, y1, x2, y2
  70. input ground truth boxes
  71. Returns:
  72. -------
  73. ovr: numpy.array, shape (n, )
  74. IoU
  75. """
  76. box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
  77. area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
  78. xx1 = np.maximum(box[0], boxes[:, 0])
  79. yy1 = np.maximum(box[1], boxes[:, 1])
  80. xx2 = np.minimum(box[2], boxes[:, 2])
  81. yy2 = np.minimum(box[3], boxes[:, 3])
  82. # compute the width and height of the bounding box
  83. w = np.maximum(0, xx2 - xx1 + 1)
  84. h = np.maximum(0, yy2 - yy1 + 1)
  85. inter = w * h
  86. ovr = inter / (box_area + area - inter)
  87. return ovr
  88. def rerec(bboxA):
  89. # convert bboxA to square
  90. w = bboxA[:,2] - bboxA[:,0]
  91. h = bboxA[:,3] - bboxA[:,1]
  92. l = np.maximum(w,h).T
  93. bboxA[:,0] = bboxA[:,0] + w*0.5 - l*0.5
  94. bboxA[:,1] = bboxA[:,1] + h*0.5 - l*0.5
  95. bboxA[:,2:4] = bboxA[:,0:2] + np.repeat([l], 2, axis = 0).T
  96. return bboxA
  97. def nms(boxes, threshold, type):
  98. """nms
  99. :boxes: [:,0:5]
  100. :threshold: 0.5 like
  101. :type: 'Min' or others
  102. :returns: TODO
  103. """
  104. if boxes.shape[0] == 0:
  105. return np.array([])
  106. x1 = boxes[:,0]
  107. y1 = boxes[:,1]
  108. x2 = boxes[:,2]
  109. y2 = boxes[:,3]
  110. s = boxes[:,4]
  111. area = np.multiply(x2-x1+1, y2-y1+1)
  112. I = np.array(s.argsort()) # read s using I
  113. pick = [];
  114. while len(I) > 0:
  115. xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]])
  116. yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]])
  117. xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]])
  118. yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]])
  119. w = np.maximum(0.0, xx2 - xx1 + 1)
  120. h = np.maximum(0.0, yy2 - yy1 + 1)
  121. inter = w * h
  122. if type == 'Min':
  123. o = inter / np.minimum(area[I[-1]], area[I[0:-1]])
  124. else:
  125. o = inter / (area[I[-1]] + area[I[0:-1]] - inter)
  126. pick.append(I[-1])
  127. I = I[np.where( o <= threshold)[0]]
  128. return pick
  129. def generateBoundingBox(map, reg, scale, t):
  130. stride = 2
  131. cellsize = 12
  132. map = map.T
  133. dx1 = reg[0,:,:].T
  134. dy1 = reg[1,:,:].T
  135. dx2 = reg[2,:,:].T
  136. dy2 = reg[3,:,:].T
  137. (x, y) = np.where(map >= t)
  138. yy = y
  139. xx = x
  140. score = map[x,y]
  141. reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]])
  142. if reg.shape[0] == 0:
  143. pass
  144. boundingbox = np.array([yy, xx]).T
  145. bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1"
  146. bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to
  147. score = np.array([score])
  148. boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0)
  149. return boundingbox_out.T
  150. def drawBoxes(im, boxes):
  151. x1 = boxes[:,0]
  152. y1 = boxes[:,1]
  153. x2 = boxes[:,2]
  154. y2 = boxes[:,3]
  155. for i in range(x1.shape[0]):
  156. cv2.rectangle(im, (int(x1[i]), int(y1[i])), (int(x2[i]), int(y2[i])), (0,255,0), 1)
  157. return im
  158. def drawlandmark(im, points):
  159. for i in range(points.shape[0]):
  160. for j in range(5):
  161. cv2.circle(im, (int(points[i][j]), int(points[i][j+5])), 2, (255,0,0))
  162. return im
  163. from time import time
  164. _tstart_stack = []
  165. def tic():
  166. _tstart_stack.append(time())
  167. def toc(fmt="Elapsed: %s s"):
  168. print fmt % (time()-_tstart_stack.pop())
  169. def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
  170. img2 = img.copy()
  171. factor_count = 0
  172. total_boxes = np.zeros((0,9), np.float)
  173. points = []
  174. h = img.shape[0]
  175. w = img.shape[1]
  176. minl = min(h, w)
  177. img = img.astype(float)
  178. m = 12.0/minsize
  179. minl = minl*m
  180. # create scale pyramid
  181. scales = []
  182. while minl >= 12:
  183. scales.append(m * pow(factor, factor_count))
  184. minl *= factor
  185. factor_count += 1
  186. # first stage
  187. for scale in scales:
  188. hs = int(np.ceil(h*scale))
  189. ws = int(np.ceil(w*scale))
  190. if fastresize:
  191. im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1]
  192. im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear
  193. else:
  194. im_data = cv2.resize(img, (ws,hs)) # default is bilinear
  195. im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1]
  196. #im_data = imResample(img, hs, ws); print "scale:", scale
  197. im_data = np.swapaxes(im_data, 0, 2)
  198. im_data = np.array([im_data], dtype = np.float)
  199. PNet.blobs['data'].reshape(1, 3, ws, hs)
  200. PNet.blobs['data'].data[...] = im_data
  201. out = PNet.forward()
  202. boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0])
  203. if boxes.shape[0] != 0:
  204. pick = nms(boxes, 0.5, 'Union')
  205. if len(pick) > 0 :
  206. boxes = boxes[pick, :]
  207. if boxes.shape[0] != 0:
  208. total_boxes = np.concatenate((total_boxes, boxes), axis=0)
  209. #np.save('total_boxes_101.npy', total_boxes)
  210. #####
  211. # 1 #
  212. #####
  213. # print "[1]:",total_boxes.shape[0]
  214. #print total_boxes
  215. #return total_boxes, []
  216. numbox = total_boxes.shape[0]
  217. if numbox > 0:
  218. # nms
  219. pick = nms(total_boxes, 0.7, 'Union')
  220. total_boxes = total_boxes[pick, :]
  221. # print "[2]:",total_boxes.shape[0]
  222. # revise and convert to square
  223. regh = total_boxes[:,3] - total_boxes[:,1]
  224. regw = total_boxes[:,2] - total_boxes[:,0]
  225. t1 = total_boxes[:,0] + total_boxes[:,5]*regw
  226. t2 = total_boxes[:,1] + total_boxes[:,6]*regh
  227. t3 = total_boxes[:,2] + total_boxes[:,7]*regw
  228. t4 = total_boxes[:,3] + total_boxes[:,8]*regh
  229. t5 = total_boxes[:,4]
  230. total_boxes = np.array([t1,t2,t3,t4,t5]).T
  231. total_boxes = rerec(total_boxes) # convert box to square
  232. # print "[4]:",total_boxes.shape[0]
  233. total_boxes[:,0:4] = np.fix(total_boxes[:,0:4])
  234. # print "[4.5]:",total_boxes.shape[0]
  235. #print total_boxes
  236. [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
  237. numbox = total_boxes.shape[0]
  238. if numbox > 0:
  239. # second stage
  240. # construct input for RNet
  241. tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox)
  242. for k in range(numbox):
  243. tmp = np.zeros((int(tmph[k]) +1, int(tmpw[k]) + 1,3))
  244. tmp[int(dy[k]):int(edy[k])+1, int(dx[k]):int(edx[k])+1] = img[int(y[k]):int(ey[k])+1, int(x[k]):int(ex[k])+1]
  245. #print "y,ey,x,ex", y[k], ey[k], x[k], ex[k]
  246. #print "tmp", tmp.shape
  247. tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24))
  248. #print tempimg.shape
  249. #print tempimg[0,0,0,:]
  250. tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python
  251. # RNet
  252. tempimg = np.swapaxes(tempimg, 1, 3)
  253. #print tempimg[0,:,0,0]
  254. RNet.blobs['data'].reshape(numbox, 3, 24, 24)
  255. RNet.blobs['data'].data[...] = tempimg
  256. out = RNet.forward()
  257. score = out['prob1'][:,1]
  258. #print 'score', score
  259. pass_t = np.where(score>threshold[1])[0]
  260. #print 'pass_t', pass_t
  261. score = np.array([score[pass_t]]).T
  262. total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1)
  263. # print "[5]:",total_boxes.shape[0]
  264. #print total_boxes
  265. #print "1.5:",total_boxes.shape
  266. mv = out['conv5-2'][pass_t, :].T
  267. #print "mv", mv
  268. if total_boxes.shape[0] > 0:
  269. pick = nms(total_boxes, 0.7, 'Union')
  270. # print 'pick', pick
  271. if len(pick) > 0:
  272. total_boxes = total_boxes[pick, :]
  273. # print "[6]:", total_boxes.shape[0]
  274. total_boxes = bbreg(total_boxes, mv[:, pick])
  275. # print "[7]:", total_boxes.shape[0]
  276. total_boxes = rerec(total_boxes)
  277. # print "[8]:", total_boxes.shape[0]
  278. return total_boxes
  279. def main():
  280. img_dir = "/home/xiao/code/mtcnn-caffe/prepare_data/WIDER_train/images/"
  281. imglistfile = "wider_face_train.txt"
  282. with open(imglistfile, 'r') as f:
  283. annotations = f.readlines()
  284. num = len(annotations)
  285. print "%d pics in total" % num
  286. neg_save_dir = "/media/xiao/软件/mtcnn/train/48/negative/"
  287. pos_save_dir = "/media/xiao/软件/mtcnn/train/48/positive/"
  288. part_save_dir = "/media/xiao/软件/mtcnn/train/48/part/"
  289. image_size = 48
  290. f1 = open('/media/xiao/软件/mtcnn/train/48/pos_48.txt', 'w')
  291. f2 = open('/media/xiao/软件/mtcnn/train/48/neg_48.txt', 'w')
  292. f3 = open('/media/xiao/软件/mtcnn/train/48/part_48.txt', 'w')
  293. p_idx = 0 # positive
  294. n_idx = 0 # negative
  295. d_idx = 0 # dont care
  296. image_idx = 0
  297. minsize = 20
  298. caffe_model_path = "./model"
  299. threshold = [0.6, 0.7, 0.7]
  300. factor = 0.709
  301. caffe.set_mode_gpu()
  302. PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
  303. RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)
  304. for annotation in annotations:
  305. # imgpath = imgpath.split('\n')[0]
  306. annotation = annotation.strip().split(' ')
  307. bbox = map(float, annotation[1:])
  308. gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
  309. img_path = img_dir + annotation[0] + '.jpg'
  310. #print "######\n", img_path
  311. print image_idx
  312. image_idx += 1
  313. img = cv2.imread(img_path)
  314. img_matlab = img.copy()
  315. tmp = img_matlab[:,:,2].copy()
  316. img_matlab[:,:,2] = img_matlab[:,:,0]
  317. img_matlab[:,:,0] = tmp
  318. boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)
  319. #img = drawBoxes(img, boundingboxes)
  320. #cv2.imshow('img', img)
  321. #cv2.waitKey(1000)
  322. # generate positive,negative,part samples
  323. for box in boundingboxes:
  324. x_left, y_top, x_right, y_bottom, _ = box
  325. crop_w = x_right - x_left + 1
  326. crop_h = y_bottom - y_top + 1
  327. # ignore box that is too small or beyond image border
  328. if crop_w < image_size / 2 or crop_h < image_size / 2:
  329. continue
  330. if x_left < 0 or y_top < 0:
  331. continue
  332. # compute intersection over union(IoU) between current box and all gt boxes
  333. Iou = IoU(box, gts)
  334. cropped_im = img[int(y_top):int(y_bottom + 1) , int(x_left):int(x_right + 1) ]
  335. resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
  336. #try:
  337. # resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
  338. #except Exception as e:
  339. # print " 1 "
  340. # print e
  341. # save negative images and write label, 负样本
  342. if np.max(Iou) < 0.3:
  343. # Iou with all gts must below 0.3
  344. save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
  345. f2.write("%s/negative/%s.jpg" % (image_size, n_idx) + ' 0')
  346. f2.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
  347. cv2.imwrite(save_file, resized_im)
  348. n_idx += 1
  349. else:
  350. # find gt_box with the highest iou
  351. idx = np.argmax(Iou)
  352. assigned_gt = gts[idx]
  353. x1, y1, x2, y2 = assigned_gt
  354. # compute bbox reg label,offset_x1,offset_y1相对于左上角; offset_x2,offset_y2相对于右上角
  355. offset_x1 = (x1 - x_left) / float(crop_w)
  356. offset_y1 = (y1 - y_top) / float(crop_h)
  357. # offset_x2 = (x2 - x_left) / float(crop_w)
  358. # offset_y2 = (y2 - y_top) / float(crop_h)
  359. offset_x2 = (x2 - x_right) / float(crop_w)
  360. offset_y2 = (y2 - y_bottom )/ float(crop_h)
  361. # save positive and part-face images and write labels, 正样本
  362. if np.max(Iou) >= 0.65:
  363. save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
  364. f1.write("%s/positive/%s.jpg" % (image_size, p_idx) + ' 1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
  365. f1.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
  366. cv2.imwrite(save_file, resized_im)
  367. p_idx += 1
  368. # part 样本
  369. elif np.max(Iou) >= 0.4:
  370. save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
  371. f3.write("%s/part/%s.jpg" % (image_size, d_idx) + ' -1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
  372. f3.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
  373. cv2.imwrite(save_file, resized_im)
  374. d_idx += 1
  375. f.close()
  376. f1.close()
  377. f2.close()
  378. f3.close()
  379. if __name__ == "__main__":
  380. main()


2,生成landmark样本:用作者提供的net1(PNet)->net2(RNet)生成bbox,以5个landmark是否全部落在bbox内作为判别条件:

代码如下:


  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import _init_paths
  4. import caffe
  5. import cv2
  6. import numpy as np
  7. #from python_wrapper import *
  8. import os
  9. def bbreg(boundingbox, reg):
  10. reg = reg.T
  11. # calibrate bouding boxes
  12. if reg.shape[1] == 1:
  13. print "reshape of reg"
  14. pass # reshape of reg
  15. w = boundingbox[:,2] - boundingbox[:,0] + 1
  16. h = boundingbox[:,3] - boundingbox[:,1] + 1
  17. bb0 = boundingbox[:,0] + reg[:,0]*w
  18. bb1 = boundingbox[:,1] + reg[:,1]*h
  19. bb2 = boundingbox[:,2] + reg[:,2]*w
  20. bb3 = boundingbox[:,3] + reg[:,3]*h
  21. boundingbox[:,0:4] = np.array([bb0, bb1, bb2, bb3]).T
  22. #print "bb", boundingbox
  23. return boundingbox
  24. def pad(boxesA, w, h):
  25. boxes = boxesA.copy() # shit, value parameter!!!
  26. tmph = boxes[:,3] - boxes[:,1] + 1
  27. tmpw = boxes[:,2] - boxes[:,0] + 1
  28. numbox = boxes.shape[0]
  29. dx = np.ones(numbox)
  30. dy = np.ones(numbox)
  31. edx = tmpw
  32. edy = tmph
  33. x = boxes[:,0:1][:,0]
  34. y = boxes[:,1:2][:,0]
  35. ex = boxes[:,2:3][:,0]
  36. ey = boxes[:,3:4][:,0]
  37. tmp = np.where(ex > w)[0]
  38. if tmp.shape[0] != 0:
  39. edx[tmp] = -ex[tmp] + w-1 + tmpw[tmp]
  40. ex[tmp] = w-1
  41. tmp = np.where(ey > h)[0]
  42. if tmp.shape[0] != 0:
  43. edy[tmp] = -ey[tmp] + h-1 + tmph[tmp]
  44. ey[tmp] = h-1
  45. tmp = np.where(x < 1)[0]
  46. if tmp.shape[0] != 0:
  47. dx[tmp] = 2 - x[tmp]
  48. x[tmp] = np.ones_like(x[tmp])
  49. tmp = np.where(y < 1)[0]
  50. if tmp.shape[0] != 0:
  51. dy[tmp] = 2 - y[tmp]
  52. y[tmp] = np.ones_like(y[tmp])
  53. # for python index from 0, while matlab from 1
  54. dy = np.maximum(0, dy-1)
  55. dx = np.maximum(0, dx-1)
  56. y = np.maximum(0, y-1)
  57. x = np.maximum(0, x-1)
  58. edy = np.maximum(0, edy-1)
  59. edx = np.maximum(0, edx-1)
  60. ey = np.maximum(0, ey-1)
  61. ex = np.maximum(0, ex-1)
  62. return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
  63. def IoU(box, boxes):
  64. """Compute IoU between detect box and gt boxes
  65. Parameters:
  66. ----------
  67. box: numpy array , shape (5, ): x1, y1, x2, y2, score
  68. input box
  69. boxes: numpy array, shape (n, 4): x1, y1, x2, y2
  70. input ground truth boxes
  71. Returns:
  72. -------
  73. ovr: numpy.array, shape (n, )
  74. IoU
  75. """
  76. box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
  77. area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
  78. xx1 = np.maximum(box[0], boxes[:, 0])
  79. yy1 = np.maximum(box[1], boxes[:, 1])
  80. xx2 = np.minimum(box[2], boxes[:, 2])
  81. yy2 = np.minimum(box[3], boxes[:, 3])
  82. # compute the width and height of the bounding box
  83. w = np.maximum(0, xx2 - xx1 + 1)
  84. h = np.maximum(0, yy2 - yy1 + 1)
  85. inter = w * h
  86. ovr = inter / (box_area + area - inter)
  87. return ovr
  88. def rerec(bboxA):
  89. # convert bboxA to square
  90. w = bboxA[:,2] - bboxA[:,0]
  91. h = bboxA[:,3] - bboxA[:,1]
  92. l = np.maximum(w,h).T
  93. bboxA[:,0] = bboxA[:,0] + w*0.5 - l*0.5
  94. bboxA[:,1] = bboxA[:,1] + h*0.5 - l*0.5
  95. bboxA[:,2:4] = bboxA[:,0:2] + np.repeat([l], 2, axis = 0).T
  96. return bboxA
  97. def nms(boxes, threshold, type):
  98. """nms
  99. :boxes: [:,0:5]
  100. :threshold: 0.5 like
  101. :type: 'Min' or others
  102. :returns: TODO
  103. """
  104. if boxes.shape[0] == 0:
  105. return np.array([])
  106. x1 = boxes[:,0]
  107. y1 = boxes[:,1]
  108. x2 = boxes[:,2]
  109. y2 = boxes[:,3]
  110. s = boxes[:,4]
  111. area = np.multiply(x2-x1+1, y2-y1+1)
  112. I = np.array(s.argsort()) # read s using I
  113. pick = [];
  114. while len(I) > 0:
  115. xx1 = np.maximum(x1[I[-1]], x1[I[0:-1]])
  116. yy1 = np.maximum(y1[I[-1]], y1[I[0:-1]])
  117. xx2 = np.minimum(x2[I[-1]], x2[I[0:-1]])
  118. yy2 = np.minimum(y2[I[-1]], y2[I[0:-1]])
  119. w = np.maximum(0.0, xx2 - xx1 + 1)
  120. h = np.maximum(0.0, yy2 - yy1 + 1)
  121. inter = w * h
  122. if type == 'Min':
  123. o = inter / np.minimum(area[I[-1]], area[I[0:-1]])
  124. else:
  125. o = inter / (area[I[-1]] + area[I[0:-1]] - inter)
  126. pick.append(I[-1])
  127. I = I[np.where( o <= threshold)[0]]
  128. return pick
  129. def generateBoundingBox(map, reg, scale, t):
  130. stride = 2
  131. cellsize = 12
  132. map = map.T
  133. dx1 = reg[0,:,:].T
  134. dy1 = reg[1,:,:].T
  135. dx2 = reg[2,:,:].T
  136. dy2 = reg[3,:,:].T
  137. (x, y) = np.where(map >= t)
  138. yy = y
  139. xx = x
  140. score = map[x,y]
  141. reg = np.array([dx1[x,y], dy1[x,y], dx2[x,y], dy2[x,y]])
  142. if reg.shape[0] == 0:
  143. pass
  144. boundingbox = np.array([yy, xx]).T
  145. bb1 = np.fix((stride * (boundingbox) + 1) / scale).T # matlab index from 1, so with "boundingbox-1"
  146. bb2 = np.fix((stride * (boundingbox) + cellsize - 1 + 1) / scale).T # while python don't have to
  147. score = np.array([score])
  148. boundingbox_out = np.concatenate((bb1, bb2, score, reg), axis=0)
  149. return boundingbox_out.T
  150. def drawBoxes(im, boxes):
  151. x1 = boxes[:,0]
  152. y1 = boxes[:,1]
  153. x2 = boxes[:,2]
  154. y2 = boxes[:,3]
  155. for i in range(x1.shape[0]):
  156. cv2.rectangle(im, (int(x1[i]), int(y1[i])), (int(x2[i]), int(y2[i])), (0,255,0), 1)
  157. return im
  158. def drawBoxes_align(im, boxe):
  159. x1 = boxe[0]
  160. y1 = boxe[1]
  161. x2 = boxe[2]
  162. y2 = boxe[3]
  163. cv2.rectangle(im, (int(x1), int(y1)), (int(x2), int(y2)), (0,255,0), 1)
  164. return im
  165. def drawlandmark(im, points):
  166. for i in range(points.shape[0]):
  167. for j in range(5):
  168. cv2.circle(im, (int(points[i][j]), int(points[i][j+5])), 2, (255,0,0))
  169. return im
  170. def drawlandmark_align(im, point):
  171. for j in range(5):
  172. cv2.circle(im, (int(point[j*2]), int(point[j*2+1])), 2, (255,0,0))
  173. return im
  174. from time import time
  175. _tstart_stack = []
  176. def tic():
  177. _tstart_stack.append(time())
  178. def toc(fmt="Elapsed: %s s"):
  179. print fmt % (time()-_tstart_stack.pop())
  180. def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
  181. img2 = img.copy()
  182. factor_count = 0
  183. total_boxes = np.zeros((0,9), np.float)
  184. points = []
  185. h = img.shape[0]
  186. w = img.shape[1]
  187. minl = min(h, w)
  188. img = img.astype(float)
  189. m = 12.0/minsize
  190. minl = minl*m
  191. # create scale pyramid
  192. scales = []
  193. while minl >= 12:
  194. scales.append(m * pow(factor, factor_count))
  195. minl *= factor
  196. factor_count += 1
  197. # first stage
  198. for scale in scales:
  199. hs = int(np.ceil(h*scale))
  200. ws = int(np.ceil(w*scale))
  201. if fastresize:
  202. im_data = (img-127.5)*0.0078125 # [0,255] -> [-1,1]
  203. im_data = cv2.resize(im_data, (ws,hs)) # default is bilinear
  204. else:
  205. im_data = cv2.resize(img, (ws,hs)) # default is bilinear
  206. im_data = (im_data-127.5)*0.0078125 # [0,255] -> [-1,1]
  207. #im_data = imResample(img, hs, ws); print "scale:", scale
  208. im_data = np.swapaxes(im_data, 0, 2)
  209. im_data = np.array([im_data], dtype = np.float)
  210. PNet.blobs['data'].reshape(1, 3, ws, hs)
  211. PNet.blobs['data'].data[...] = im_data
  212. out = PNet.forward()
  213. boxes = generateBoundingBox(out['prob1'][0,1,:,:], out['conv4-2'][0], scale, threshold[0])
  214. if boxes.shape[0] != 0:
  215. pick = nms(boxes, 0.5, 'Union')
  216. if len(pick) > 0 :
  217. boxes = boxes[pick, :]
  218. if boxes.shape[0] != 0:
  219. total_boxes = np.concatenate((total_boxes, boxes), axis=0)
  220. #np.save('total_boxes_101.npy', total_boxes)
  221. #####
  222. # 1 #
  223. #####
  224. # print "[1]:",total_boxes.shape[0]
  225. #print total_boxes
  226. #return total_boxes, []
  227. numbox = total_boxes.shape[0]
  228. if numbox > 0:
  229. # nms
  230. pick = nms(total_boxes, 0.7, 'Union')
  231. total_boxes = total_boxes[pick, :]
  232. # print "[2]:",total_boxes.shape[0]
  233. # revise and convert to square
  234. regh = total_boxes[:,3] - total_boxes[:,1]
  235. regw = total_boxes[:,2] - total_boxes[:,0]
  236. t1 = total_boxes[:,0] + total_boxes[:,5]*regw
  237. t2 = total_boxes[:,1] + total_boxes[:,6]*regh
  238. t3 = total_boxes[:,2] + total_boxes[:,7]*regw
  239. t4 = total_boxes[:,3] + total_boxes[:,8]*regh
  240. t5 = total_boxes[:,4]
  241. total_boxes = np.array([t1,t2,t3,t4,t5]).T
  242. total_boxes = rerec(total_boxes) # convert box to square
  243. # print "[4]:",total_boxes.shape[0]
  244. total_boxes[:,0:4] = np.fix(total_boxes[:,0:4])
  245. # print "[4.5]:",total_boxes.shape[0]
  246. #print total_boxes
  247. [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)
  248. numbox = total_boxes.shape[0]
  249. if numbox > 0:
  250. # second stage
  251. # construct input for RNet
  252. tempimg = np.zeros((numbox, 24, 24, 3)) # (24, 24, 3, numbox)
  253. for k in range(numbox):
  254. tmp = np.zeros((int(tmph[k]) +1, int(tmpw[k]) + 1,3))
  255. tmp[int(dy[k]):int(edy[k])+1, int(dx[k]):int(edx[k])+1] = img[int(y[k]):int(ey[k])+1, int(x[k]):int(ex[k])+1]
  256. #print "y,ey,x,ex", y[k], ey[k], x[k], ex[k]
  257. #print "tmp", tmp.shape
  258. tempimg[k,:,:,:] = cv2.resize(tmp, (24, 24))
  259. #print tempimg.shape
  260. #print tempimg[0,0,0,:]
  261. tempimg = (tempimg-127.5)*0.0078125 # done in imResample function wrapped by python
  262. # RNet
  263. tempimg = np.swapaxes(tempimg, 1, 3)
  264. #print tempimg[0,:,0,0]
  265. RNet.blobs['data'].reshape(numbox, 3, 24, 24)
  266. RNet.blobs['data'].data[...] = tempimg
  267. out = RNet.forward()
  268. score = out['prob1'][:,1]
  269. #print 'score', score
  270. pass_t = np.where(score>threshold[1])[0]
  271. #print 'pass_t', pass_t
  272. score = np.array([score[pass_t]]).T
  273. total_boxes = np.concatenate( (total_boxes[pass_t, 0:4], score), axis = 1)
  274. # print "[5]:",total_boxes.shape[0]
  275. #print total_boxes
  276. #print "1.5:",total_boxes.shape
  277. mv = out['conv5-2'][pass_t, :].T
  278. #print "mv", mv
  279. if total_boxes.shape[0] > 0:
  280. pick = nms(total_boxes, 0.7, 'Union')
  281. # print 'pick', pick
  282. if len(pick) > 0:
  283. total_boxes = total_boxes[pick, :]
  284. # print "[6]:", total_boxes.shape[0]
  285. total_boxes = bbreg(total_boxes, mv[:, pick])
  286. # print "[7]:", total_boxes.shape[0]
  287. total_boxes = rerec(total_boxes)
  288. # print "[8]:", total_boxes.shape[0]
  289. return total_boxes
  290. def main():
  291. img_dir = "/media/xiao/学习/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
  292. anno_file = "celebA_bbox_landmark.txt"
  293. with open(anno_file, 'r') as f:
  294. annotations = f.readlines()
  295. num = len(annotations)
  296. print "%d pics in total" % num
  297. # 图片大小为48*48
  298. image_size = 48
  299. # landmark_save_dir = "48/landmark/"
  300. landmark_save_dir = "/media/xiao/软件/mtcnn/train/48/landmark/"
  301. # save_dir = "./" + str(image_size)
  302. f1 = open('/media/xiao/软件/mtcnn/train/48/landmark_48.txt', 'w')
  303. l_idx = 0 # landmark
  304. image_idx = 0
  305. minsize = 40
  306. caffe_model_path = "./model"
  307. threshold = [0.6, 0.7, 0.7]
  308. factor = 0.709
  309. caffe.set_mode_gpu()
  310. PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
  311. RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)
  312. for annotation in annotations:
  313. # imgpath = imgpath.split('\n')[0]
  314. annotation = annotation.strip().split(' ')
  315. im_path = annotation[0]
  316. # bbox = map(float, annotation[1:-10])
  317. pts = map(float, annotation[-10:])
  318. # boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
  319. im_path = img_dir + im_path
  320. backupPts = pts[:]
  321. #print "######\n", img_path
  322. print image_idx
  323. image_idx += 1
  324. img = cv2.imread(im_path)
  325. img_matlab = img.copy()
  326. tmp = img_matlab[:,:,2].copy()
  327. img_matlab[:,:,2] = img_matlab[:,:,0]
  328. img_matlab[:,:,0] = tmp
  329. # 用作者提供的net1->net2生成人脸框
  330. boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)
  331. #img = drawBoxes(img, boundingboxes)
  332. #cv2.imshow('img', img)
  333. #cv2.waitKey(1000)
  334. # generate landmark samples
  335. for box in boundingboxes:
  336. x_left, y_top, x_right, y_bottom, _ = box # 得到人脸bbox坐标
  337. crop_w = x_right - x_left + 1
  338. crop_h = y_bottom - y_top + 1
  339. # ignore box that is too small or beyond image border
  340. if crop_w < image_size / 2 or crop_h < image_size / 2:
  341. continue
  342. if x_left < 0 or y_top < 0:
  343. continue
  344. # 当五个landmark都在bbox中时,这样的样本作为我们的landmark训练样本
  345. if pts[0] < x_left or pts[0] > x_right:
  346. continue
  347. if pts[2] < x_left or pts[2] > x_right:
  348. continue
  349. if pts[4] < x_left or pts[4] > x_right:
  350. continue
  351. if pts[6] < x_left or pts[6] > x_right:
  352. continue
  353. if pts[8] < x_left or pts[8] > x_right:
  354. continue
  355. if pts[1] < y_top or pts[1] > y_bottom:
  356. continue
  357. if pts[3] < y_top or pts[3] > y_bottom:
  358. continue
  359. if pts[5] < y_top or pts[5] > y_bottom:
  360. continue
  361. if pts[7] < y_top or pts[7] > y_bottom:
  362. continue
  363. if pts[9] < y_top or pts[9] > y_bottom:
  364. continue
  365. # show image
  366. #img1 = drawBoxes_align(img, box)
  367. #img1 = drawlandmark_align(img1, pts)
  368. #cv2.imshow('img', img1)
  369. #cv2.waitKey(1000)
  370. # our method, x0,y0,x1,y1,x2,y2,x3,y3,x4,y4
  371. #for k in range(len(pts) / 2):
  372. # pts[k * 2] = (pts[k * 2] - x_left) / float(crop_w);
  373. # pts[k * 2 + 1] = (pts[k * 2 + 1] - y_top) / float(crop_h);
  374. #author method, x0,x1,x2,x3,x4,y0,y1,y2,y3,y4
  375. ptsss = pts[:]
  376. # landmark标注为其相对于crop区域左上角的归一化值
  377. for k in range(len(ptsss) / 2):
  378. pts[k] = (ptsss[k * 2] - x_left) / float(crop_w);
  379. pts[5+k] = (ptsss[k * 2 + 1] - y_top) / float(crop_h);
  380. # 将crop区域进行resize
  381. cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)]
  382. resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
  383. # box_ = box.reshape(1, -1)
  384. # 写图片名,class坐标,bbox坐标。
  385. save_file = os.path.join(landmark_save_dir, "%s.jpg" % l_idx)
  386. f1.write(str(image_size) + "/landmark/%s.jpg" % l_idx + ' -1 -1 -1 -1 -1')
  387. # 写landmark坐标并保存图片
  388. for k in range(len(pts)):
  389. f1.write(" %f" % pts[k])
  390. f1.write("\n")
  391. cv2.imwrite(save_file, resized_im)
  392. l_idx += 1
  393. f.close()
  394. f1.close()
  395. if __name__ == "__main__":
  396. main()




/** * Copyright (C) 2024 TP-Link. All rights reserved. */ #if defined(DLA_336Q) || defined(DLA_377) || defined(DLA_920) || defined(DLA_308) #include <string.h> #include <unistd.h> #include "network.h" #include "common.h" #include "utils.h" #include "mi_ipu.h" #include "mi_ipu_datatype.h" #include "network_ipu.h" #include "mi_sys.h" #include "ipu_common.h" #define MMA_HEAP_NAME "mma_heap_name0" #if defined(DLA_377) || defined(DLA_920) || defined(DLA_308) #define ST_DEFAULT_SOC_ID 0 #define MI_SYS_Init_Wrap() MI_SYS_Init(ST_DEFAULT_SOC_ID) #define MI_SYS_MMA_Alloc_Wrap(...) MI_SYS_MMA_Alloc(ST_DEFAULT_SOC_ID, ##__VA_ARGS__) #define MI_SYS_MMA_Free_Wrap(...) MI_SYS_MMA_Free(ST_DEFAULT_SOC_ID, ##__VA_ARGS__) #else #define MI_SYS_Init_Wrap MI_SYS_Init #define MI_SYS_MMA_Alloc_Wrap MI_SYS_MMA_Alloc #define MI_SYS_MMA_Free_Wrap MI_SYS_MMA_Free #endif #define MAX_IPU_INPUT_OUTPUT_BUF_DEPTH 3 #define FD_LM_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define FD_LM_MIN(a, b) (((a) < (b)) ? (a) : (b)) // #define RETINAFACE_OUTPUT_BOX_NUM 16800 // 640 #define RETINAFACE_OUTPUT_BOX_NUM 6048 // 512x288 // #define RETINAFACE_OUTPUT_BOX_NUM 2100 // 320 #define RETINAFACE_FEATUREUSE_NUM 3 typedef struct Retinaface_BBOX { float f32Xmin; float f32Xmax; float f32Ymin; float f32Ymax; float f32Conf; float f32Landmx1; float f32Landmy1; float f32Landmx2; float f32Landmy2; float f32Landmx3; float f32Landmy3; float f32Landmx4; float f32Landmy4; float f32Landmx5; float f32Landmy5; U32 u32Mask; } Retinaface_BBOX_S; typedef struct Retinaface_STACK { S32 s32Min; S32 s32Max; } Retinaface_STACK_S; Retinaface_BBOX_S *g_pstBbox = NULL; Retinaface_STACK_S *g_ps32AssistBuf = NULL; /** * 单个网络 */ typedef struct __network_s { MI_IPU_CHN u32ChnId; /**< 网络的IPU通道ID */ MI_IPU_TensorVector_t InputTensorVector; /**< 网络输入 */ MI_IPU_TensorVector_t OutputTensorVector; /**< 网络输出 */ MI_IPU_SubNet_InputOutputDesc_t desc; /**<网络输入输出张量描述 */ dla_img_desc_t img_desc; /**< 输入图片描述 */ U32 in_size; /**< 输入大小 */ U32 out_size; /**< 
输出大小 */ U32 status; /**< 网络状态 */ } network_t; typedef struct __network_ctx_s { network_t *nets[NET_MAX]; dla_buffer_t in_buf; /**< MMA中分配的输入缓存,所有模型共用一块 */ dla_model_reuse_param_t *model_reuse_param; /**< 模型复用参数 */ U32 inited; } network_ctx_t; static network_ctx_t g_net_ctx = { .inited = 0, }; const char *hw_network_model_path(dla_net_e id) { switch (id) { #if defined(DLA_PD) || defined(DLA_PSS) case NET_PD: return MODEL_OD_PED_PATH; case NET_PCLS: return MODEL_PCLS_PATH; #endif #if defined(DLA_LPSS) || defined(DLA_WHITELIST) case NET_CPD: DLA_DEBUG("CPD path: %s", MODEL_CPD_PATH); return MODEL_CPD_PATH; case NET_PTC: DLA_DEBUG("PTC path: %s", MODEL_PTC_PATH); return MODEL_PTC_PATH; case NET_VTC: DLA_DEBUG("VTC path: %s", MODEL_VTC_PATH); return MODEL_VTC_PATH; case NET_OF: DLA_DEBUG("OF path: %s", MODEL_OF_PATH); return MODEL_OF_PATH; #endif #ifdef DLA_FPD case NET_FPD: return MODEL_FPD_PATH; #endif #ifdef DLA_FSS case NET_FSS: return MODEL_OD_FACE_PATH; case NET_FCLS: return MODEL_FCLS_PATH; #endif case NET_FANGLE: return MODEL_FANGLE_PATH; #if (defined DLA_PSS) || (defined DLA_PR) case NET_PSS: return MODEL_OD_PED_PATH; case NET_PR: return MODEL_PR_PATH; case NET_PC: return MODEL_PC_PATH; #endif case NET_FD_LM: return MODEL_FD_LM_FACE_PATH; case NET_FMCLS: return MODEL_FMCLS_PATH; case NET_OD21: return MODEL_OD21_PATH; #ifdef DLA_ED case NET_ED: return MODEL_ED_PATH; #endif #ifdef DLA_OT case NET_OT_B1: return MODEL_OT_B1_PATH; case NET_OT_B2: return MODEL_OT_B2_PATH; case NET_OT_HEAD: return MODEL_OT_HEAD_PATH; #endif #if (defined DLA_FSS) || (defined DLA_FR) || (defined FLM_P98) case NET_FLM: return MODEL_FLM_PATH; #endif #if (defined DLA_FSS) || (defined DLA_FR) case NET_FR: return MODEL_FR_PATH; #endif #ifdef DLA_FR case NET_FAS: return MODEL_FAS_PATH; #endif #ifdef DLA_LPR case NET_OD22: return MODEL_OD22_PATH; #endif #ifdef DLA_LPR_W case NET_LPD: return MODEL_LPD_PATH; case NET_LPR_W: return MODEL_LPR_W_PATH; #endif #if defined(DLA_LPR) || 
defined(DLA_LPSS) || defined(DLA_WHITELIST) case NET_PLM: return MODEL_PLM_PATH; case NET_LPR: return MODEL_LPR_PATH; #endif #ifdef DLA_HPR case NET_HPR: return MODEL_HPR_PATH; #endif #ifdef DLA_PKGD case NET_PKGD: return MODEL_PKGD_PATH; #endif #ifdef DLA_SED case NET_SED: return MODEL_SED_PATH; case NET_SED_8K: return MODEL_SED_PATH_8k; case NET_SED_16K: return MODEL_SED_PATH_16k; #endif #ifdef DLA_AGED case NET_AGED: return MODEL_AGED_PATH; #endif #ifdef DLA_FCD case NET_FCD: return MODEL_FCD_PATH; #endif #ifdef DLA_FOD case NET_FOD: return MODEL_FOD_PATH; #endif default: return NULL; } } static S32 ipu_init() { U32 max_var_buf_size = 0; MI_SYS_Init_Wrap(); for (int i = 0; i < NET_MAX; ++i) { #ifdef CAM_OS_RTK char *model_path = (char *)get_mem_ipu_model_addr(i); SerializedReadFunc readfunc = mi_ipu_mem_read; #else char *model_path = (char *)hw_network_model_path(i); SerializedReadFunc readfunc = NULL; if (0 != access(model_path, F_OK)) { continue; } #endif if (model_path) { MI_IPU_OfflineModelStaticInfo_t model_stat_info; if (MI_SUCCESS != MI_IPU_GetOfflineModeStaticInfo(readfunc, model_path, &model_stat_info)) { DLA_ERROR("Get model %d info failed", i); return DLA_ERR; } max_var_buf_size = UTILS_MAX(max_var_buf_size, model_stat_info.u32VariableBufferSize); } } DLA_DEBUG("Final variable buf size: %u", max_var_buf_size); MI_IPU_DevAttr_t dev_attr; dev_attr.u32MaxVariableBufSize = max_var_buf_size; dev_attr.u32YUV420_W_Pitch_Alignment = 16; dev_attr.u32YUV420_H_Pitch_Alignment = 2; dev_attr.u32XRGB_W_Pitch_Alignment = 16; if (MI_SUCCESS != MI_IPU_CreateDevice(&dev_attr, NULL, NULL, 0)) { DLA_ERROR("create ipu device failed"); return DLA_ERR; } return DLA_OK; } static void ipu_deinit() { MI_IPU_DestroyDevice(); } S32 hw_network_ctx_init() { if (g_net_ctx.inited) { return DLA_OK; } memset(&g_net_ctx, 0, sizeof(network_ctx_t)); g_net_ctx.inited = 1; return DLA_OK; } void hw_network_ctx_deinit() { S32 i = 0; if (!g_net_ctx.inited) { return; } if (NULL != g_pstBbox) { 
free(g_pstBbox); g_pstBbox = NULL; } if (NULL != g_ps32AssistBuf) { free(g_ps32AssistBuf); g_ps32AssistBuf = NULL; } for (i = 0; i < NET_MAX; ++i) { if (g_net_ctx.nets[i] && NET_STATUS_NOT_LOAD != g_net_ctx.nets[i]->status) { hw_network_unload(i); } } g_net_ctx.inited = 0; return; } S32 hw_network_load(dla_net_e id) { U8 need_init_ipu = 1; /* 若没有加载过任何模型,需先初始化ipu,再加载 */ for (int i = 0; i < NET_MAX; ++i) { if (g_net_ctx.nets[i] && NET_STATUS_LOADED == g_net_ctx.nets[i]->status) { need_init_ipu = 0; break; } } if (need_init_ipu) { if (DLA_OK != ipu_init()) { DLA_ERROR("Ipu init failed"); return DLA_ERR; } } /* 首次load分配内存 */ if (NULL == g_net_ctx.nets[id]) { g_net_ctx.nets[id] = (network_t *)calloc(1, sizeof(network_t)); if (NULL == g_net_ctx.nets[id]) { DLA_ERROR("net %u alloc failed", id); return DLA_ERR; } } network_t *network = g_net_ctx.nets[id]; if (NET_STATUS_LOADED == network->status) { DLA_DEBUG("network %u is already loaded", id); return DLA_OK; } // create channel MI_IPUChnAttr_t chn_attr; memset(&chn_attr, 0, sizeof(chn_attr)); chn_attr.u32InputBufDepth = 0; chn_attr.u32OutputBufDepth = MAX_IPU_INPUT_OUTPUT_BUF_DEPTH; #ifdef CAM_OS_RTK MI_PHY model_phy_addr = get_mem_ipu_model_phy_addr(id); #else char *path = (char *)hw_network_model_path(id); #endif U8 model_create_flag = 1; #if defined(CAM_OS_RTK) /* rtos直接创建模型 */ if (0 == model_phy_addr || MI_SUCCESS != MI_IPU_CreateCHNWithUserMem(&(network->u32ChnId), &chn_attr, model_phy_addr)) { DLA_ERROR("MI_IPU_CreateCHNWithUserMem failed"); model_create_flag = 0; } #else #if defined(TAPO_DUALOS_CAM) /* Linux端根据开关判断是新创建模型还是复用模型 */ if (g_net_ctx.model_reuse_param[id].bm == 1) { if (MI_SUCCESS != MI_IPU_DupCHN(g_net_ctx.model_reuse_param[id].chn_id)) { DLA_ERROR("dup ipu network %d chnid %d failed", id, g_net_ctx.model_reuse_param[id].chn_id); model_create_flag = 0; } } else #endif { if (NULL == path || MI_SUCCESS != MI_IPU_CreateCHN(&(network->u32ChnId), &chn_attr, NULL, path)) { DLA_ERROR("create ipu channel 
failed"); model_create_flag = 0; } } #endif if (model_create_flag == 0) { if (need_init_ipu) { /* 如果加载第一个模型且失败了,需释放ipu */ ipu_deinit(); } return DLA_ERR; } if (g_net_ctx.model_reuse_param) { dla_change_reuse_info(id, network->u32ChnId); } #ifdef TP_TAPO_BATTERY_CAM /* 电池机删除模型文件,节省内存 */ if (remove(path) != 0) { DLA_ERROR("Remove model file failed,path = %s", path); } #endif /* 获取输入输出tensor的shape */ MI_IPU_GetInOutTensorDesc(network->u32ChnId, &(network->desc)); network->InputTensorVector.u32TensorCount = network->desc.u32InputTensorCount; U32 n = network->desc.astMI_InputTensorDescs[0].u32TensorShape[0]; U32 h = network->desc.astMI_InputTensorDescs[0].u32TensorShape[1]; U32 w = network->desc.astMI_InputTensorDescs[0].u32TensorShape[2]; network->img_desc.height = ALIGN_IMG_HEIGHT_HW(h); network->img_desc.width = w; GET_ALIGN_STRIDE_WITH_WIDTH_HW(network->img_desc.stride, w); U32 c = 0; if (NET_OT_HEAD == id) { c = network->desc.astMI_InputTensorDescs[0].u32TensorShape[3]; network->in_size = c * n * w * h * sizeof(MI_S16); } else if (NET_SED == id || NET_SED_8K == id || NET_SED_16K == id) { network->img_desc.height = h; /* SED模型的输入为S16灰度图,stride = w * sizeof(S16) */ #if defined DLA_336Q network->img_desc.width = ALIGN_UP(w, 8); network->img_desc.stride = ALIGN_UP(w, 8) * sizeof(MI_S16); #else network->img_desc.width = w; network->img_desc.stride = w * sizeof(MI_S16); #endif network->in_size = n * network->img_desc.stride * network->img_desc.height; } else { network->in_size = n * IMG_SIZE_HW(network->img_desc.stride, network->img_desc.height); } DLA_DEBUG("in buf NHW size %d %d %d %d", n, network->img_desc.height, network->img_desc.width, network->in_size); network->OutputTensorVector.u32TensorCount = network->desc.u32OutputTensorCount; n = network->desc.astMI_OutputTensorDescs[0].u32TensorShape[0]; h = network->desc.astMI_OutputTensorDescs[0].u32TensorShape[1]; w = network->desc.astMI_OutputTensorDescs[0].u32TensorShape[2]; c = 
network->desc.astMI_OutputTensorDescs[0].u32TensorShape[3]; DLA_DEBUG("out buf: N, H, W, C %d %d %d %d", n, h, w, c); network->out_size = n * c * h * w * sizeof(float); MI_IPU_GetOutputTensors(network->u32ChnId, &network->OutputTensorVector); network->status = NET_STATUS_LOADED; return DLA_OK; } S32 hw_network_unload(dla_net_e id) { S32 i = 0; U8 free_inbuf = 1; network_t *network = NULL; if (id >= NET_MAX || !g_net_ctx.nets[id]) { DLA_DEBUG("invalid net id %u", id); return DLA_ERR; } if (!g_net_ctx.inited) { DLA_ERROR("network ctx not inited"); return DLA_ERR; } network = g_net_ctx.nets[id]; if (NET_STATUS_NOT_LOAD == network->status) { return DLA_OK; } /* 若为电池机不卸载模型 */ #ifdef TP_TAPO_BATTERY_CAM return DLA_OK; #endif MI_IPU_PutOutputTensors(network->u32ChnId, &(network->OutputTensorVector)); #ifndef CAM_OS_RTK MI_IPU_DestroyCHN(network->u32ChnId); #endif network->status = NET_STATUS_NOT_LOAD; if (g_net_ctx.nets[id]) { free(g_net_ctx.nets[id]); g_net_ctx.nets[id] = NULL; } /* 所有模型都已被卸载时,释放共用的输入缓存,并释放ipu */ for (i = 0; i < NET_MAX; ++i) { if (g_net_ctx.nets[i] && NET_STATUS_LOADED == g_net_ctx.nets[i]->status) { free_inbuf = 0; break; } } if (free_inbuf) { DLA_DEBUG("free network input buff of size %u", g_net_ctx.in_buf.size); dla_free_buffer(&g_net_ctx.in_buf); memset(&g_net_ctx.in_buf, 0, sizeof(dla_buffer_t)); #ifndef CAM_OS_RTK ipu_deinit(); #endif } return DLA_OK; } static void Retinaface_Argswap(float *ps32Src1, float *ps32Src2, U32 u32ArraySize) { U32 i = 0; float s32Tmp = 0; for (i = 0; i < u32ArraySize; i++) { s32Tmp = ps32Src1[i]; ps32Src1[i] = ps32Src2[i]; ps32Src2[i] = s32Tmp; } } static S32 Retinaface_NonRecursiveArgQuickSort(float *ps32Array, S32 s32Low, S32 s32High, U32 u32ArraySize, U32 u32ScoreIdx, Retinaface_STACK_S *pstStack) { S32 i = s32Low; S32 j = s32High; S32 s32Top = 0; float s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx]; pstStack[s32Top].s32Min = s32Low; pstStack[s32Top].s32Max = s32High; while (s32Top > -1) { s32Low = 
pstStack[s32Top].s32Min; s32High = pstStack[s32Top].s32Max; i = s32Low; j = s32High; s32Top--; s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx]; while (i < j) { while ((i < j) && (s32KeyConfidence > ps32Array[j * u32ArraySize + u32ScoreIdx])) { j--; } if (i < j) { Retinaface_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize); i++; } while ((i < j) && (s32KeyConfidence < ps32Array[i * u32ArraySize + u32ScoreIdx])) { i++; } if (i < j) { Retinaface_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize); j--; } } if (s32Low < i - 1) { s32Top++; pstStack[s32Top].s32Min = s32Low; pstStack[s32Top].s32Max = i - 1; } if (s32High > i + 1) { s32Top++; pstStack[s32Top].s32Min = i + 1; pstStack[s32Top].s32Max = s32High; } } return DLA_OK; } static float Retinaface_Iou(Retinaface_BBOX_S *pstBbox1, Retinaface_BBOX_S *pstBbox2) { float f32InterWidth = 0.0; float f32InterHeight = 0.0; float f32InterArea = 0.0; float f32Box1Area = 0.0; float f32Box2Area = 0.0; float f32UnionArea = 0.0; f32InterWidth = FD_LM_MIN(pstBbox1->f32Xmax, pstBbox2->f32Xmax) - FD_LM_MAX(pstBbox1->f32Xmin, pstBbox2->f32Xmin); f32InterHeight = FD_LM_MIN(pstBbox1->f32Ymax, pstBbox2->f32Ymax) - FD_LM_MAX(pstBbox1->f32Ymin, pstBbox2->f32Ymin); if (f32InterWidth <= 0 || f32InterHeight <= 0) { return 0; } f32InterArea = f32InterWidth * f32InterHeight; f32Box1Area = (pstBbox1->f32Xmax - pstBbox1->f32Xmin) * (pstBbox1->f32Ymax - pstBbox1->f32Ymin); f32Box2Area = (pstBbox2->f32Xmax - pstBbox2->f32Xmin) * (pstBbox2->f32Ymax - pstBbox2->f32Ymin); f32UnionArea = f32Box1Area + f32Box2Area - f32InterArea; return f32InterArea / f32UnionArea; } static S32 Retinaface_NonMaxSuppression(Retinaface_BBOX_S *pstBbox, U32 u32BboxNum, float f32NmsThresh, U32 u32MaxRoiNum) { U32 i, j; U32 u32Num = 0; float f32Iou = 0.0; for (i = 0; i < u32BboxNum && u32Num < u32MaxRoiNum; i++) { if (pstBbox[i].u32Mask == 0) { u32Num++; for (j = i + 1; j < u32BboxNum; j++) { if 
(pstBbox[j].u32Mask == 0) { f32Iou = Retinaface_Iou(&pstBbox[i], &pstBbox[j]); if (f32Iou >= f32NmsThresh) { pstBbox[j].u32Mask = 1; } } } } } return DLA_OK; } static S32 task_fd_lm_nms(Retinaface_BBOX_S *pstBbox, U32 u32BboxNum, float f32NmsThresh, U32 u32MaxRoiNum, dla_buffer_t *out_buf) { Retinaface_STACK_S *ps32AssistBuf = g_ps32AssistBuf; U32 u32FaceOutNum = 0; U32 i = 0; U32 j = 0; int box_size = 15; /* loc : 4 landmark : 10 conf : 1 */ // quick sort (void)Retinaface_NonRecursiveArgQuickSort((float *)pstBbox, 0, u32BboxNum - 1, sizeof(Retinaface_BBOX_S) / sizeof(float), 4, (Retinaface_STACK_S *)ps32AssistBuf); // Yolov3 and Yolov2 have the same Nms operation (void)Retinaface_NonMaxSuppression(pstBbox, u32BboxNum, f32NmsThresh, u32MaxRoiNum); for (i = 0; i < 1; i++) { for (j = 0; j < u32BboxNum; j++) { if (0 == pstBbox[j].u32Mask) { // box coordinate *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 0) = FD_LM_MAX((pstBbox[j].f32Xmin), 0); *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 1) = FD_LM_MAX((pstBbox[j].f32Ymin), 0); *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 2) = FD_LM_MIN((pstBbox[j].f32Xmax), 1); *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 3) = FD_LM_MIN((pstBbox[j].f32Ymax), 1); // confidence *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 4) = pstBbox[j].f32Conf; // landmark *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 5) = pstBbox[j].f32Landmx1; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 6) = pstBbox[j].f32Landmy1; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 7) = pstBbox[j].f32Landmx2; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 8) = pstBbox[j].f32Landmy2; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 9) = pstBbox[j].f32Landmx3; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 10) = pstBbox[j].f32Landmy3; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 11) = 
pstBbox[j].f32Landmx4; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 12) = pstBbox[j].f32Landmy4; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 13) = pstBbox[j].f32Landmx5; *((float *)(out_buf->vir_addr) + u32FaceOutNum * box_size + 14) = pstBbox[j].f32Landmy5; DLA_TRACE("pstBbox[j].f32Landmx1, pstBbox[j].f32Landmy1, (%f, %f)\n" "pstBbox[j].f32Landmx2, pstBbox[j].f32Landmy2, (%f, %f)\n" "pstBbox[j].f32Landmx3, pstBbox[j].f32Landmy3, (%f, %f)\n" "pstBbox[j].f32Landmx4, pstBbox[j].f32Landmy4, (%f, %f)\n" "pstBbox[j].f32Landmx5, pstBbox[j].f32Landmy5 (%f, %f)\n", pstBbox[j].f32Landmx1, pstBbox[j].f32Landmy1, pstBbox[j].f32Landmx2, pstBbox[j].f32Landmy2, pstBbox[j].f32Landmx3, pstBbox[j].f32Landmy3, pstBbox[j].f32Landmx4, pstBbox[j].f32Landmy4, pstBbox[j].f32Landmx5, pstBbox[j].f32Landmy5); u32FaceOutNum++; } } } out_buf->size = u32FaceOutNum * box_size * sizeof(float); return DLA_OK; } static S32 task_fd_lm_post_process(MI_IPU_TensorVector_t *OutputTensorVector, dla_buffer_t *out_buf) { /* 首次使用申请内存 */ if (NULL == g_pstBbox) { g_pstBbox = (Retinaface_BBOX_S *)calloc(RETINAFACE_OUTPUT_BOX_NUM, sizeof(Retinaface_BBOX_S)); if (NULL == g_pstBbox) { DLA_ERROR("g_pstBbox malloc failed"); return DLA_ERR; } } if (NULL == g_ps32AssistBuf) { g_ps32AssistBuf = (Retinaface_STACK_S *)calloc(RETINAFACE_OUTPUT_BOX_NUM, sizeof(Retinaface_STACK_S)); if (NULL == g_ps32AssistBuf) { DLA_ERROR("g_ps32AssistBuf malloc failed"); return DLA_ERR; } } /* 地址指针区 */ float *pf32Conf = (float *)(OutputTensorVector->astArrayTensors[0].ptTensorData[0]); float *pf32Loc = (float *)(OutputTensorVector->astArrayTensors[1].ptTensorData[0]); float *pf32Landm = (float *)(OutputTensorVector->astArrayTensors[2].ptTensorData[0]); Retinaface_BBOX_S *pstBbox = g_pstBbox; /* feature map 宽度 高度 proiorbox 宽高 缩放倍数 转换系数 */ U32 u32PriorBoxWidth[RETINAFACE_FEATUREUSE_NUM] = { 64, 32, 16 }; // 512 U32 u32PriorBoxHeight[RETINAFACE_FEATUREUSE_NUM] = { 36, 18, 9 }; // 288 // U32 
u32PriorBoxMinSize[RETINAFACE_FEATUREUSE_NUM * 2] = { 16, 32, 64, 128, 256, 512 }; U32 u32PriorBoxMinSize[RETINAFACE_FEATUREUSE_NUM * 2] = { 8, 16, 32, 64, 128, 256 }; U32 u32PriorBoxStepHeight[RETINAFACE_FEATUREUSE_NUM] = { 8, 16, 32 }; U32 u32PriorBoxStepWidth[RETINAFACE_FEATUREUSE_NUM] = { 8, 16, 32 }; float f32Variances[2] = { 0.1, 0.2 }; /* 阈值区 */ float f32ConfThresh = 0.6; float f32NmsThresh = 0.4; U32 u32MaxRoiNum = 20; /* 步长区 */ U32 f32LocStride = 8; U32 f32landmStride = 16; U32 f32confStride = 8; /* 框及关键点存储 */ U32 u32FeaturePointIndex = 0; float f32pCenterX = 0; float f32pCenterY = 0; float f32pBoxHeight = 0; float f32pBoxWidth = 0; float f32CenterX = 0; float f32CenterY = 0; float f32BoxHeight = 0; float f32BoxWidth = 0; float f32BoxConf = 0.0; float f32Landmx1 = 0.0; float f32Landmy1 = 0.0; float f32Landmx2 = 0.0; float f32Landmy2 = 0.0; float f32Landmx3 = 0.0; float f32Landmy3 = 0.0; float f32Landmx4 = 0.0; float f32Landmy4 = 0.0; float f32Landmx5 = 0.0; float f32Landmy5 = 0.0; U32 feature_height = 0; U32 feature_width = 0; U32 u32FeatureMapIndex = 0; U32 u32BboxNum = 0; /* anchor 控制 */ int anchor_num = 2; /* anchor 数量 */ int anchor_index = 0; /* 图像及anchor长宽 */ float face_wh_rate = 0.75; /* 长宽比 */ float img_height = ((float)u32PriorBoxStepHeight[0] * (float)u32PriorBoxHeight[0]); /* 图像高度 */ float img_width = ((float)u32PriorBoxStepWidth[0] * (float)u32PriorBoxWidth[0]); /* 图像宽度 */ float f32pBoxWidth_0 = 0; float f32pBoxHeight_0 = 0; float f32pBoxWidth_1 = 0; float f32pBoxHeight_1 = 0; float f32pBoxWidths[2] = { 0, 0 }; float f32pBoxHeights[2] = { 0, 0 }; for (u32FeatureMapIndex = 0; u32FeatureMapIndex < RETINAFACE_FEATUREUSE_NUM; u32FeatureMapIndex++) { f32pBoxWidth_0 = (((float)u32PriorBoxMinSize[(u32FeatureMapIndex) * 2]) * face_wh_rate) / img_width; f32pBoxHeight_0 = ((float)u32PriorBoxMinSize[(u32FeatureMapIndex) * 2]) / img_height; f32pBoxWidth_1 = ((float)u32PriorBoxMinSize[(u32FeatureMapIndex * 2) + 1] * face_wh_rate) / img_width; f32pBoxHeight_1 
= ((float)u32PriorBoxMinSize[(u32FeatureMapIndex * 2) + 1]) / img_height; f32pBoxWidths[0] = f32pBoxWidth_0; f32pBoxWidths[1] = f32pBoxWidth_1; f32pBoxHeights[0] = f32pBoxHeight_0; f32pBoxHeights[1] = f32pBoxHeight_1; for (feature_height = 0; feature_height < u32PriorBoxHeight[u32FeatureMapIndex]; feature_height++) { for (feature_width = 0; feature_width < u32PriorBoxWidth[u32FeatureMapIndex]; feature_width++) { f32pCenterX = (feature_width + 0.5f) / u32PriorBoxWidth[u32FeatureMapIndex]; f32pCenterY = (feature_height + 0.5f) / u32PriorBoxHeight[u32FeatureMapIndex]; /*** first prior ***/ for (anchor_index = 0; anchor_index < anchor_num; anchor_index++) { /**** rest of priors, skip AspectRatio == 1 ****/ f32pBoxWidth = f32pBoxWidths[anchor_index]; f32pBoxHeight = f32pBoxHeights[anchor_index]; f32CenterX = f32pCenterX + pf32Loc[(2 * u32FeaturePointIndex + anchor_index) * f32LocStride + 0] * f32Variances[0] * f32pBoxWidth; f32CenterY = f32pCenterY + pf32Loc[(2 * u32FeaturePointIndex + anchor_index) * f32LocStride + 1] * f32Variances[0] * f32pBoxHeight; f32BoxWidth = f32pBoxWidth * exp(pf32Loc[(2 * u32FeaturePointIndex + anchor_index) * f32LocStride + 2] * f32Variances[1]); f32BoxHeight = f32pBoxHeight * exp(pf32Loc[(2 * u32FeaturePointIndex + anchor_index) * f32LocStride + 3] * f32Variances[1]); f32BoxConf = pf32Conf[(2 * u32FeaturePointIndex + anchor_index) * f32confStride + 1]; f32Landmx1 = f32pCenterX + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 0] * f32Variances[0] * f32pBoxWidth; f32Landmy1 = f32pCenterY + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 1] * f32Variances[0] * f32pBoxHeight; f32Landmx2 = f32pCenterX + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 2] * f32Variances[0] * f32pBoxWidth; f32Landmy2 = f32pCenterY + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 3] * f32Variances[0] * f32pBoxHeight; f32Landmx3 = f32pCenterX + pf32Landm[(2 * 
u32FeaturePointIndex + anchor_index) * f32landmStride + 4] * f32Variances[0] * f32pBoxWidth; f32Landmy3 = f32pCenterY + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 5] * f32Variances[0] * f32pBoxHeight; f32Landmx4 = f32pCenterX + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 6] * f32Variances[0] * f32pBoxWidth; f32Landmy4 = f32pCenterY + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 7] * f32Variances[0] * f32pBoxHeight; f32Landmx5 = f32pCenterX + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 8] * f32Variances[0] * f32pBoxWidth; f32Landmy5 = f32pCenterY + pf32Landm[(2 * u32FeaturePointIndex + anchor_index) * f32landmStride + 9] * f32Variances[0] * f32pBoxHeight; if (f32BoxConf > f32ConfThresh) { pstBbox[u32BboxNum].f32Xmin = (float)(f32CenterX - f32BoxWidth * 0.5f); pstBbox[u32BboxNum].f32Ymin = (float)(f32CenterY - f32BoxHeight * 0.5f); pstBbox[u32BboxNum].f32Xmax = (float)(f32CenterX + f32BoxWidth * 0.5f); pstBbox[u32BboxNum].f32Ymax = (float)(f32CenterY + f32BoxHeight * 0.5f); pstBbox[u32BboxNum].f32Conf = (float)(f32BoxConf); pstBbox[u32BboxNum].f32Landmx1 = (float)(f32Landmx1); pstBbox[u32BboxNum].f32Landmy1 = (float)(f32Landmy1); pstBbox[u32BboxNum].f32Landmx2 = (float)(f32Landmx2); pstBbox[u32BboxNum].f32Landmy2 = (float)(f32Landmy2); pstBbox[u32BboxNum].f32Landmx3 = (float)(f32Landmx3); pstBbox[u32BboxNum].f32Landmy3 = (float)(f32Landmy3); pstBbox[u32BboxNum].f32Landmx4 = (float)(f32Landmx4); pstBbox[u32BboxNum].f32Landmy4 = (float)(f32Landmy4); pstBbox[u32BboxNum].f32Landmx5 = (float)(f32Landmx5); pstBbox[u32BboxNum].f32Landmy5 = (float)(f32Landmy5); pstBbox[u32BboxNum].u32Mask = 0; u32BboxNum++; } } u32FeaturePointIndex++; } } } // nms task_fd_lm_nms(pstBbox, u32BboxNum, f32NmsThresh, u32MaxRoiNum, out_buf); return DLA_OK; } static S32 task_lpd_lpr_post_process(const network_desc_t *net_desc, network_t *network, MI_IPU_TensorVector_t *OutputTensorVector, 
dla_buffer_t *out_buf) { U32 grid_width, grid_height, classes, box_num, onechannel; float obj_thresh, iou_thresh; float *anchor; ipu_darknet_yolov2_param_t *net_param; net_param = (ipu_darknet_yolov2_param_t *)net_desc->platform_spec; grid_width = net_param->grid_width; grid_height = net_param->grid_height; obj_thresh = net_param->conf_thresh; classes = net_param->class_num; box_num = net_param->box_num_per_grid; onechannel = net_param->values_per_box; iou_thresh = net_param->nms_thresh; anchor = net_param->anchor; /* recognition */ float *data = (float *)(OutputTensorVector->astArrayTensors[0].ptTensorData[0]); darknet_yolov2_generate_bbox(data, out_buf->vir_addr, grid_width, grid_height, obj_thresh, classes, box_num, anchor, onechannel, iou_thresh, DLA_LPR_CANDI_WORD_NUM, DLA_LPR_WORD_NUM); return DLA_OK; } S32 hw_network_post_process(dla_net_e id, const network_desc_t *net_desc, MI_IPU_TensorVector_t *OutputTensorVector, dla_buffer_t *out_buf) { if (!OutputTensorVector) { DLA_ERROR("invalid args"); return DLA_ERR; } /* detection */ if (NET_FD_LM == id) { task_fd_lm_post_process(OutputTensorVector, out_buf); } else if (NET_FSS == id || NET_PD == id || NET_OD21 == id || NET_OD22 == id || NET_FPD == id || NET_CPD == id || NET_PKGD == id || NET_FOD == id) { float *pfBBox = (float *)(OutputTensorVector->astArrayTensors[0].ptTensorData[0]); float *pfClass = (float *)(OutputTensorVector->astArrayTensors[1].ptTensorData[0]); float *pfScore = (float *)(OutputTensorVector->astArrayTensors[2].ptTensorData[0]); float *pfDetect = (float *)(OutputTensorVector->astArrayTensors[3].ptTensorData[0]); int s32DetectCount = (int)(*pfDetect); int i = 0; for (i = 0; i < s32DetectCount; i++) { // box coordinate *((float *)(out_buf->vir_addr) + i * 6 + 1) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 0); *((float *)(out_buf->vir_addr) + i * 6 + 0) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 1); *((float *)(out_buf->vir_addr) + i * 6 + 3) = *(pfBBox + i * ALIGN_UP(4, 
INNER_MOST_ALIGNMENT) + 2); *((float *)(out_buf->vir_addr) + i * 6 + 2) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 3); // confidence *((float *)(out_buf->vir_addr) + i * 6 + 4) = *(pfScore + i); // label *((float *)(out_buf->vir_addr) + i * 6 + 5) = (float)((int)(*(pfClass + i))); } out_buf->size = s32DetectCount * 6 * sizeof(float); } else if (NET_LPR_W == id || NET_LPD == id) { task_lpd_lpr_post_process(net_desc, g_net_ctx.nets[id], OutputTensorVector, out_buf); } else if (NET_OT_B1 == id || NET_OT_B2 == id) { for (int i = 0; i < OutputTensorVector->u32TensorCount; i++) { int outer_size = 1; int inner_size_before_align_up = 0; int k = 0; if (NET_OT_B1 == id) { outer_size = 1 * 8 * 8; } else if (NET_OT_B2 == id) { outer_size = 1 * 16 * 16; } inner_size_before_align_up = 48; int inner_size = ALIGN_UP(inner_size_before_align_up, INNER_MOST_ALIGNMENT); float *data = (float *)OutputTensorVector->astArrayTensors[i].ptTensorData[0]; for (k = 0; k < outer_size; k++) { memcpy((float *)(out_buf->vir_addr) + k * inner_size_before_align_up, data + k * inner_size, inner_size_before_align_up * sizeof(float)); } out_buf->size = outer_size * inner_size_before_align_up; } } else if (NET_OT_HEAD == id) { float *pfClass = (float *)OutputTensorVector->astArrayTensors[0].ptTensorData[0]; float *pfBBox = (float *)OutputTensorVector->astArrayTensors[1].ptTensorData[0]; for (int i = 0; i < 16 * 16; i++) { *((float *)(out_buf->vir_addr) + i + 256 * 0) = *(pfClass + i * ALIGN_UP(2, INNER_MOST_ALIGNMENT) + 0); *((float *)(out_buf->vir_addr) + i + 256 * 1) = *(pfClass + i * ALIGN_UP(2, INNER_MOST_ALIGNMENT) + 1); *((float *)(out_buf->vir_addr) + i + 256 * 2) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 0); *((float *)(out_buf->vir_addr) + i + 256 * 3) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 1); *((float *)(out_buf->vir_addr) + i + 256 * 4) = *(pfBBox + i * ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 2); *((float *)(out_buf->vir_addr) + i + 256 * 5) = *(pfBBox + i * 
ALIGN_UP(4, INNER_MOST_ALIGNMENT) + 3); } } else if (NET_SED == id || NET_SED_8K == id || NET_SED_16K == id) { float *score = (float *)OutputTensorVector->astArrayTensors[0].ptTensorData[0]; for (int i = 0; i < DLA_SED_NUM; i++) { // confidence *((float *)out_buf->vir_addr + i) = *(score + i); } } else { /* recognition */ network_t *network; network = g_net_ctx.nets[id]; for (int i = 0; i < OutputTensorVector->u32TensorCount; i++) { int outer_size = 1; int inner_size_before_align_up = 0; for (int j = 0; j < network->desc.astMI_OutputTensorDescs[i].u32TensorDim - 1; j++) { outer_size *= *(int *)(network->desc.astMI_OutputTensorDescs[i].u32TensorShape + j); } inner_size_before_align_up = *((int *)network->desc.astMI_OutputTensorDescs[i].u32TensorShape + network->desc.astMI_OutputTensorDescs[i].u32TensorDim - 1); int inner_size = ALIGN_UP(inner_size_before_align_up, INNER_MOST_ALIGNMENT); float *data = (float *)(OutputTensorVector->astArrayTensors[i].ptTensorData[0]); for (int k = 0; k < outer_size; k++) { memcpy((float *)(out_buf->vir_addr) + k * inner_size_before_align_up, data + k * inner_size, inner_size_before_align_up * sizeof(float)); } out_buf->size = outer_size * inner_size_before_align_up; } } return DLA_OK; } S32 hw_network_forward(dla_net_e id, const dla_buffer_t *in_buf, dla_buffer_t *out_buf) { network_t *network = NULL; dla_buffer_t tmp_in_buf = { 0 }; if (id >= NET_MAX || !in_buf || !out_buf || !in_buf->vir_addr || !out_buf->vir_addr || !g_net_ctx.nets[id]) { DLA_ERROR("invalid args"); return DLA_ERR; } if (!g_net_ctx.inited) { DLA_ERROR("network ctx not inited"); return DLA_ERR; } network = g_net_ctx.nets[id]; if (NET_STATUS_LOADED != network->status) { DLA_ERROR("network %u not load", id); return DLA_ERR; } const network_desc_t *net_desc = NULL; net_desc = network_get_desc(id); if (!net_desc) { DLA_ERROR("Failed to load desc file for model %d", id); return DLA_ERR; } /* 因为检测模型需要后处理,不判断out_buf->size与network->out_size */ if (in_buf->size < 
network->in_size) { DLA_ERROR("io buff size too small %u %u", in_buf->size, out_buf->size); return DLA_ERR; } if (in_buf->phy_addr) { /* 输入图片由硬件地址,可直接使用 */ network->InputTensorVector.astArrayTensors[0].ptTensorData[0] = in_buf->vir_addr; network->InputTensorVector.astArrayTensors[0].phyTensorAddr[0] = in_buf->phy_addr; } else { /* 否则拷贝到MMA内存再使用 */ if (DLA_OK != hw_network_get_input(id, &tmp_in_buf)) { DLA_ERROR("get network %d input buff failed", id); return DLA_ERR; } memcpy(tmp_in_buf.vir_addr, in_buf->vir_addr, tmp_in_buf.size); dla_flush_cache(tmp_in_buf.vir_addr, tmp_in_buf.size); network->InputTensorVector.astArrayTensors[0].ptTensorData[0] = tmp_in_buf.vir_addr; network->InputTensorVector.astArrayTensors[0].phyTensorAddr[0] = tmp_in_buf.phy_addr; } if (MI_SUCCESS != MI_IPU_Invoke(network->u32ChnId, &network->InputTensorVector, &network->OutputTensorVector)) { DLA_ERROR("ipu invoke function failed"); return DLA_ERR; } hw_network_post_process(id, net_desc, &(network->OutputTensorVector), out_buf); return DLA_OK; } S32 hw_network_get_input(dla_net_e id, dla_buffer_t *in_buf) { if (!g_net_ctx.inited) { DLA_ERROR("network ctx not inited"); return DLA_ERR; } if (id >= NET_MAX || !in_buf || !g_net_ctx.nets[id]) { DLA_ERROR("invalid args"); return DLA_ERR; } network_t *network = g_net_ctx.nets[id]; if (NET_STATUS_LOADED != network->status) { DLA_ERROR("network %u not load", id); return DLA_ERR; } if (network->in_size > g_net_ctx.in_buf.size) { DLA_DEBUG("realloc input buff from %u to %u", g_net_ctx.in_buf.size, network->in_size); dla_free_buffer(&g_net_ctx.in_buf); g_net_ctx.in_buf.size = network->in_size; if (DLA_OK != dla_alloc_buffer(&g_net_ctx.in_buf, 1)) { memset(&g_net_ctx.in_buf, 0, sizeof(dla_buffer_t)); DLA_ERROR("alloc input buff failed"); return DLA_ERR; } } in_buf->size = network->in_size; in_buf->vir_addr = g_net_ctx.in_buf.vir_addr; in_buf->phy_addr = g_net_ctx.in_buf.phy_addr; return DLA_OK; } S32 hw_network_get_input_desc(dla_net_e id, dla_img_desc_t 
*img_desc) { if (!g_net_ctx.inited) { DLA_ERROR("network ctx not inited"); return DLA_ERR; } if (id >= NET_MAX || !img_desc || !g_net_ctx.nets[id]) { DLA_ERROR("invalid args"); return DLA_ERR; } network_t *network = g_net_ctx.nets[id]; if (NET_STATUS_LOADED != network->status) { DLA_ERROR("network %u not load", id); return DLA_ERR; } *img_desc = network->img_desc; return DLA_OK; } S32 hw_network_get_load_status(dla_net_e id) { if (id >= NET_MAX) { DLA_ERROR("invalid net id"); return NET_STATUS_NOT_LOAD; } if (!g_net_ctx.nets[id]) { return NET_STATUS_NOT_LOAD; } network_t *network = g_net_ctx.nets[id]; return network->status; } static S32 hw_network_load_float_array(JSON_OBJPTR jso_obj, const char *key, float *res, U32 len) { S32 i = 0; JSON_OBJPTR jso_array = NULL; double tmp_val = 0; jso_array = jso_obj_get(jso_obj, key); if (NULL == jso_array) { DLA_ERROR("No %s", key); return DLA_ERR; } if (!jso_is_array(jso_array)) { DLA_ERROR("obj %s not array", key); return DLA_ERR; } if (len != jso_array_length(jso_array)) { DLA_ERROR("%s len not equal: %d", key, len); return DLA_ERR; } for (i = 0; i < len; ++i) { jso_get_double(jso_array_get_idx(jso_array, i), &tmp_val); res[i] = tmp_val; } return DLA_OK; } S32 hw_network_init_spec_desc(JSON_OBJPTR desc_obj, void *desc) { network_desc_t *net_desc = NULL; ipu_darknet_yolov2_param_t *darknet_yolov2_desc = NULL; JSON_OBJPTR darknet_yolov2_obj = NULL; double tmp_val = 0; if (NULL == desc_obj || NULL == desc) { DLA_ERROR("Invalid args"); return DLA_ERR; } net_desc = (network_desc_t *)desc; if (NULL != net_desc->platform_spec) { DLA_ERROR("platform specific info is already loaded"); return DLA_ERR; } if (NET_TYPE_DARKNET_YOLOV2 == net_desc->type) { darknet_yolov2_obj = jso_obj_get(desc_obj, "darknet_yolov2"); if (NULL == darknet_yolov2_obj) { DLA_ERROR("No darknet_yolov2 specific info in network desc"); goto error; } darknet_yolov2_desc = (ipu_darknet_yolov2_param_t *)malloc(sizeof(ipu_darknet_yolov2_param_t)); if (NULL == 
darknet_yolov2_desc) { DLA_ERROR("malloc darknet_yolov2 desc failed"); goto error; } if (0 != jso_obj_get_int(darknet_yolov2_obj, "box_num_per_grid", (int *)(&(darknet_yolov2_desc->box_num_per_grid)))) { DLA_ERROR("No box_num_per_grid"); goto error; } darknet_yolov2_desc->bias_num_per_grid = darknet_yolov2_desc->box_num_per_grid * 2; if (0 != jso_obj_get_int(darknet_yolov2_obj, "class_num", (int *)(&(darknet_yolov2_desc->class_num)))) { DLA_ERROR("No class_num"); goto error; } darknet_yolov2_desc->values_per_box = darknet_yolov2_desc->class_num + 5; if (0 != jso_obj_get_double(darknet_yolov2_obj, "nms_thresh", &tmp_val)) { DLA_ERROR("No nms_thresh"); goto error; } darknet_yolov2_desc->nms_thresh = tmp_val; if (0 != jso_obj_get_double(darknet_yolov2_obj, "conf_thresh", &tmp_val)) { DLA_ERROR("No conf_thresh"); goto error; } darknet_yolov2_desc->conf_thresh = tmp_val; if (0 != jso_obj_get_int(darknet_yolov2_obj, "grid_width", (int *)(&(darknet_yolov2_desc->grid_width)))) { DLA_ERROR("No grid_width"); goto error; } if (0 != jso_obj_get_int(darknet_yolov2_obj, "grid_height", (int *)(&(darknet_yolov2_desc->grid_height)))) { DLA_ERROR("No grid_height"); goto error; } DLA_DEBUG("darknet_yolov2 box_num_per_grid: %d", darknet_yolov2_desc->box_num_per_grid); DLA_DEBUG("darknet_yolov2 bias_num_per_grid: %d", darknet_yolov2_desc->bias_num_per_grid); DLA_DEBUG("darknet_yolov2 class_num: %d", darknet_yolov2_desc->class_num); DLA_DEBUG("darknet_yolov2 values_per_box: %d", darknet_yolov2_desc->values_per_box); DLA_DEBUG("darknet_yolov2 nms_thresh: %f", darknet_yolov2_desc->nms_thresh); DLA_DEBUG("darknet_yolov2 conf_thresh: %f", darknet_yolov2_desc->conf_thresh); darknet_yolov2_desc->anchor = (float *)malloc(darknet_yolov2_desc->bias_num_per_grid * sizeof(float)); if (NULL == darknet_yolov2_desc->anchor) { DLA_ERROR("malloc anchor failed"); goto error; } memset(darknet_yolov2_desc->anchor, 0, darknet_yolov2_desc->bias_num_per_grid * sizeof(float)); if (DLA_OK != 
hw_network_load_float_array(darknet_yolov2_obj, "anchor", darknet_yolov2_desc->anchor, darknet_yolov2_desc->bias_num_per_grid)) { DLA_ERROR("load anchor failed"); goto error; } } net_desc->platform_spec = (U8 *)darknet_yolov2_desc; return DLA_OK; error: if (darknet_yolov2_desc) { if (darknet_yolov2_desc->anchor) { free(darknet_yolov2_desc->anchor); } free(darknet_yolov2_desc); net_desc->platform_spec = NULL; } return DLA_ERR; } void hw_network_deinit_spec_desc(void *desc) { network_desc_t *net_desc = NULL; ipu_darknet_yolov2_param_t *darknet_yolov2_desc = NULL; if (NULL == desc) { DLA_ERROR("Invalid args"); return; } net_desc = (network_desc_t *)desc; if (NULL == net_desc->platform_spec) { DLA_DEBUG("No platform specific info"); return; } if (NET_TYPE_DARKNET_YOLOV2 == net_desc->type) { darknet_yolov2_desc = (ipu_darknet_yolov2_param_t *)net_desc->platform_spec; if (darknet_yolov2_desc->anchor) { free(darknet_yolov2_desc->anchor); } free(darknet_yolov2_desc); net_desc->platform_spec = NULL; } return; } S32 dla_alloc_buffer(dla_buffer_t *buf, U8 cache) { U32 ret = 0; U32 size = 0; if (!buf || 0 == buf->size) { return DLA_ERR; } size = ALIGIN_MMA_PAGE(buf->size); ret = MI_SYS_MMA_Alloc_Wrap((MI_U8 *)MMA_HEAP_NAME, size, &(buf->phy_addr)); if (ret != MI_SUCCESS) { return DLA_ERR; } ret = MI_SYS_Mmap(buf->phy_addr, size, (void **)&buf->vir_addr, !!cache); if (ret != MI_SUCCESS) { MI_SYS_MMA_Free_Wrap(buf->phy_addr); return DLA_ERR; } return DLA_OK; } void dla_free_buffer(dla_buffer_t *buf) { U32 size = 0; if (!buf || !buf->vir_addr || 0 == buf->phy_addr || 0 == buf->size) { return; } size = ALIGIN_MMA_PAGE(buf->size); MI_SYS_Munmap(buf->vir_addr, size); MI_SYS_MMA_Free_Wrap(buf->phy_addr); } void dla_flush_cache(void *vir_addr, U32 size) { MI_SYS_FlushInvCache(vir_addr, size); } S32 hw_network_get_reuse_info(dla_model_reuse_param_t *model_reuse_param) { if (!model_reuse_param) { DLA_ERROR("invalid args"); return DLA_ERR; } g_net_ctx.model_reuse_param = 
model_reuse_param; return DLA_OK; } void dla_change_reuse_info(dla_net_e id, MI_IPU_CHN chn_id) { if (!g_net_ctx.inited) { DLA_ERROR("network ctx not inited"); return; } if (id >= NET_MAX) { DLA_ERROR("invalid args"); return; } #ifdef CAM_OS_RTK g_net_ctx.model_reuse_param[id].bm = 1; g_net_ctx.model_reuse_param[id].chn_id = chn_id; #elif defined(TAPO_DUALOS_CAM) DLA_DEBUG("before change bm %d chn_id %d", g_net_ctx.model_reuse_param[id].bm, g_net_ctx.model_reuse_param[id].chn_id); g_net_ctx.model_reuse_param[id].bm = 0; g_net_ctx.model_reuse_param[id].chn_id = 255; DLA_DEBUG("after change bm %d chn_id %d", g_net_ctx.model_reuse_param[id].bm, g_net_ctx.model_reuse_param[id].chn_id); #endif return; } #endif 这段代码sed部分又是从哪取的音频输入作为模型输入
最新发布
12-04
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值