Commit @bc54f796a43b056b890f560ae62004c60dd2337a

윤영준 05-28

onnx_gpu inference as inference_gpu_.py: vectorized the for loop for a ~100x speedup in bbox processing

Vectorized the for loop for a ~100x speedup in bbox processing

@bc54f796a43b056b890f560ae62004c60dd2337a

96c0514

bc54f79

yoloseg/inference_gpu_.py

--- yoloseg/inference_gpu_.py

+++ yoloseg/inference_gpu_.py


+import time
+
 import cv2
 import numpy as np
 import random

         # Prepare input data as a dictionary
         inputs = {self.session.get_inputs()[0].name: blob}
         # Run model
+        t1 = time.time()
         outputs = self.session.run(None, inputs)
+        t2 = time.time()
+        print("model infer :")
+        print(t2-t1)
         outputs_bbox = outputs[0]
         outputs_mask = outputs[1]
-
+        t1 = time.time()
         detections = self.process_detections(outputs_bbox, model_input)
+        t2 = time.time()
+        print("bbox :")
+        print(t2-t1)
+        t1 = time.time()
         mask_maps = self.process_mask_output(detections, outputs_mask, model_input.shape)
+        t2 = time.time()
+        print("mask :")
+        print(t2-t1)
 
         return detections, mask_maps
 

         x_factor = model_input.shape[1] / self.model_shape[0]
         y_factor = model_input.shape[0] / self.model_shape[1]
 
-        class_ids = []
-        confidences = []
-        mask_coefficients = []
-        boxes = []
+        t1 = time.time()
+        # outputs_bbox has shape (B, 4+CLASS_NUM+32, N): axis 1 holds [x, y, w, h, class scores..., mask coefficients...]
+        # and axis 2 indexes the N candidate detections. Example outputs_bbox.shape -> (1, 4+CLASS_NUM+32, 8400)
 
-        for detection in outputs_bbox[0].T:
-            # This segmentation model uses yolact architecture to predict mask
-            # the output tensor dimension for yolo-v8-seg is B x [X, Y, W, H, C1, C2, ..., P1, ...,P32] * 8400
-            # where C{n} are confidence score for each class
-            # and P{n} are coefficient for each proto masks. (32 by default)
-            scores_classification = detection[4:4+CLASS_NUM]
-            scores_segmentation = detection[4+CLASS_NUM:]
-            class_id = np.argmax(scores_classification, axis=0)
-            confidence = scores_classification[class_id]
+        # Extract basic bbox coordinates and scores
+        x, y, w, h = outputs_bbox[:, 0], outputs_bbox[:, 1], outputs_bbox[:, 2], outputs_bbox[:, 3]
+        scores = outputs_bbox[:, 4:4 + CLASS_NUM]
 
-            thres = self.model_score_threshold
-            w_thres = 40
-            h_thres = 40
+        # Calculate confidences and class IDs
+        confidences = np.max(scores, axis=1)
+        class_ids = np.argmax(scores, axis=1)
 
-            x, y, w, h = detection[:4]
-            # if bboxes are too small, it just skips, and it is not a bad idea since we do not need to detect small areas
-            if w < w_thres or h < h_thres:
-                continue
+        # Filter out small boxes
+        min_width, min_height = 40, 40
+        valid_size = (w >= min_width) & (h >= min_height)
 
-            if confidence > thres:
+        # Apply confidence threshold
+        valid_confidence = (confidences > self.model_score_threshold)
 
-                left = int((x - 0.5 * w) * x_factor)
-                top = int((y - 0.5 * h) * y_factor)
-                width = int(w * x_factor)
-                height = int(h * y_factor)
+        # Combine all conditions
+        valid_detections = valid_size & valid_confidence
 
-                boxes.append([left, top, width, height])
-                confidences.append(float(confidence))
-                mask_coefficients.append(scores_segmentation)
-                class_ids.append(class_id)
+        # Coefficients for each proto mask (the entries after the class scores)
+        scores_segmentation = outputs_bbox[:, 4 + CLASS_NUM:]
+
+        # Filter arrays based on valid detections
+        filtered_x = x[valid_detections]
+        filtered_y = y[valid_detections]
+        filtered_w = w[valid_detections]
+        filtered_h = h[valid_detections]
+        filtered_confidences = confidences[valid_detections]
+        filtered_class_ids = class_ids[valid_detections]
+        filtered_mask_coefficient = np.transpose(scores_segmentation, (2,0,1))[valid_detections.T]
+
+
+        # Calculate adjusted box coordinates
+        left = (filtered_x - 0.5 * filtered_w) * x_factor
+        top = (filtered_y - 0.5 * filtered_h) * y_factor
+        width = filtered_w * x_factor
+        height = filtered_h * y_factor
+
+        # Prepare final arrays
+        boxes = np.vstack([left, top, width, height]).T
+        mask_coefficients = scores_segmentation
+
+        # If you need to use integer types for some reason (e.g., indexing later on):
+        boxes = boxes.astype(int)
+
+        # You can further process these arrays or convert them to lists if needed:
+        boxes = boxes.tolist()
+        filtered_confidences = filtered_confidences.tolist()
+        filtered_class_ids = filtered_class_ids.tolist()
+        t2 = time.time()
+        print("cursed for loop")
+        print(t2-t1)
         confidences = (confidences)
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.model_score_threshold, self.model_nms_threshold)
+        t1 = time.time()
+        if not len(boxes) <= 0 :
+            indices = cv2.dnn.NMSBoxes(boxes, filtered_confidences, self.model_score_threshold, self.model_nms_threshold)
+        else:
+            indices = []
+        t2 = time.time()
+        print("nms : ")
+        print(t2-t1)
 
         detections = []
         for i in indices:
             idx = i
             result = {
-                'class_id': class_ids[i],
-                'confidence': confidences[i],
-                'mask_coefficients': np.array(mask_coefficients[i]),
+                'class_id': filtered_class_ids[i],
+                'confidence': filtered_confidences[i],
+                'mask_coefficients': np.array(filtered_mask_coefficient[i]),
                 'box': boxes[idx],
-                'class_name': self.classes[class_ids[i]],
+                'class_name': self.classes[filtered_class_ids[i]],
                 'color': (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
             }
             detections.append(result)

     model_path = 'yoloseg/weight/best.onnx'
     classes_txt_file = 'config_files/yolo_config.txt'
     # image_path = 'yoloseg/img3.jpg'
-    image_path = 'testing.png'
+    image_path = 'yoloseg/img3.jpg'
 
     model_input_shape = (640, 640)
     inference_engine = Inference(

Add a comment

Open 0
Closed 0

List

...	...	@@ -1,3 +1,5 @@
	1	+import time
	2	+
1	3	import cv2
2	4	import numpy as np
3	5	import random
...	...	@@ -34,12 +36,23 @@
34	36	# Prepare input data as a dictionary
35	37	inputs = {self.session.get_inputs()[0].name: blob}
36	38	# Run model
	39	+ t1 = time.time()
37	40	outputs = self.session.run(None, inputs)
	41	+ t2 = time.time()
	42	+ print("model infer :")
	43	+ print(t2-t1)
38	44	outputs_bbox = outputs[0]
39	45	outputs_mask = outputs[1]
40		-
	46	+ t1 = time.time()
41	47	detections = self.process_detections(outputs_bbox, model_input)
	48	+ t2 = time.time()
	49	+ print("bbox :")
	50	+ print(t2-t1)
	51	+ t1 = time.time()
42	52	mask_maps = self.process_mask_output(detections, outputs_mask, model_input.shape)
	53	+ t2 = time.time()
	54	+ print("mask :")
	55	+ print(t2-t1)
43	56
44	57	return detections, mask_maps
45	58
...	...	@@ -74,53 +87,80 @@
74	87	x_factor = model_input.shape[1] / self.model_shape[0]
75	88	y_factor = model_input.shape[0] / self.model_shape[1]
76	89
77		- class_ids = []
78		- confidences = []
79		- mask_coefficients = []
80		- boxes = []
	90	+ t1 = time.time()
	91	+ # outputs_bbox has shape (B, 4+CLASS_NUM+32, N): axis 1 holds [x, y, w, h, class scores..., mask coefficients...]
	92	+ # and axis 2 indexes the N candidate detections. Example outputs_bbox.shape -> (1, 4+CLASS_NUM+32, 8400)
81	93
82		- for detection in outputs_bbox[0].T:
83		- # This segmentation model uses yolact architecture to predict mask
84		- # the output tensor dimension for yolo-v8-seg is B x [X, Y, W, H, C1, C2, ..., P1, ...,P32] * 8400
85		- # where C{n} are confidence score for each class
86		- # and P{n} are coefficient for each proto masks. (32 by default)
87		- scores_classification = detection[4:4+CLASS_NUM]
88		- scores_segmentation = detection[4+CLASS_NUM:]
89		- class_id = np.argmax(scores_classification, axis=0)
90		- confidence = scores_classification[class_id]
	94	+ # Extract basic bbox coordinates and scores
	95	+ x, y, w, h = outputs_bbox[:, 0], outputs_bbox[:, 1], outputs_bbox[:, 2], outputs_bbox[:, 3]
	96	+ scores = outputs_bbox[:, 4:4 + CLASS_NUM]
91	97
92		- thres = self.model_score_threshold
93		- w_thres = 40
94		- h_thres = 40
	98	+ # Calculate confidences and class IDs
	99	+ confidences = np.max(scores, axis=1)
	100	+ class_ids = np.argmax(scores, axis=1)
95	101
96		- x, y, w, h = detection[:4]
97		- # if bboxes are too small, it just skips, and it is not a bad idea since we do not need to detect small areas
98		- if w < w_thres or h < h_thres:
99		- continue
	102	+ # Filter out small boxes
	103	+ min_width, min_height = 40, 40
	104	+ valid_size = (w >= min_width) & (h >= min_height)
100	105
101		- if confidence > thres:
	106	+ # Apply confidence threshold
	107	+ valid_confidence = (confidences > self.model_score_threshold)
102	108
103		- left = int((x - 0.5 * w) * x_factor)
104		- top = int((y - 0.5 * h) * y_factor)
105		- width = int(w * x_factor)
106		- height = int(h * y_factor)
	109	+ # Combine all conditions
	110	+ valid_detections = valid_size & valid_confidence
107	111
108		- boxes.append([left, top, width, height])
109		- confidences.append(float(confidence))
110		- mask_coefficients.append(scores_segmentation)
111		- class_ids.append(class_id)
	112	+ # Coefficients for each proto mask (the entries after the class scores)
	113	+ scores_segmentation = outputs_bbox[:, 4 + CLASS_NUM:]
	114	+
	115	+ # Filter arrays based on valid detections
	116	+ filtered_x = x[valid_detections]
	117	+ filtered_y = y[valid_detections]
	118	+ filtered_w = w[valid_detections]
	119	+ filtered_h = h[valid_detections]
	120	+ filtered_confidences = confidences[valid_detections]
	121	+ filtered_class_ids = class_ids[valid_detections]
	122	+ filtered_mask_coefficient = np.transpose(scores_segmentation, (2,0,1))[valid_detections.T]
	123	+
	124	+
	125	+ # Calculate adjusted box coordinates
	126	+ left = (filtered_x - 0.5 * filtered_w) * x_factor
	127	+ top = (filtered_y - 0.5 * filtered_h) * y_factor
	128	+ width = filtered_w * x_factor
	129	+ height = filtered_h * y_factor
	130	+
	131	+ # Prepare final arrays
	132	+ boxes = np.vstack([left, top, width, height]).T
	133	+ mask_coefficients = scores_segmentation
	134	+
	135	+ # If you need to use integer types for some reason (e.g., indexing later on):
	136	+ boxes = boxes.astype(int)
	137	+
	138	+ # You can further process these arrays or convert them to lists if needed:
	139	+ boxes = boxes.tolist()
	140	+ filtered_confidences = filtered_confidences.tolist()
	141	+ filtered_class_ids = filtered_class_ids.tolist()
	142	+ t2 = time.time()
	143	+ print("cursed for loop")
	144	+ print(t2-t1)
112	145	confidences = (confidences)
113		- indices = cv2.dnn.NMSBoxes(boxes, confidences, self.model_score_threshold, self.model_nms_threshold)
	146	+ t1 = time.time()
	147	+ if not len(boxes) <= 0 :
	148	+ indices = cv2.dnn.NMSBoxes(boxes, filtered_confidences, self.model_score_threshold, self.model_nms_threshold)
	149	+ else:
	150	+ indices = []
	151	+ t2 = time.time()
	152	+ print("nms : ")
	153	+ print(t2-t1)
114	154
115	155	detections = []
116	156	for i in indices:
117	157	idx = i
118	158	result = {
119		- 'class_id': class_ids[i],
120		- 'confidence': confidences[i],
121		- 'mask_coefficients': np.array(mask_coefficients[i]),
	159	+ 'class_id': filtered_class_ids[i],
	160	+ 'confidence': filtered_confidences[i],
	161	+ 'mask_coefficients': np.array(filtered_mask_coefficient[i]),
122	162	'box': boxes[idx],
123		- 'class_name': self.classes[class_ids[i]],
	163	+ 'class_name': self.classes[filtered_class_ids[i]],
124	164	'color': (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
125	165	}
126	166	detections.append(result)
...	...	@@ -243,7 +283,7 @@
243	283	model_path = 'yoloseg/weight/best.onnx'
244	284	classes_txt_file = 'config_files/yolo_config.txt'
245	285	# image_path = 'yoloseg/img3.jpg'
246		- image_path = 'testing.png'
	286	+ image_path = 'yoloseg/img3.jpg'
247	287
248	288	model_input_shape = (640, 640)
249	289	inference_engine = Inference(

Delete comment