윤영준 윤영준 05-28
onnx_gpu inference as inference_gpu_.py Vectorized for loop for x100 on bbox processing
Vectorized for loop for x100 on bbox processing
@bc54f796a43b056b890f560ae62004c60dd2337a
yoloseg/inference_gpu_.py
--- yoloseg/inference_gpu_.py
+++ yoloseg/inference_gpu_.py
@@ -1,3 +1,5 @@
+import time
+
 import cv2
 import numpy as np
 import random
@@ -34,12 +36,23 @@
         # Prepare input data as a dictionary
         inputs = {self.session.get_inputs()[0].name: blob}
         # Run model
+        t1 = time.time()
         outputs = self.session.run(None, inputs)
+        t2 = time.time()
+        print("model infer :")
+        print(t2-t1)
         outputs_bbox = outputs[0]
         outputs_mask = outputs[1]
-
+        t1 = time.time()
         detections = self.process_detections(outputs_bbox, model_input)
+        t2 = time.time()
+        print("bbox :")
+        print(t2-t1)
+        t1 = time.time()
         mask_maps = self.process_mask_output(detections, outputs_mask, model_input.shape)
+        t2 = time.time()
+        print("mask :")
+        print(t2-t1)
 
         return detections, mask_maps
 
@@ -74,53 +87,80 @@
         x_factor = model_input.shape[1] / self.model_shape[0]
         y_factor = model_input.shape[0] / self.model_shape[1]
 
-        class_ids = []
-        confidences = []
-        mask_coefficients = []
-        boxes = []
+        t1 = time.time()
+        # outputs_bbox has shape (B, 4+CLASS_NUM+32, N): batch, per-detection features [X, Y, W, H, C1.., P1..P32], then N detections
+        # Example outputs_bbox.shape -> (1, 4+CLASS_NUM+32, 8400)
 
-        for detection in outputs_bbox[0].T:
-            # This segmentation model uses yolact architecture to predict mask
-            # the output tensor dimension for yolo-v8-seg is B x [X, Y, W, H, C1, C2, ..., P1, ...,P32] * 8400
-            # where C{n} are confidence score for each class
-            # and P{n} are coefficient for each proto masks. (32 by default)
-            scores_classification = detection[4:4+CLASS_NUM]
-            scores_segmentation = detection[4+CLASS_NUM:]
-            class_id = np.argmax(scores_classification, axis=0)
-            confidence = scores_classification[class_id]
+        # Extract basic bbox coordinates and scores
+        x, y, w, h = outputs_bbox[:, 0], outputs_bbox[:, 1], outputs_bbox[:, 2], outputs_bbox[:, 3]
+        scores = outputs_bbox[:, 4:4 + CLASS_NUM]
 
-            thres = self.model_score_threshold
-            w_thres = 40
-            h_thres = 40
+        # Calculate confidences and class IDs
+        confidences = np.max(scores, axis=1)
+        class_ids = np.argmax(scores, axis=1)
 
-            x, y, w, h = detection[:4]
-            # if bboxes are too small, it just skips, and it is not a bad idea since we do not need to detect small areas
-            if w < w_thres or h < h_thres:
-                continue
+        # Filter out small boxes
+        min_width, min_height = 40, 40
+        valid_size = (w >= min_width) & (h >= min_height)
 
-            if confidence > thres:
+        # Apply confidence threshold
+        valid_confidence = (confidences > self.model_score_threshold)
 
-                left = int((x - 0.5 * w) * x_factor)
-                top = int((y - 0.5 * h) * y_factor)
-                width = int(w * x_factor)
-                height = int(h * y_factor)
+        # Combine all conditions
+        valid_detections = valid_size & valid_confidence
 
-                boxes.append([left, top, width, height])
-                confidences.append(float(confidence))
-                mask_coefficients.append(scores_segmentation)
-                class_ids.append(class_id)
+        # Mask coefficients: one weight per proto mask, stored after the class scores
+        scores_segmentation = outputs_bbox[:, 4 + CLASS_NUM:]
+
+        # Filter arrays based on valid detections
+        filtered_x = x[valid_detections]
+        filtered_y = y[valid_detections]
+        filtered_w = w[valid_detections]
+        filtered_h = h[valid_detections]
+        filtered_confidences = confidences[valid_detections]
+        filtered_class_ids = class_ids[valid_detections]
+        filtered_mask_coefficient = np.transpose(scores_segmentation, (2,0,1))[valid_detections.T]
+
+
+        # Calculate adjusted box coordinates
+        left = (filtered_x - 0.5 * filtered_w) * x_factor
+        top = (filtered_y - 0.5 * filtered_h) * y_factor
+        width = filtered_w * x_factor
+        height = filtered_h * y_factor
+
+        # Prepare final arrays
+        boxes = np.vstack([left, top, width, height]).T
+        mask_coefficients = scores_segmentation
+
+        # Truncate box coordinates to integers (same result as the int() casts in the loop version):
+        boxes = boxes.astype(int)
+
+        # Convert to plain Python lists, which are what NMSBoxes and the result dicts below receive:
+        boxes = boxes.tolist()
+        filtered_confidences = filtered_confidences.tolist()
+        filtered_class_ids = filtered_class_ids.tolist()
+        t2 = time.time()
+        print("cursed for loop")
+        print(t2-t1)
         confidences = (confidences)
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.model_score_threshold, self.model_nms_threshold)
+        t1 = time.time()
+        if not len(boxes) <= 0 :
+            indices = cv2.dnn.NMSBoxes(boxes, filtered_confidences, self.model_score_threshold, self.model_nms_threshold)
+        else:
+            indices = []
+        t2 = time.time()
+        print("nms : ")
+        print(t2-t1)
 
         detections = []
         for i in indices:
             idx = i
             result = {
-                'class_id': class_ids[i],
-                'confidence': confidences[i],
-                'mask_coefficients': np.array(mask_coefficients[i]),
+                'class_id': filtered_class_ids[i],
+                'confidence': filtered_confidences[i],
+                'mask_coefficients': np.array(filtered_mask_coefficient[i]),
                 'box': boxes[idx],
-                'class_name': self.classes[class_ids[i]],
+                'class_name': self.classes[filtered_class_ids[i]],
                 'color': (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
             }
             detections.append(result)
@@ -243,7 +283,7 @@
     model_path = 'yoloseg/weight/best.onnx'
     classes_txt_file = 'config_files/yolo_config.txt'
     # image_path = 'yoloseg/img3.jpg'
-    image_path = 'testing.png'
+    image_path = 'yoloseg/img3.jpg'
 
     model_input_shape = (640, 640)
     inference_engine = Inference(
Add a comment
List