Vectorized the bbox-processing for loop for a ~100x speedup
yoloseg/inference_gpu_.py @bc54f796a43b056b890f560ae62004c60dd2337a
--- yoloseg/inference_gpu_.py
+++ yoloseg/inference_gpu_.py
@@ -1,3 +1,5 @@
+import time
+
 import cv2
 import numpy as np
 import random
@@ -34,12 +36,23 @@
         # Prepare input data as a dictionary
         inputs = {self.session.get_inputs()[0].name: blob}
         # Run model
+        t1 = time.time()
         outputs = self.session.run(None, inputs)
+        t2 = time.time()
+        print("model infer :")
+        print(t2-t1)
         outputs_bbox = outputs[0]
         outputs_mask = outputs[1]
-
+        t1 = time.time()
         detections = self.process_detections(outputs_bbox, model_input)
+        t2 = time.time()
+        print("bbox :")
+        print(t2-t1)
+        t1 = time.time()
         mask_maps = self.process_mask_output(detections, outputs_mask, model_input.shape)
+        t2 = time.time()
+        print("mask :")
+        print(t2-t1)
 
         return detections, mask_maps
 
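The same t1/t2/print pattern brackets every stage above. If this instrumentation is meant to stay, a small context manager would cut the repetition. A minimal sketch, assuming a hypothetical `timed` helper that is not part of this repo:

    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label):
        # Hypothetical helper: prints wall-clock seconds spent in the block.
        t1 = time.time()
        try:
            yield
        finally:
            print(f"{label}: {time.time() - t1:.4f}s")

    # Usage, mirroring the instrumentation added in this commit:
    # with timed("model infer"):
    #     outputs = self.session.run(None, inputs)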
@@ -74,53 +87,80 @@
         x_factor = model_input.shape[1] / self.model_shape[0]
         y_factor = model_input.shape[0] / self.model_shape[1]
 
-        class_ids = []
-        confidences = []
-        mask_coefficients = []
-        boxes = []
+        t1 = time.time()
+        # Assuming outputs_bbox is an array with shape (N, 4+CLASS_NUM+32) where N is the number of detections
+        # Example outputs_bbox.shape -> (8400, 4+CLASS_NUM+32)
 
-        for detection in outputs_bbox[0].T:
-            # This segmentation model uses yolact architecture to predict mask
-            # the output tensor dimension for yolo-v8-seg is B x [X, Y, W, H, C1, C2, ..., P1, ...,P32] * 8400
-            # where C{n} are confidence score for each class
-            # and P{n} are coefficient for each proto masks. (32 by default)
-            scores_classification = detection[4:4+CLASS_NUM]
-            scores_segmentation = detection[4+CLASS_NUM:]
-            class_id = np.argmax(scores_classification, axis=0)
-            confidence = scores_classification[class_id]
+        # Extract basic bbox coordinates and scores
+        x, y, w, h = outputs_bbox[:, 0], outputs_bbox[:, 1], outputs_bbox[:, 2], outputs_bbox[:, 3]
+        scores = outputs_bbox[:, 4:4 + CLASS_NUM]
 
-            thres = self.model_score_threshold
-            w_thres = 40
-            h_thres = 40
+        # Calculate confidences and class IDs
+        confidences = np.max(scores, axis=1)
+        class_ids = np.argmax(scores, axis=1)
 
-            x, y, w, h = detection[:4]
-            # if bboxes are too small, it just skips, and it is not a bad idea since we do not need to detect small areas
-            if w < w_thres or h < h_thres:
-                continue
+        # Filter out small boxes
+        min_width, min_height = 40, 40
+        valid_size = (w >= min_width) & (h >= min_height)
 
-            if confidence > thres:
+        # Apply confidence threshold
+        valid_confidence = (confidences > self.model_score_threshold)
 
-                left = int((x - 0.5 * w) * x_factor)
-                top = int((y - 0.5 * h) * y_factor)
-                width = int(w * x_factor)
-                height = int(h * y_factor)
+        # Combine all conditions
+        valid_detections = valid_size & valid_confidence
 
-                boxes.append([left, top, width, height])
-                confidences.append(float(confidence))
-                mask_coefficients.append(scores_segmentation)
-                class_ids.append(class_id)
+        # proto_mask_score
+        scores_segmentation = outputs_bbox[:, 4 + CLASS_NUM:]
+
+        # Filter arrays based on valid detections
+        filtered_x = x[valid_detections]
+        filtered_y = y[valid_detections]
+        filtered_w = w[valid_detections]
+        filtered_h = h[valid_detections]
+        filtered_confidences = confidences[valid_detections]
+        filtered_class_ids = class_ids[valid_detections]
+        filtered_mask_coefficient = np.transpose(scores_segmentation, (2,0,1))[valid_detections.T]
+
+        # Calculate adjusted box coordinates
+        left = (filtered_x - 0.5 * filtered_w) * x_factor
+        top = (filtered_y - 0.5 * filtered_h) * y_factor
+        width = filtered_w * x_factor
+        height = filtered_h * y_factor
+
+        # Prepare final arrays
+        boxes = np.vstack([left, top, width, height]).T
+        mask_coefficients = scores_segmentation
+
+        # If you need to use integer types for some reason (e.g., indexing later on):
+        boxes = boxes.astype(int)
+
+        # You can further process these arrays or convert them to lists if needed:
+        boxes = boxes.tolist()
+        filtered_confidences = filtered_confidences.tolist()
+        filtered_class_ids = filtered_class_ids.tolist()
+        t2 = time.time()
+        print("cursed for loop")
+        print(t2-t1)
         confidences = (confidences)
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.model_score_threshold, self.model_nms_threshold)
+        t1 = time.time()
+        if not len(boxes) <= 0 :
+            indices = cv2.dnn.NMSBoxes(boxes, filtered_confidences, self.model_score_threshold, self.model_nms_threshold)
+        else:
+            indices = []
+        t2 = time.time()
+        print("nms : ")
+        print(t2-t1)
 
         detections = []
         for i in indices:
             idx = i
             result = {
-                'class_id': class_ids[i],
-                'confidence': confidences[i],
-                'mask_coefficients': np.array(mask_coefficients[i]),
+                'class_id': filtered_class_ids[i],
+                'confidence': filtered_confidences[i],
+                'mask_coefficients': np.array(filtered_mask_coefficient[i]),
                 'box': boxes[idx],
-                'class_name': self.classes[class_ids[i]],
+                'class_name': self.classes[filtered_class_ids[i]],
                 'color': (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
             }
             detections.append(result)
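The core of the change: the old per-candidate Python loop (roughly 8400 iterations per frame) is replaced by a handful of NumPy operations over the whole tensor at once, which is where a speedup on the order the commit title claims would come from. A self-contained sketch of the same boolean-mask pattern on dummy data; the shapes, CLASS_NUM value, and thresholds below are illustrative assumptions, not values from the repo:

    import numpy as np

    CLASS_NUM = 3          # illustrative; the real value comes from the class list
    score_threshold = 0.5  # illustrative threshold
    min_w, min_h = 40, 40  # same size cutoff the diff uses

    # Dummy (N, 4 + CLASS_NUM + 32) tensor: [x, y, w, h, class scores..., mask coeffs...]
    rng = np.random.default_rng(0)
    dets = rng.random((8400, 4 + CLASS_NUM + 32)).astype(np.float32)
    dets[:, 2:4] *= 100  # scale w, h so some boxes pass the size filter

    x, y, w, h = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4:4 + CLASS_NUM]

    confidences = scores.max(axis=1)
    class_ids = scores.argmax(axis=1)

    # One boolean mask replaces the per-detection if/continue logic
    valid = (w >= min_w) & (h >= min_h) & (confidences > score_threshold)

    boxes = np.stack([x[valid] - 0.5 * w[valid],
                      y[valid] - 0.5 * h[valid],
                      w[valid], h[valid]], axis=1).astype(int)
    mask_coeffs = dets[:, 4 + CLASS_NUM:][valid]

    print(boxes.shape, confidences[valid].shape, mask_coeffs.shape)

Note that the repo's tensor is actually laid out as (B, 4+CLASS_NUM+32, 8400) (see the deleted comment above), which is why the diff needs the extra np.transpose and valid_detections.T when slicing the mask coefficients; the sketch uses the simpler (N, features) layout.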
@@ -243,7 +283,7 @@
     model_path = 'yoloseg/weight/best.onnx'
     classes_txt_file = 'config_files/yolo_config.txt'
     # image_path = 'yoloseg/img3.jpg'
-    image_path = 'testing.png'
+    image_path = 'yoloseg/img3.jpg'
 
     model_input_shape = (640, 640)
     inference_engine = Inference(
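One detail from the NMS change above: the new `if not len(boxes) <= 0` guard (more plainly written `if len(boxes) > 0`) avoids calling cv2.dnn.NMSBoxes on an empty box list, which is presumably the failure the branch works around. A minimal standalone usage sketch with made-up boxes:

    import cv2

    # Two heavily overlapping boxes and one separate box, as [left, top, width, height]
    boxes = [[10, 10, 100, 100], [12, 12, 100, 100], [300, 300, 80, 80]]
    confidences = [0.9, 0.8, 0.7]

    # args: boxes, scores, score threshold, NMS IoU threshold
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.45)

    # Depending on the OpenCV version this is a flat array or an Nx1 array of
    # kept indices; here the second box is suppressed by the first.
    print(indices)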