윤영준 윤영준 05-22
Bug fix and refactoring for inference_.py
1. handling xywh <-> xyxy is now a separate method
2. This doubles as a bug fix for an occasional dimension mismatch between `full_masks` and `full_mask`
@c8654885df50cdbd58d4f88f48ba4e34a432c7cc
inference_endpoint.py
--- inference_endpoint.py
+++ inference_endpoint.py
@@ -82,13 +82,11 @@
 
         t2 = time.time()
         if len(self.mask) > 0:
-            print(self.mask.shape)
-            print(type(self.mask))
             self.mask_blob = cv2.imencode('.png', self.mask)
             self.mask_blob = self.mask.tobytes()
             self.mask = cv2.resize(self.mask, (image.shape[0], image.shape[1]))
 
-        # print(t2 - t1)
+        print(t2 - t1)
 
         if len(self.mask) != 0:
             seg_image = overlay_mask(image, self.mask[0], color=(0, 255, 0), alpha=0.3)
yoloseg/inference_.py
--- yoloseg/inference_.py
+++ yoloseg/inference_.py
@@ -2,6 +2,7 @@
 import numpy as np
 import random
 from config_files.yolo_config import CLASS_NAME, CLASS_NUM
+from typing import List, Tuple
 
 class Inference:
     def __init__(self, onnx_model_path, model_input_shape, classes_txt_file, run_with_cuda):
@@ -99,31 +100,16 @@
         if not detections:
             return []
 
-        batch_size, num_protos, proto_height, proto_width = proto_masks.shape  # Correct shape unpacking
+        batch_size, num_protos, proto_height, proto_width = proto_masks.shape
         full_masks = np.zeros((len(detections), image_shape[0], image_shape[1]), dtype=np.float32)
 
         for idx, det in enumerate(detections):
             box = det['box']
-            x1, y1, w, h = box
-            # print(f"x1 : {x1}, y1 : {y1}, w: {w}, h: {h}")
 
-            x1, y1, x2, y2 = x1, y1, x1 + w, y1 + h
+            x1, y1, w, h = self.adjust_box_coordinates(box, (image_shape[0], image_shape[1]))
 
-            #... why the model outputs ... negative values?...
-            if x1 <= 0 :
-                w += x1
-                x1 = 0
-            if y1 <= 0 :
-                h += y1
-                y1 = 0
-
-            # To handle edge cases where you get bboxes that pass beyond the original size of image_binary
-            if y2 > image_shape[1]:
-                h = image_shape[1] - y1
-            if x2 > image_shape[0]:
-                w = image_shape[1] - y1
-
-            # print(f"x2: {x2}, y2 : {y2}")
+            if w <=1 or h <= 1:
+                continue
 
             # Get the corresponding mask coefficients for this detection
             coeffs = det["mask_coefficients"]
@@ -147,15 +133,38 @@
             # print(f"x2: {x2}, y2 : {y2}")
             # print(final_mask.shape)
             # print(full_mask[y1:y2, x1:x2].shape)
-            full_mask[y1:y2, x1:x2] = final_mask
+            full_mask[y1:y1+h, x1:x1+w] = final_mask
 
             # Combine the mask with the masks of other detections
             full_masks[idx] = full_mask
+
+
         all_mask = full_masks.sum(axis=0)
-        # Append a dimension so that cv2 can understand this as an image.
+        all_mask = np.clip(all_mask, 0, 1)
+        # Append a dimension so that cv2 can understand ```all_mask``` argument as an image.
         all_mask = all_mask.reshape((image_shape[0], image_shape[1], 1))
         return all_mask.astype(np.uint8)
 
+    def adjust_box_coordinates(self, box: List[int], image_shape: Tuple[int, int]) -> Tuple[int, int, int, int]:
+        """
+        Adjusts bounding box coordinates to ensure they lie within image boundaries.
+        """
+        x1, y1, w, h = box
+        x2, y2 = x1 + w, y1 + h
+
+        # Clamp coordinates to image boundaries
+        x1 = max(0, x1)
+        y1 = max(0, y1)
+        x2 = min(image_shape[1], x2)
+        y2 = min(image_shape[0], y2)
+
+        # Recalculate width and height
+        w = x2 - x1
+        h = y2 - y1
+
+        return x1, y1, w, h
+
+
     def load_classes_from_file(self):
         with open(self.classes_path, 'r') as f:
             self.classes = f.read().strip().split('\n')
Add a comment
List