윤영준 05-14
Completed the ONNX-weight inference stage
@970c7c681a37ec5233c18f1b1f4d5d3ad2a03991
 
config.py (added)
+++ config.py
@@ -0,0 +1,2 @@
+CLASS_NUM = 1
+CLASS_NAME = ["water_body"]
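
Note: config.py duplicates the class list that yoloseg/config/classes.txt carries (Inference reads the txt file, while process_detections uses CLASS_NUM from config). A minimal consistency check, assuming the repo-root paths that inference_.py's main() uses:

    from config import CLASS_NAME, CLASS_NUM

    with open('yoloseg/config/classes.txt') as f:
        classes = f.read().strip().split('\n')

    assert classes == CLASS_NAME and len(classes) == CLASS_NUM
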
hls_streaming/hls.py
--- hls_streaming/hls.py
+++ hls_streaming/hls.py
@@ -14,35 +14,36 @@
         :param hls_url: hls address
        :param cctv_id: CCTV id number (identifies which stream a frame came from; the exact scheme needs further discussion with the frontend developers)
         :param interval: interval of sampling in seconds
-        :param buffer_duration: video buffer, 15 seconds is default for ITS video streaming
+        :param buffer_duration: video buffer length in seconds; 15 is the default for ITS HLS video streaming
         :param time_zone: default Asia/Seoul
+        :param buffer_size: maximum number of frames kept in the frame buffer
+        :param endpoint: API endpoint to send captured frames to
         '''
         self.hls_url = hls_url
         self.interval = interval
         self.buffer_duration = buffer_duration
-        self.buffer_size = 600
+        self.buffer_size = buffer_size
         self.frame_buffer = []
-        self.frame_buffer_lock = Lock()
+        self.frame_buffer_lock = Lock()  # guards frame_buffer, which is shared between receive_stream_packet and process_frames
         self.captured_frame_count = 0
         self.last_capture_time = 0
         self.start_time = time.time()
         self.stop_event = Event()
-        self.setup_stream()
-        self.cctvid = cctv_id
-        self.time_zone = ZoneInfo(time_zone)
-        self.endpoint = endpoint
 
-    def setup_stream(self):
         self.input_stream = av.open(self.hls_url)
         self.video_stream = next(s for s in self.input_stream.streams if s.type == 'video')
         self.fps = float(self.video_stream.guessed_rate)  # guessed_rate is a Fraction; the numerator alone misreads rates like 30000/1001
         self.capture_interval = 1 / self.fps
 
-    # ```capture_frames``` and ```process_frames``` work asynchronously (called with Thread)
+        self.cctvid = cctv_id
+        self.time_zone = ZoneInfo(time_zone)
+        self.endpoint = endpoint
+
+
+    # ```receive_stream_packet``` and ```process_frames``` work asynchronously (called with Thread)
     #  so that they always run as intended (send a frame every ```interval``` sec)
-    #  regardless of what buffer frames are now.
+    #  regardless of how frames are buffered, as long as enough of them are buffered.
     # They are started by ```start``` and halted by ```stop```.
-    def capture_frames(self):
+    def receive_stream_packet(self):
         for packet in self.input_stream.demux(self.video_stream):
             for frame in packet.decode():
                 with self.frame_buffer_lock:
@@ -59,13 +60,13 @@
                             if len(self.frame_buffer) > self.buffer_size:
                                 self.frame_buffer = self.frame_buffer[-self.buffer_size:]
                             buffered_frame = self.frame_buffer[-1]
-                            print(len(self.frame_buffer))
+                            # print(len(self.frame_buffer))
                             img = buffered_frame.to_image()
                             img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                             frame_name = f"captured_frame_{self.captured_frame_count}.jpg"
                             img_binary = cv2.imencode('.png', img)
                             self.send_image_to_server(img_binary, self.endpoint)
-                            cv2.imwrite(f'hls_streaming/captured_frame_/{datetime.now()}_{frame_name}', img)
+                            # cv2.imwrite(f'hls_streaming/captured_frame_/{datetime.now()}_{frame_name}', img)
                             self.last_capture_time = current_time
                             print(f"Captured {frame_name} at time: {current_time - self.start_time:.2f}s")
                             self.captured_frame_count +=1
@@ -82,17 +83,18 @@
         try:
             requests.post(endpoint, headers=header, files=image)
-        except:
-            print("Can not connect to the analyzer server. Check the endpoint address or connection.")
+        except requests.exceptions.RequestException:
+            print("Cannot connect to the analyzer server. Check the endpoint address or connection.\n"
+                  f"Failed to reach: {self.endpoint}")
 
     def start(self):
-        self.capture_thread = Thread(target=self.capture_frames)
+        self.receive_thread = Thread(target=self.receive_stream_packet)  # do not shadow the method with the Thread object
         self.process_thread = Thread(target=self.process_frames)
-        self.capture_thread.start()
+        self.receive_thread.start()
         self.process_thread.start()
 
     def stop(self):
         self.stop_event.set()
-        self.capture_thread.join()
+        self.receive_thread.join()
         self.process_thread.join()
         self.input_stream.close()
 
@@ -102,7 +104,7 @@
     capturer = FrameCapturer(
         'http://cctvsec.ktict.co.kr/73496/'
         '7xhDlyfDPK1AtaOUkAUDUJgZvfqvRXYYZUmRLxgPgKXk+eEtIJIfGkiC/gcQmysaz7zhDW2Jd8qhPCxgpo7cn5VqArnowyKjUePjdAmuQQ8=',
-        101, 300
+        101, 10
     )
     t1 = time.time()
     try:
@@ -110,6 +112,7 @@
         time.sleep(600000)
     finally:
         capturer.stop()
+        del capturer
         t2 = time.time()
         with open("result.txt", "w") as file:
             file.write(f'{t2-t1} seconds before terminating')
 
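Note on the upload path: cv2.imencode returns a (success, buffer) tuple, so send_image_to_server (whose body lies mostly outside this hunk) has to unwrap it before posting. A minimal standalone sketch of the multipart POST under that assumption; the "file" field name and the timeout are illustrative, not the server's confirmed contract:

    import cv2
    import requests

    def send_image_to_server(img, endpoint):
        ok, buf = cv2.imencode('.png', img)  # in-memory PNG; buf is a numpy array of bytes
        if not ok:
            raise ValueError("PNG encoding failed")
        files = {'file': ('frame.png', buf.tobytes(), 'image/png')}  # field name is an assumption
        requests.post(endpoint, files=files, timeout=10)
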
inferenece.py (deleted)
--- inferenece.py
@@ -1,0 +0,0 @@
 
yoloseg/config/classes.txt (added)
+++ yoloseg/config/classes.txt
@@ -0,0 +1,1 @@
+water_body(No newline at end of file)
 
yoloseg/img.png (Binary) (added)
+++ yoloseg/img.png
Binary file is not shown
 
yoloseg/inference_.py (added)
+++ yoloseg/inference_.py
@@ -0,0 +1,235 @@
+import cv2
+import numpy as np
+import random
+from config import CLASS_NAME, CLASS_NUM
+
+class Inference:
+    def __init__(self, onnx_model_path, model_input_shape, classes_txt_file, run_with_cuda):
+        self.model_path = onnx_model_path
+        self.model_shape = model_input_shape
+        self.classes_path = classes_txt_file
+        self.cuda_enabled = run_with_cuda
+        self.letter_box_for_square = True
+        self.model_score_threshold = 0.3
+        self.model_nms_threshold = 0.5
+        self.classes = []
+
+        self.load_onnx_network()
+        self.load_classes_from_file()
+
+    def sigmoid(self, x):
+        return 1 / (1 + np.exp(-x))
+
+    def inverse_sigmoid(self, x):
+        return np.log(x/(1-x))
+
+    def run_inference(self, input_image):
+        model_input = input_image
+        if self.letter_box_for_square and self.model_shape[0] == self.model_shape[1]:
+            model_input = self.format_to_square(model_input)
+
+        blob = cv2.dnn.blobFromImage(model_input, 1.0 / 255.0, self.model_shape, (0, 0, 0), True, False)
+        self.net.setInput(blob)
+
+        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())
+        outputs_bbox = outputs[0]
+        outputs_mask = outputs[1]
+
+        detections = self.process_detections(outputs_bbox, model_input)
+        mask_maps = self.process_mask_output(detections, outputs_mask, model_input.shape)
+
+        return detections, mask_maps
+
+    def process_detections(self, outputs_bbox, model_input):
+        # Assuming outputs_bbox is already in the (x, y, w, h, confidence, class_probs...) format
+        x_factor = model_input.shape[1] / self.model_shape[0]
+        y_factor = model_input.shape[0] / self.model_shape[1]
+
+        class_ids = []
+        confidences = []
+        mask_coefficients = []
+        boxes = []
+
+        for detection in outputs_bbox[0].T:
+            # When the weight is fine-tuned from a pretrained weight, the resulting weight
+            # may keep leftover classes (which do nothing), hence the explicit CLASS_NUM slicing.
+            scores_classification = detection[4:4+CLASS_NUM]
+            scores_segmentation = detection[4+CLASS_NUM:]
+            class_id = np.argmax(scores_classification, axis=0)
+            confidence = scores_classification[class_id]
+
+            thres = self.model_score_threshold
+            if confidence > thres:
+                x, y, w, h = detection[:4]
+                left = int((x - 0.5 * w) * x_factor)
+                top = int((y - 0.5 * h) * y_factor)
+                width = int(w * x_factor)
+                height = int(h * y_factor)
+
+                boxes.append([left, top, width, height])
+                confidences.append(float(confidence))
+                mask_coefficients.append(scores_segmentation)
+                class_ids.append(class_id)
+        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.model_score_threshold, self.model_nms_threshold)
+
+        detections = []
+        for i in indices:
+            result = {
+                'class_id': class_ids[i],
+                'confidence': confidences[i],
+                'mask_coefficients': np.array(mask_coefficients[i]),
+                'box': boxes[i],
+                'class_name': self.classes[class_ids[i]],
+                'color': (random.randint(100, 255), random.randint(100, 255), random.randint(100, 255))
+            }
+            detections.append(result)
+
+        return detections
+
+    def process_mask_output(self, detections, proto_masks, image_shape):
+        if not detections:
+            return []
+
+        batch_size, num_protos, proto_height, proto_width = proto_masks.shape  # Correct shape unpacking
+        full_masks = np.zeros((len(detections), image_shape[0], image_shape[1]), dtype=np.float32)
+
+        for idx, det in enumerate(detections):
+            box = det['box']
+            x1, y1, w, h = box
+            x1, y1, x2, y2 = x1, y1, x1 + w, y1 + h
+
+            # clamp the box to the image bounds; image_shape is (height, width, channels)
+            if y2 > image_shape[0]:
+                h = image_shape[0] - y1
+                y2 = image_shape[0]
+            if x2 > image_shape[1]:
+                w = image_shape[1] - x1
+                x2 = image_shape[1]
+
+            # Get the corresponding mask coefficients for this detection
+            coeffs = det["mask_coefficients"]
+
+            # Compute the linear combination of proto masks
+            # for now, plural batch operation is not supported, and this is the point where you should start.
+            # instead of proto_masks[0], do some iterative operation.
+            mask = np.tensordot(coeffs, proto_masks[0], axes=[0, 0])  # Dot product along the number of prototypes
+
+            # Resize mask to the bounding box size, using sigmoid to normalize
+            resized_mask = cv2.resize(mask, (w, h))
+            resized_mask = self.sigmoid(resized_mask)
+
+            # Threshold to create a binary mask
+            final_mask = (resized_mask > 0.5).astype(np.uint8)
+
+            # Place the mask in the corresponding location on a full-sized mask image
+            full_mask = np.zeros((image_shape[0], image_shape[1]), dtype=np.uint8)
+            # print(final_mask.shape)
+            # print(full_mask[y1:y2, x1:x2].shape)
+            full_mask[y1:y2, x1:x2] = final_mask
+
+            # Combine the mask with the masks of other detections
+            full_masks[idx] = full_mask
+
+        return full_masks
+
+    def load_classes_from_file(self):
+        with open(self.classes_path, 'r') as f:
+            self.classes = f.read().strip().split('\n')
+
+    def load_onnx_network(self):
+        self.net = cv2.dnn.readNetFromONNX(self.model_path)
+        if self.cuda_enabled:
+            print("\nRunning on CUDA")
+            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
+            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
+        else:
+            print("\nRunning on CPU")
+            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
+            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
+
+    def format_to_square(self, source):
+        col, row = source.shape[1], source.shape[0]
+        max_side = max(col, row)
+        result = np.zeros((max_side, max_side, 3), dtype=np.uint8)
+        result[0:row, 0:col] = source
+        return result
+
+def overlay_mask(image, mask, color=(0, 255, 0), alpha=0.5):
+    """
+    Overlays a mask onto an image using a specified color and transparency level.
+
+    Parameters:
+        image (np.ndarray): The original image.
+        mask (np.ndarray): The mask to overlay. Must be the same size as the image.
+        color (tuple): The color for the mask overlay in BGR format (default is green).
+        alpha (float): Transparency factor for the mask; 0 is fully transparent, 1 is opaque.
+
+    Returns:
+        np.ndarray: The image with the overlay.
+    """
+    # Ensure the mask is a binary mask
+    mask = (mask > 0).astype(np.uint8)  # Convert mask to binary if not already
+
+    # Create an overlay with the same size as the image but only using the mask area
+    overlay = np.zeros_like(image, dtype=np.uint8)
+    overlay[mask == 1] = color
+
+    # Blend the overlay with the image using the alpha factor
+    return cv2.addWeighted(src1=overlay, alpha=alpha, src2=image, beta=1 - alpha, gamma=0)
+
+
+
+def main():
+    import time
+
+
+    # Path to your ONNX model and classes text file
+    model_path = 'yoloseg/weight/best.onnx'
+    classes_txt_file = 'yoloseg/config/classes.txt'
+    image_path = 'yoloseg/img3.jpg'
+
+    model_input_shape = (640, 640)
+    inference_engine = Inference(
+        onnx_model_path=model_path,
+        model_input_shape=model_input_shape,
+        classes_txt_file=classes_txt_file,
+        run_with_cuda=True
+    )
+
+    # Load an image
+    img = cv2.imread(image_path)
+    if img is None:
+        print("Error loading image")
+        return
+    img = cv2.resize(img, model_input_shape)
+    # Run inference
+    t1 = time.time()
+    detections, mask_maps = inference_engine.run_inference(img)
+    t2 = time.time()
+
+    print(t2-t1)
+
+    # Display results
+    for detection in detections:
+        x, y, w, h = detection['box']
+        class_name = detection['class_name']
+        confidence = detection['confidence']
+        cv2.rectangle(img, (x, y), (x+w, y+h), detection['color'], 2)
+        label = f"{class_name}: {confidence:.2f}"
+        cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, detection['color'], 2)
+
+    # Show the image
+    # cv2.imshow('Detections', img)
+    # cv2.waitKey(0)
+    # cv2.destroyAllWindows()
+
+    # If you also want to display segmentation maps, you would need additional handling here
+    # Example for displaying first mask if available:
+    if len(mask_maps) > 0:
+        seg_image = overlay_mask(img, mask_maps[0], color=(0, 255, 0), alpha=0.3)
+        cv2.imshow("segmentation", seg_image)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+    main()(No newline at end of file)
 
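The mask branch in process_mask_output is a linear combination of prototype masks: each detection's mask coefficients weight the shared prototypes, and a sigmoid plus a 0.5 threshold binarizes the result. A standalone sketch with illustrative shapes (32 prototypes at 160x160 is typical for YOLOv8-seg exports; the real shapes come from outputs_mask):

    import numpy as np

    def combine_prototypes(coeffs, protos):
        """coeffs: (num_protos,), protos: (num_protos, H, W) -> binary mask of shape (H, W)."""
        mask = np.tensordot(coeffs, protos, axes=([0], [0]))  # weighted sum over prototypes
        mask = 1.0 / (1.0 + np.exp(-mask))                    # sigmoid, as in Inference.sigmoid
        return (mask > 0.5).astype(np.uint8)

    protos = np.random.randn(32, 160, 160).astype(np.float32)
    coeffs = np.random.randn(32).astype(np.float32)
    print(combine_prototypes(coeffs, protos).shape)  # (160, 160)
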
yoloseg/utils.py (added)
+++ yoloseg/utils.py
@@ -0,0 +1,57 @@
+import numpy as np
+import cv2
+
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+
+def xywh2xyxy(x):
+    # x has shape [n, 4] where each row is (center_x, center_y, width, height)
+    y = np.zeros_like(x)
+    y[:, 0:2] = x[:, 0:2] - x[:, 2:4] / 2  # calculate min_x, min_y
+    y[:, 2:4] = x[:, 0:2] + x[:, 2:4] / 2  # calculate max_x, max_y
+    return y
+
+def iou(box, boxes):
+    # Compute xmin, ymin, xmax, ymax for both boxes
+    xmin = np.maximum(box[0], boxes[:, 0])
+    ymin = np.maximum(box[1], boxes[:, 1])
+    xmax = np.minimum(box[2], boxes[:, 2])
+    ymax = np.minimum(box[3], boxes[:, 3])
+
+    # Compute intersection area
+    intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
+
+    # Compute union area
+    box_area = (box[2] - box[0]) * (box[3] - box[1])
+    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+    union_area = box_area + boxes_area - intersection_area
+
+    # Compute IoU
+    iou = intersection_area / union_area
+
+    return iou
+
+
+def fast_nms(boxes, scores, iou_threshold):
+    sorted_indices = np.argsort(scores)[::-1]
+
+    selected_indices = []
+    while sorted_indices.size > 0:
+        box_id = sorted_indices[0]
+        selected_indices.append(box_id)
+
+        if sorted_indices.size == 1:
+            break
+
+        remaining_boxes = boxes[sorted_indices[1:]]
+
+        # iou() is vectorized over its second argument, so a single call scores all remaining boxes
+        ious = iou(boxes[box_id], remaining_boxes)
+        keep_indices = np.where(ious < iou_threshold)[0]
+
+        sorted_indices = sorted_indices[keep_indices + 1]
+
+    return selected_indices
+
 
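utils.py is not wired into inference_.py yet (process_detections still calls cv2.dnn.NMSBoxes), but the helpers compose as below. The import path assumes running from the repo root; boxes and scores are made-up values:

    import numpy as np
    from yoloseg.utils import xywh2xyxy, fast_nms

    boxes_xywh = np.array([[50.0, 50.0, 40.0, 40.0],
                           [52.0, 51.0, 40.0, 40.0],   # near-duplicate of the first box
                           [150.0, 150.0, 30.0, 30.0]])
    scores = np.array([0.9, 0.8, 0.7])

    boxes_xyxy = xywh2xyxy(boxes_xywh)  # fast_nms expects corner (x1, y1, x2, y2) format
    keep = fast_nms(boxes_xyxy, scores, iou_threshold=0.5)
    print(keep)  # [0, 2] -- the near-duplicate box is suppressed
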
yoloseg/weight/best.onnx (added)
+++ yoloseg/weight/best.onnx
This file is too big to display.