
+++ README.md
# Pothhole-detection
+++ main.py
import cv2

# Create a background subtractor object
# (cv2.bgsegm is part of the opencv-contrib-python package)
bg_subtractor = cv2.bgsegm.createBackgroundSubtractorGMG()
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 10))

# Open video file or capture device
video = cv2.VideoCapture('spring_-_38344 (Original).mp4')

# Minimum and maximum area thresholds for filtering objects
min_area = 1    # Adjust as needed
max_area = 500  # Adjust as needed

frame_count = 0
while True:
    # Read frame from video
    ret, frame = video.read()
    if not ret:
        break

    # Apply background subtraction, then remove speckle noise with a morphological opening
    fg_mask = bg_subtractor.apply(frame)
    fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_OPEN, kernel)

    # Apply thresholding to get a binary foreground mask
    thresh = cv2.threshold(fg_mask, 128, 255, cv2.THRESH_BINARY)[1]

    # Find contours of the moving objects
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter objects by size
    for contour in contours:
        # Calculate contour area
        area = cv2.contourArea(contour)

        # Check if contour area is within the specified range
        if min_area < area < max_area:
            (x, y, w, h) = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Display the resulting frame; GMG spends its first frames (120 by default)
    # building its background model, so skip display during that warm-up period
    if frame_count > 120:
        cv2.imshow('Moving Object Detection', frame)
        # cv2.imshow('movingobjectcontour', fg_mask)
    frame_count += 1

    # Exit if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and close windows
video.release()
cv2.destroyAllWindows()
+++ model/autoencoder.py
import torch
from torch import nn
from torch.nn import functional as F
from torchvision.models import vgg16


class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, bias=False)

        self.conv3 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2, padding=1, bias=False)

        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)

        self.dilated_conv1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, dilation=2, padding=2, bias=False)
        self.dilated_conv2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, dilation=4, padding=4, bias=False)
        self.dilated_conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, dilation=8, padding=8, bias=False)
        self.dilated_conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, dilation=16, padding=16, bias=False)

        self.conv7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv8 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)

        self.deconv1 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False)
        self.avg_pool1 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

        self.deconv2 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1, bias=False)
        self.avg_pool2 = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

        self.conv9 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv10 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)

        self.skip_output1 = nn.Conv2d(in_channels=256, out_channels=3, kernel_size=3, stride=1, padding=1, bias=False)
        self.skip_output2 = nn.Conv2d(in_channels=128, out_channels=3, kernel_size=3, stride=1, padding=1, bias=False)
        self.skip_output3 = nn.Conv2d(in_channels=32, out_channels=3, kernel_size=3, stride=1, padding=1, bias=False)

    # TODO: maybe change this into concat-based skip connections? The explicit additions seem too cumbersome.
    def forward(self, input_tensor):
        # Encoder: two stride-2 stages downsample the input by a factor of 4
        relu1 = torch.relu(self.conv1(input_tensor))
        relu2 = torch.relu(self.conv2(relu1))
        relu3 = torch.relu(self.conv3(relu2))
        relu4 = torch.relu(self.conv4(relu3))
        relu5 = torch.relu(self.conv5(relu4))
        relu6 = torch.relu(self.conv6(relu5))

        # Dilated bottleneck enlarges the receptive field without further downsampling
        relu7 = torch.relu(self.dilated_conv1(relu6))
        relu8 = torch.relu(self.dilated_conv2(relu7))
        relu9 = torch.relu(self.dilated_conv3(relu8))
        relu10 = torch.relu(self.dilated_conv4(relu9))
        relu11 = torch.relu(self.conv7(relu10))
        relu12 = torch.relu(self.conv8(relu11))

        # Decoder: transposed convolutions upsample back to the input resolution,
        # with additive skip connections from the encoder
        deconv1 = self.deconv1(relu12)
        avg_pool1 = self.avg_pool1(deconv1)
        relu13 = torch.relu(avg_pool1)

        relu14 = torch.relu(self.conv9(relu13 + relu3))

        deconv2 = self.deconv2(relu14)
        avg_pool2 = self.avg_pool2(deconv2)
        relu15 = torch.relu(avg_pool2)

        relu16 = torch.relu(self.conv10(relu15 + relu1))

        # Multi-scale outputs at 1/4, 1/2 and full resolution
        skip_output_1 = self.skip_output1(relu12)
        skip_output_2 = self.skip_output2(relu14)
        skip_output_3 = torch.tanh(self.skip_output3(relu16))

        ret = {
            'skip_1': skip_output_1,
            'skip_2': skip_output_2,
            'skip_3': skip_output_3,
        }

        return ret


class LossFunction(nn.Module):
    def __init__(self):
        super(LossFunction, self).__init__()

        # Load pre-trained VGG model for feature extraction (frozen)
        self.vgg = vgg16(pretrained=True).features
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

        # Per-scale weights for the multi-scale MSE term
        self.lambda_i = [0.6, 0.8, 1.0]

    def forward(self, inference_ret, label_tensor):
        # `inference_ret` is the dict returned by AutoEncoder.forward(); constructing a
        # fresh AutoEncoder inside the loss would score an untrained network, so the
        # caller's outputs are taken directly instead.
        ori_height, ori_width = label_tensor.shape[2:]

        # Rescale labels to match the scales of the outputs
        label_tensor_resize_2 = F.interpolate(label_tensor, size=(ori_height // 2, ori_width // 2))
        label_tensor_resize_4 = F.interpolate(label_tensor, size=(ori_height // 4, ori_width // 4))
        label_list = [label_tensor_resize_4, label_tensor_resize_2, label_tensor]

        output_list = [inference_ret['skip_1'], inference_ret['skip_2'], inference_ret['skip_3']]

        # Multi-scale MSE loss (lm_loss), weighted per scale
        lm_loss = 0.0
        for index, output in enumerate(output_list):
            mse_loss = nn.MSELoss()(output, label_list[index]) * self.lambda_i[index]
            lm_loss += mse_loss

        # Perceptual loss (lp_loss) on VGG features of the full-resolution output
        src_vgg_feats = self.extract_vgg_feats(label_tensor)
        pred_vgg_feats = self.extract_vgg_feats(output_list[-1])

        lp_losses = []
        for index in range(len(src_vgg_feats)):
            lp_losses.append(nn.MSELoss()(src_vgg_feats[index], pred_vgg_feats[index]))
        lp_loss = torch.mean(torch.stack(lp_losses))

        loss = lm_loss + lp_loss

        return loss, inference_ret['skip_3']

    def extract_vgg_feats(self, input_tensor):
        # Collect features after relu1_2, relu2_2, relu3_3, relu4_3 and relu5_3
        feats = []
        x = input_tensor
        for layer_num, layer in enumerate(self.vgg):
            x = layer(x)
            if layer_num in {3, 8, 15, 22, 29}:
                feats.append(x)
        return feats
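

# Minimal usage sketch (not part of the original code): runs random tensors through the
# AutoEncoder and the refactored LossFunction to check that shapes line up. The input
# sides should be multiples of 4 so the additive skip connections match; 64x64 is an
# arbitrary assumption here.
if __name__ == "__main__":
    model = AutoEncoder()
    criterion = LossFunction()

    dummy_input = torch.rand(1, 3, 64, 64)
    dummy_label = torch.rand(1, 3, 64, 64)

    outputs = model(dummy_input)
    for name, tensor in outputs.items():
        print(name, tuple(tensor.shape))  # skip_1: 1/4 res, skip_2: 1/2 res, skip_3: full res

    loss, restored = criterion(outputs, dummy_label)
    print("loss:", loss.item(), "restored:", tuple(restored.shape))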
+++ model/discriminator.py
from torch import nn
+++ model/generator.py
import torch
from torch import nn
from torch.nn import functional as F


def conv3x3(in_ch, out_ch, stride=1, padding=1, groups=1, dilation=1):
    return nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=padding, groups=groups, dilation=dilation)


def conv1x1(in_ch, out_ch, stride=1):
    return nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride)


class ResNetBlock(nn.Module):
    def __init__(self, blocks=3, layers=1, input_ch=3, out_ch=32, kernel_size=None, stride=1, padding=1, groups=1,
                 dilation=1):
        """
        :type kernel_size: iterable or int
        """
        super(ResNetBlock, self).__init__()
        if kernel_size is None:
            kernel_size = [3, 3]
        self.conv1 = nn.Conv2d(
            input_ch, out_ch,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            dilation=dilation
        )
        # Each hidden stage is an independent conv + ReLU; constructing them inside the
        # loop (instead of re-appending one shared module) avoids unintended weight sharing.
        self.conv_hidden = nn.ModuleList()
        for block in range(blocks):
            for layer in range(layers):
                self.conv_hidden.append(
                    nn.Sequential(
                        nn.Conv2d(
                            out_ch, out_ch,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=padding,
                            groups=groups,
                            dilation=dilation
                        ),
                        nn.ReLU()
                    )
                )
        # Project the shortcut when the channel count changes so the residual addition is valid
        self.shortcut_proj = conv1x1(input_ch, out_ch, stride=stride) if input_ch != out_ch else nn.Identity()
        self.relu = nn.ReLU()
        self.blocks = blocks
        self.layers = layers

    def forward(self, x):
        shortcut = x
        x = self.conv1(shortcut)
        for i, hidden_layer in enumerate(self.conv_hidden):
            x = hidden_layer(x)
            if (i % self.layers == 0) and (i != 0):
                x = self.relu(x)
        x += self.shortcut_proj(shortcut)
        return x
63 | +class ConvLSTM(nn.Module): | |
64 | + def __init__(self, ch, kernel_size=3): | |
65 | + super(ConvLSTM, self).__init__() | |
66 | + self.padding = (kernel_size-1)/2 | |
67 | + self.conv_i = nn.Conv2d(in_channels=ch, out_channels=ch, kernel_size=kernel_size, stride=1, padding=1, | |
68 | + bias=False) | |
69 | + self.conv_f = nn.Conv2d(in_channels=ch, out_channels=ch, kernel_size=kernel_size, stride=1, padding=1, | |
70 | + bias=False) | |
71 | + self.conv_c = nn.Conv2d(in_channels=ch, out_channels=ch, kernel_size=kernel_size, stride=1, padding=1, | |
72 | + bias=False) | |
73 | + self.conv_o = nn.Conv2d(in_channels=ch, out_channels=ch, kernel_size=kernel_size, stride=1, padding=1, | |
74 | + bias=False) | |
75 | + self.conv_attention_map = nn.Conv2d(in_channels=ch, out_channels=1, kernel_size=kernel_size, stride=1, | |
76 | + padding=1, bias=False) | |
77 | + self.ch = ch | |
78 | + | |
79 | + def init_hidden(self, batch_size, image_size, init=0.5): | |
80 | + height, width = image_size | |
81 | + return torch.ones(batch_size, self.ch, height, width).to(self.conv_i.weight.device) * init | |
82 | + | |
83 | + def forward(self, input_tensor, input_cell_state=None): | |
84 | + if input_cell_state is None: | |
85 | + batch_size, _, height, width = input_tensor.size() | |
86 | + input_cell_state = self.init_hidden(batch_size, (height, width)) | |
87 | + | |
88 | + conv_i = self.conv_i(input_tensor) | |
89 | + sigmoid_i = torch.sigmoid(conv_i) | |
90 | + | |
91 | + conv_f = self.conv_f(input_tensor) | |
92 | + sigmoid_f = torch.sigmoid(conv_f) | |
93 | + | |
94 | + cell_state = sigmoid_f * input_cell_state + sigmoid_i * torch.tanh(self.conv_c(input_tensor)) | |
95 | + | |
96 | + conv_o = self.conv_o(input_tensor) | |
97 | + sigmoid_o = torch.sigmoid(conv_o) | |
98 | + | |
99 | + lstm_feats = sigmoid_o * torch.tanh(cell_state) | |
100 | + | |
101 | + attention_map = self.conv_attention_map(lstm_feats) | |
102 | + attention_map = torch.sigmoid(attention_map) | |
103 | + | |
104 | + return attention_map, cell_state, lstm_feats | |
105 | + | |
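

# Illustrative sketch (not part of the original code): a standalone shape check for
# ConvLSTM. The channel count, batch size and spatial size below are arbitrary assumptions.
def _convlstm_shape_demo():
    lstm = ConvLSTM(ch=32, kernel_size=3)
    features = torch.rand(2, 32, 64, 64)

    # First step: no previous cell state, so init_hidden() supplies one
    attention_map, cell_state, lstm_feats = lstm(features)
    assert attention_map.shape == (2, 1, 64, 64)   # one attention channel per pixel
    assert cell_state.shape == (2, 32, 64, 64)     # same shape as the feature map
    assert lstm_feats.shape == (2, 32, 64, 64)

    # Later steps reuse the returned cell state
    attention_map, cell_state, _ = lstm(features, cell_state)
    return attention_map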


class GeneratorBlock(nn.Module):
    def __init__(self, blocks=3, layers=1, input_ch=3, out_ch=32, kernel_size=None, stride=1, padding=1, groups=1,
                 dilation=1):
        """
        :type kernel_size: iterable or int
        """
        super(GeneratorBlock, self).__init__()
        if kernel_size is None:
            kernel_size = [3, 3]
        self.blocks = blocks
        self.layers = layers
        self.input_ch = input_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.groups = groups
        self.dilation = dilation
        self.sigmoid = nn.Sigmoid()
        self.resnet = nn.Sequential(
            ResNetBlock(
                blocks=self.blocks,
                layers=self.layers,
                input_ch=self.input_ch,
                out_ch=self.out_ch,
                kernel_size=self.kernel_size,
                stride=self.stride,
                padding=self.padding,
                groups=self.groups,
                dilation=self.dilation
            )
        )
        # Held directly (not wrapped in nn.Sequential) because its forward takes
        # both the features and the previous cell state
        self.LSTM = ConvLSTM(ch=out_ch, kernel_size=kernel_size)

    def forward(self, original_image, prev_cell_state=None):
        x = self.resnet(original_image)
        attention_map, cell_state, lstm_feats = self.LSTM(x, prev_cell_state)
        # The attention map re-weights the original image for the next block
        x = attention_map * original_image
        return x, attention_map, cell_state, lstm_feats


class Generator(nn.Module):
    def __init__(self, repetition, blocks=3, layers=1, input_ch=3, out_ch=32, kernel_size=None, stride=1, padding=1,
                 groups=1, dilation=1):
        """
        :type kernel_size: iterable or int
        """
        super(Generator, self).__init__()
        if kernel_size is None:
            kernel_size = [3, 3]
        self.blocks = blocks
        self.layers = layers
        self.input_ch = input_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.groups = groups
        self.dilation = dilation
        self.repetition = repetition
        self.generator_block = GeneratorBlock(blocks=blocks,
                                              layers=layers,
                                              input_ch=input_ch,
                                              out_ch=out_ch,
                                              kernel_size=kernel_size,
                                              stride=stride,
                                              padding=padding,
                                              groups=groups,
                                              dilation=dilation)
        # The same block is registered for every repetition, so one set of weights
        # is reused recurrently across the time steps
        self.generator_blocks = nn.ModuleList()
        for _ in range(repetition):
            self.generator_blocks.append(self.generator_block)

    def forward(self, x):
        cell_state = None
        attention_maps = []
        for generator_block in self.generator_blocks:
            x, attention_map, cell_state, lstm_feats = generator_block(x, cell_state)
            attention_maps.append(attention_map)
        # Returns the attended image and the attention map from every time step
        # (the last entry is the final map)
        return x, attention_maps


class AttentiveRNNLoss(nn.Module):
    """Weighted MSE over the attention maps produced by the Generator.

    Earlier time steps are down-weighted by powers of 0.8. The caller passes the list
    of attention maps returned by Generator.forward() together with the attention label.
    """

    def __init__(self):
        super(AttentiveRNNLoss, self).__init__()

    def forward(self, attention_maps, label_tensor):
        loss = 0.0
        n = len(attention_maps)
        for index, attention_map in enumerate(attention_maps):
            mse_loss = (0.8 ** (n - index + 1)) * nn.MSELoss()(attention_map, label_tensor)
            loss += mse_loss

        return loss, attention_maps[-1]


class DiscriminativeNet(nn.Module):
    def __init__(self, W, H):
        super(DiscriminativeNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2)
        self.conv6 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=2)
        self.conv_map = nn.Conv2d(in_channels=128, out_channels=1, kernel_size=5, stride=1, padding=2, bias=False)
        self.conv7 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=5, stride=4, padding=2)
        self.conv8 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=4, padding=2)
        self.conv9 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=5, stride=4, padding=2)
        # Three stride-4 convolutions reduce each spatial side by a factor of 64,
        # so W and H are assumed to be multiples of 64
        self.fc1 = nn.Linear(32 * (W // 64) * (H // 64), 1024)
        self.fc2 = nn.Linear(1024, 1)

    def forward(self, x):
        x1 = F.leaky_relu(self.conv1(x))
        x2 = F.leaky_relu(self.conv2(x1))
        x3 = F.leaky_relu(self.conv3(x2))
        x4 = F.leaky_relu(self.conv4(x3))
        x5 = F.leaky_relu(self.conv5(x4))
        x6 = F.leaky_relu(self.conv6(x5))
        # Attention map predicted by the discriminator, used to re-weight its own features
        attention_map = self.conv_map(x6)
        x7 = F.leaky_relu(self.conv7(attention_map * x6))
        x8 = F.leaky_relu(self.conv8(x7))
        x9 = F.leaky_relu(self.conv9(x8))
        x9 = x9.view(x9.size(0), -1)  # flatten the tensor
        fc1 = self.fc1(x9)
        fc2 = self.fc2(fc1)
        fc_out = torch.sigmoid(fc2)

        # Keep fc_out away from exactly 0 or 1 for stability of the log in the GAN loss
        fc_out = torch.clamp(fc_out, min=1e-7, max=1 - 1e-7)

        return fc_out, attention_map, fc2
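

# Minimal smoke test (not part of the original code): wires the pieces together with
# random tensors to check that shapes line up. The 64x64 size, 4 repetitions and batch
# size of 2 are arbitrary assumptions.
if __name__ == "__main__":
    generator = Generator(repetition=4)
    discriminator = DiscriminativeNet(W=64, H=64)
    attention_criterion = AttentiveRNNLoss()

    images = torch.rand(2, 3, 64, 64)            # degraded input images
    attention_labels = torch.rand(2, 1, 64, 64)  # stand-in for the attention supervision (e.g. damage masks)

    attended, attention_maps = generator(images)
    print("attended:", tuple(attended.shape), "maps:", len(attention_maps))

    attn_loss, final_map = attention_criterion(attention_maps, attention_labels)
    print("attention loss:", attn_loss.item())

    validity, disc_attention, logits = discriminator(attended)
    print("validity:", tuple(validity.shape), "disc attention:", tuple(disc_attention.shape))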