# This file contains modules common to various models

import math

import numpy as np
import requests
import torch
import torch.nn as nn
from PIL import Image, ImageDraw

from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
from utils.plots import color_list


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
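

# Note (illustrative): autopad(3) == 1 and autopad((3, 5)) == [1, 2], so a
# stride-1 Conv built with autopad keeps H and W unchanged ('same' padding)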


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
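

# Note (illustrative): with c1 == c2, math.gcd(c1, c2) == c1, so each input channel
# gets its own filter (true depthwise); otherwise this is only a grouped convolution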


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):  # forward without BN, used after Conv2d and BatchNorm2d are fused
        return self.act(self.conv(x))
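

# Shape sketch (illustrative): Conv(3, 32, k=3, s=2) maps x(b,3,640,640) -> x(b,32,320,320)
# via Conv2d -> BatchNorm2d -> SiLU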


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2  # residual add only when input and output shapes match

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
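

# Note: the input follows two paths -- a bottleneck stack (cv1 -> m -> cv3) and a plain
# 1x1 projection (cv2) -- which are concatenated and fused by cv4, the cross-stage-partial
# design of the CSPNet repository linked above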


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
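

# Note: C3 is a simplified BottleneckCSP -- both branches use Conv (conv + BN + act),
# so the standalone BatchNorm2d and LeakyReLU of BottleneckCSP are no longer needed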


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
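

# Note: each MaxPool2d uses stride 1 and padding k // 2, so every pooled map keeps the
# input's H and W and can be concatenated channel-wise; with k=(5, 9, 13), cv2 sees 4 * c_ channels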


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
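

# Note (illustrative): the four strided slices pick out the even/odd pixel grid, equivalent
# to Contract(gain=2) below, e.g. x(b,3,640,640) -> cat -> x(b,12,320,320) -> Conv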


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert (H % s == 0) and (W % s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)
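

# Quick check (illustrative): Contract(gain=2)(torch.zeros(1, 64, 80, 80)).shape == (1, 256, 40, 40)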


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C % s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)
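

# Quick check (illustrative): Expand(gain=2)(torch.zeros(1, 64, 80, 80)).shape == (1, 16, 160, 160)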


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):  # x[0] = inference output when the model returns an (inference, train) tuple
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
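

# Usage sketch (illustrative): m = nn.Sequential(model, NMS()) appends NMS so the
# pipeline returns a list of (n, 6) tensors [xyxy, conf, cls], one per image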


class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    img_size = 640  # inference size (pixels)
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()
        # self.stride and self.names (used below) are expected to be copied onto this
        # wrapper from the underlying model, e.g. by Model.autoshape() in yolo.py

    def autoshape(self):
        print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
        return self

    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
        #   filename:   imgs = 'data/samples/zidane.jpg'
        #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        #   PIL:             = Image.open('image.jpg')  # HWC x(720,1280,3)
        #   numpy:           = np.zeros((720,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,720,1280)  # BCHW
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1 = [], []  # image and inference shapes
        for i, im in enumerate(imgs):
            if isinstance(im, str):  # filename or uri
                im = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)  # open
            im = np.array(im)  # to numpy
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = (size / max(s))  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32

        # Inference
        with torch.no_grad():
            y = self.model(x, augment, profile)[0]  # forward
        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS

        # Post-process
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])

        return Detections(imgs, y, self.names)
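

# Usage sketch (illustrative, assumes the ultralytics/yolov5 torch.hub entrypoint):
#   model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).autoshape()
#   results = model(['zidane.jpg', 'bus.jpg'], size=640)  # paths, URLs, PIL, numpy or torch inputs
#   results.print()  # or results.show(), results.save()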


class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, names=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)

    def display(self, pprint=False, show=False, save=False, render=False):
        colors = color_list()
        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
            s = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
            if pred is not None:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                if show or save or render:
                    img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
                    for *box, conf, cls in pred:  # xyxy, confidence, class
                        # s += '%s %.2f, ' % (names[int(cls)], conf)  # label
                        ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10])  # plot
            if pprint:
                print(s.rstrip(', '))
            if show:
                img.show(f'image {i}')  # show
            if save:
                f = f'results{i}.jpg'
                img.save(f)  # save
                print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n')
            if render:
                self.imgs[i] = np.asarray(img)

    def print(self):
        self.display(pprint=True)  # print results

    def show(self):
        self.display(show=True)  # show results

    def save(self):
        self.display(save=True)  # save results

    def render(self):
        self.display(render=True)  # render results
        return self.imgs

    def __len__(self):
        return self.n

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
        for d in x:
            for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x
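

# e.g. (illustrative) per-image access after inference:
#   for d in results.tolist():
#       print(d.xyxy)  # (n, 6) tensor for this image: x1, y1, x2, y2, conf, cls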


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
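

# Quick check (illustrative): Classify(1280, 1000)(torch.zeros(8, 1280, 20, 20)).shape == (8, 1000)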