This repository was archived by the owner on Jun 22, 2022. It is now read-only.

Commit ba37aed

minerva-ml committed
fixed initial pipe
1 parent 3f1ad37 commit ba37aed
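
In short: this commit adds a PSPNet architecture (a new file plus a registry entry in models.py), makes the TTA transform and its inverse injectable instead of hard-coded in loaders.py, drops the color-shift TTA option, removes a redundant ReLU from the scSE decoder block, and appends a bounding-box step to the simplified postprocessing pipeline.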

12 files changed (+303 / -198 lines)


common_blocks/architectures/base.py

Lines changed: 1 addition & 2 deletions
@@ -68,7 +68,6 @@ def __init__(self, in_channels, middle_channels, out_channels):
         self.conv1 = Conv2dBnRelu(in_channels, middle_channels)
         self.conv2 = Conv2dBnRelu(middle_channels, out_channels)
         self.upsample = nn.Upsample(scale_factor=2, mode='bilinear')
-        self.relu = nn.ReLU(inplace=True)
         self.channel_se = ChannelSELayer(out_channels, reduction=16)
         self.spatial_se = SpatialSELayer(out_channels)

@@ -82,7 +81,7 @@ def forward(self, x, e=None):
         channel_se = self.channel_se(x)
         spatial_se = self.spatial_se(x)

-        x = self.relu(channel_se + spatial_se)
+        x = channel_se + spatial_se
         return x
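
The decoder block now returns the raw sum of the two squeeze-and-excitation branches. A minimal sketch of the changed forward pass, assuming ChannelSELayer and SpatialSELayer follow the standard scSE formulation (each returns the input rescaled by a sigmoid-gated attention map, so for the non-negative post-BN-ReLU features in this block the sum is already non-negative and the trailing ReLU was a no-op):

def scse_forward(x, channel_se_layer, spatial_se_layer):
    # Each branch rescales x by gates in [0, 1] (sigmoid outputs).
    channel_se = channel_se_layer(x)   # channel-wise recalibration
    spatial_se = spatial_se_layer(x)   # pixel-wise recalibration
    return channel_se + spatial_se     # trailing ReLU removed by this commit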

common_blocks/architectures/pspnet.py (new file; the path follows from the `from .architectures import ... pspnet` change in models.py below)

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+from torch import nn
+from torch.nn import functional as F
+import torch
+
+from .base import Conv2dBnRelu
+from .encoders import ResNetEncoders
+
+
+class PSPModule(nn.Module):
+    def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)):
+        super().__init__()
+        self.stages = []  # immediately overwritten below
+        self.stages = nn.ModuleList([self._make_stage(features, size) for size in sizes])
+        self.bottleneck = nn.Conv2d(features * (len(sizes) + 1), out_features, kernel_size=1)
+        self.relu = nn.ReLU()
+
+    def _make_stage(self, features, size):
+        prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+        conv = nn.Conv2d(features, features, kernel_size=1, bias=False)
+        return nn.Sequential(prior, conv)
+
+    def forward(self, feats):
+        h, w = feats.size(2), feats.size(3)
+        priors = [F.upsample(input=stage(feats), size=(h, w), mode='bilinear') for stage in self.stages] + [feats]
+        bottle = self.bottleneck(torch.cat(priors, 1))
+        return self.relu(bottle)
+
+
+class PSPUpsample(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 3, padding=1),
+            nn.BatchNorm2d(out_channels),
+            nn.PReLU()
+        )
+
+    def forward(self, x):
+        p = F.upsample(input=x, scale_factor=2, mode='bilinear')
+        return self.conv(p)
+
+
+class PSPNet(nn.Module):
+    def __init__(self,
+                 encoder_depth,
+                 num_classes=2,
+                 sizes=(1, 2, 3, 6),
+                 deep_features_size=1024,
+                 dropout_2d=0.2,
+                 pretrained=False,
+                 use_hypercolumn=False,
+                 pool0=False):
+        super().__init__()
+        self.num_classes = num_classes
+        self.dropout_2d = dropout_2d
+        self.use_hypercolumn = use_hypercolumn
+
+        self.encoders = ResNetEncoders(encoder_depth, pretrained=pretrained, pool0=pool0)
+
+        if encoder_depth in [18, 34]:
+            bottom_channel_nr = 512
+        elif encoder_depth in [50, 101, 152]:
+            bottom_channel_nr = 2048
+        else:
+            raise NotImplementedError('only the 18, 34, 50, 101, 152 versions of ResNet are implemented')
+
+        self.psp = PSPModule(bottom_channel_nr, deep_features_size, sizes)
+
+        self.up4 = PSPUpsample(deep_features_size, deep_features_size // 2)
+        self.up3 = PSPUpsample(deep_features_size // 2, deep_features_size // 4)
+        self.up2 = PSPUpsample(deep_features_size // 4, deep_features_size // 8)
+        self.up1 = PSPUpsample(deep_features_size // 8, deep_features_size // 16)
+
+        if self.use_hypercolumn:
+            self.final = nn.Sequential(Conv2dBnRelu(15 * bottom_channel_nr // 8, bottom_channel_nr // 8),
+                                       nn.Conv2d(bottom_channel_nr // 8, num_classes, kernel_size=1, padding=0))
+        else:
+            self.final = nn.Sequential(Conv2dBnRelu(bottom_channel_nr // 8, bottom_channel_nr // 8),
+                                       nn.Conv2d(bottom_channel_nr // 8, num_classes, kernel_size=1, padding=0))
+
+    def forward(self, x):
+        encoder2, encoder3, encoder4, encoder5 = self.encoders(x)
+        encoder5 = F.dropout2d(encoder5, p=self.dropout_2d)
+
+        psp = self.psp(encoder5)
+
+        up4 = self.up4(psp)
+        up3 = self.up3(up4)
+        up2 = self.up2(up3)
+        up1 = self.up1(up2)
+        if self.use_hypercolumn:
+            hypercolumn = torch.cat([up1,
+                                     F.upsample(up2, scale_factor=2, mode='bilinear'),
+                                     F.upsample(up3, scale_factor=4, mode='bilinear'),
+                                     F.upsample(up4, scale_factor=8, mode='bilinear'),
+                                     ], 1)
+            drop = F.dropout2d(hypercolumn, p=self.dropout_2d)
+        else:
+            drop = F.dropout2d(up1, p=self.dropout_2d)  # up1, not up4: only up1's channel count matches what self.final expects here
+        return self.final(drop)
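
A quick smoke test of the new model might look like the following (a sketch, not part of the commit; it assumes the file lands at the path above and that ResNetEncoders returns the usual four torchvision-ResNet feature maps):

import torch

from common_blocks.architectures.pspnet import PSPNet

# Mirrors the 'PSPNet' registry entry added in models.py, minus pretrained weights.
model = PSPNet(encoder_depth=34, pretrained=False, use_hypercolumn=True, pool0=False)
model.eval()

with torch.no_grad():
    logits = model(torch.randn(1, 3, 256, 256))  # 1 x num_classes x H' x W'
# H' and W' depend on the encoder strides (pool0) and the four 2x PSPUpsample stages.
print(logits.shape)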

common_blocks/augmentation.py

Lines changed: 0 additions & 3 deletions
@@ -110,9 +110,6 @@ def test_time_augmentation_transform(image, tta_parameters):
         image = np.flipud(image)
     if tta_parameters['lr_flip']:
         image = np.fliplr(image)
-    if tta_parameters['color_shift']:
-        tta_intensity = reseed(tta_intensity_seq, deterministic=False)
-        image = tta_intensity.augment_image(image)
     image = rotate(image, tta_parameters['rotation'])
     return image
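
TTA parameter dicts are now purely geometric. A small usage sketch against the trimmed function (illustrative input; rotate is the module's own helper):

import numpy as np

from common_blocks.augmentation import test_time_augmentation_transform

image = np.random.rand(256, 256, 3).astype(np.float32)
tta_parameters = {'ud_flip': True, 'lr_flip': False, 'rotation': 90}  # the 'color_shift' key is gone
augmented = test_time_augmentation_transform(image, tta_parameters)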

common_blocks/callbacks.py

Lines changed: 14 additions & 5 deletions
@@ -18,7 +18,7 @@
 from common_blocks.utils.misc import get_logger, sigmoid, softmax, make_apply_transformer, get_list_of_image_predictions
 from common_blocks.utils.io import read_masks
 from .metrics import intersection_over_union_thresholds
-from .postprocessing import crop_image, resize_image, binarize, label
+from .postprocessing import crop_image, resize_image, binarize, label, masks_to_bounding_boxes

 logger = get_logger()

@@ -670,7 +670,16 @@ def postprocessing_pipeline_simplified(cache_dirpath, loader_mode):
                     input_steps=[binarizer],
                     adapter=Adapter({'images': E(binarizer.name, 'binarized_images'),
                                      }))
-    labeler.set_mode_inference()
-    labeler.set_parameters_upstream({'experiment_directory': cache_dirpath,
-                                     'is_fittable': False})
-    return labeler
+    bounding_boxer = Step(name='bounding_boxer',
+                          transformer=make_apply_transformer(masks_to_bounding_boxes,
+                                                             output_name='labeled_images',
+                                                             apply_on=['images']),
+                          input_steps=[labeler],
+                          adapter=Adapter({'images': E(labeler.name, 'labeled_images'),
+                                           }))
+
+    bounding_boxer.set_mode_inference()
+    bounding_boxer.set_parameters_upstream({'experiment_directory': cache_dirpath,
+                                            'is_fittable': False
+                                            })
+    return bounding_boxer
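
The simplified postprocessing pipeline now ends one step later: binarizer -> labeler -> bounding_boxer, so callers get bounding-box masks instead of labeled masks. A rough sketch of driving it; the input-dict layout is dictated by upstream steps not shown in this hunk, so treat the keys as placeholders:

from common_blocks.callbacks import postprocessing_pipeline_simplified

pipeline = postprocessing_pipeline_simplified(cache_dirpath='/tmp/experiment', loader_mode='resize')
output = pipeline.transform(data)   # data: dict feeding the upstream prediction steps
boxes = output['labeled_images']    # key set by output_name in the bounding_boxer step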

common_blocks/loaders.py

Lines changed: 17 additions & 45 deletions
@@ -202,9 +202,10 @@ def load_target(self, data_source, index, load_func):


 class ImageSegmentationTTADataset(ImageSegmentationDataset):
-    def __init__(self, tta_params, *args, **kwargs):
+    def __init__(self, tta_params, tta_transform, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.tta_params = tta_params
+        self.tta_transform = tta_transform

     def __getitem__(self, index):
         if self.image_source == 'memory':

@@ -222,7 +223,7 @@ def __getitem__(self, index):

         if self.tta_params is not None:
             tta_transform_specs = self.tta_params[index]
-            Xi = test_time_augmentation_transform(Xi, tta_transform_specs)
+            Xi = self.tta_transform(Xi, tta_transform_specs)
         Xi = to_pil(Xi)

         if self.image_transform is not None:

@@ -320,6 +321,7 @@ def transform(self, X, tta_params, **kwargs):

     def get_datagen(self, X, tta_params, loader_params):
         dataset = self.dataset(tta_params=tta_params,
+                               tta_transform=self.augmentation_params.tta_transform,
                                X=X,
                                y=None,
                                train_mode=False,

@@ -369,8 +371,6 @@ def __init__(self, loader_params, dataset_params, augmentation_params):
                                                   transforms.Normalize(mean=self.dataset_params.MEAN,
                                                                        std=self.dataset_params.STD),
                                                   ])
-        self.mask_transform = transforms.Compose([transforms.Lambda(preprocess_target),
-                                                  ])

         self.image_augment_inference = ImgAug(self.augmentation_params['image_augment_inference'])
         self.image_augment_with_target_inference = ImgAug(

@@ -394,22 +394,18 @@ def transform(self, X, **kwargs):
         return {'X_tta': X_tta, 'tta_params': tta_params, 'img_ids': img_ids}

     def _get_tta_data(self, i, row):
-        original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0, 'color_shift': False}
+        original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0}
         tta_specs = [original_specs]

         ud_options = [True, False] if self.tta_transformations.flip_ud else [False]
         lr_options = [True, False] if self.tta_transformations.flip_lr else [False]
         rot_options = [0, 90, 180, 270] if self.tta_transformations.rotation else [0]
-        if self.tta_transformations.color_shift_runs:
-            color_shift_options = list(range(1, self.tta_transformations.color_shift_runs + 1, 1))
-        else:
-            color_shift_options = [False]

-        for ud, lr, rot, color in product(ud_options, lr_options, rot_options, color_shift_options):
-            if ud is False and lr is False and rot == 0 and color is False:
+        for ud, lr, rot in product(ud_options, lr_options, rot_options):
+            if ud is False and lr is False and rot == 0:
                 continue
             else:
-                tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot, 'color_shift': color})
+                tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot})

         img_ids = [i] * len(tta_specs)
         X_rows = [row] * len(tta_specs)

@@ -431,30 +427,27 @@ def transform(self, X, **kwargs):
         return {'X_tta': [X_tta], 'tta_params': tta_params, 'img_ids': img_ids}

     def _get_tta_data(self, i, row):
-        original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0, 'color_shift': False}
+        original_specs = {'ud_flip': False, 'lr_flip': False, 'rotation': 0}
         tta_specs = [original_specs]

         ud_options = [True, False] if self.tta_transformations.flip_ud else [False]
         lr_options = [True, False] if self.tta_transformations.flip_lr else [False]
         rot_options = [0, 90, 180, 270] if self.tta_transformations.rotation else [0]
-        if self.tta_transformations.color_shift_runs:
-            color_shift_options = list(range(1, self.tta_transformations.color_shift_runs + 1, 1))
-        else:
-            color_shift_options = [False]

-        for ud, lr, rot, color in product(ud_options, lr_options, rot_options, color_shift_options):
-            if ud is False and lr is False and rot == 0 and color is False:
+        for ud, lr, rot in product(ud_options, lr_options, rot_options):
+            if ud is False and lr is False and rot == 0:
                 continue
             else:
-                tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot, 'color_shift': color})
+                tta_specs.append({'ud_flip': ud, 'lr_flip': lr, 'rotation': rot})

         img_ids = [i] * len(tta_specs)
         X_rows = [row] * len(tta_specs)
         return X_rows, tta_specs, img_ids


 class TestTimeAugmentationAggregator(BaseTransformer):
-    def __init__(self, method, nthreads):
+    def __init__(self, tta_inverse_transform, method, nthreads):
+        self.tta_inverse_transform = tta_inverse_transform
         self.method = method
         self.nthreads = nthreads

@@ -471,6 +464,7 @@ def transform(self, images, tta_params, img_ids, **kwargs):
         _aggregate_augmentations = partial(aggregate_augmentations,
                                            images=images,
                                            tta_params=tta_params,
+                                           tta_inverse_transform=self.tta_inverse_transform,
                                            img_ids=img_ids,
                                            agg_method=self.agg_method)
         unique_img_ids = set(img_ids)

@@ -480,40 +474,18 @@ def transform(self, images, tta_params, img_ids, **kwargs):
         return {'aggregated_prediction': averages_images}


-def aggregate_augmentations(img_id, images, tta_params, img_ids, agg_method):
+def aggregate_augmentations(img_id, images, tta_params, tta_inverse_transform, img_ids, agg_method):
     tta_predictions_for_id = []
     for image, tta_param, ids in zip(images, tta_params, img_ids):
         if ids == img_id:
-            tta_prediction = test_time_augmentation_inverse_transform(image, tta_param)
+            tta_prediction = tta_inverse_transform(image, tta_param)
             tta_predictions_for_id.append(tta_prediction)
         else:
             continue
     tta_averaged = agg_method(np.stack(tta_predictions_for_id, axis=-1))
     return tta_averaged


-def test_time_augmentation_transform(image, tta_parameters):
-    if tta_parameters['ud_flip']:
-        image = np.flipud(image)
-    if tta_parameters['lr_flip']:
-        image = np.fliplr(image)
-    if tta_parameters['color_shift']:
-        random_color_shift = reseed(intensity_seq, deterministic=False)
-        image = random_color_shift.augment_image(image)
-    image = rotate(image, tta_parameters['rotation'])
-    return image
-
-
-def test_time_augmentation_inverse_transform(image, tta_parameters):
-    image = per_channel_rotation(image.copy(), -1 * tta_parameters['rotation'])
-
-    if tta_parameters['lr_flip']:
-        image = per_channel_fliplr(image.copy())
-    if tta_parameters['ud_flip']:
-        image = per_channel_flipud(image.copy())
-    return image
-
-
 def per_channel_flipud(x):
     x_ = x.copy()
     for i, channel in enumerate(x):
common_blocks/models.py

Lines changed: 7 additions & 1 deletion
@@ -5,7 +5,7 @@
 from toolkit.pytorch_transformers.models import Model
 from torch.autograd import Variable

-from .architectures import unet, large_kernel_matters
+from .architectures import unet, large_kernel_matters, pspnet
 from . import callbacks as cbk
 from .lovasz_losses import lovasz_hinge
 from common_blocks.utils.misc import sigmoid, softmax, get_list_of_image_predictions

@@ -36,6 +36,11 @@
                                 'dropout_2d': 0.0, 'use_relu': True, 'pool0': False
                                 },
               'init_weights': False},
+    'PSPNet': {'model': pspnet.PSPNet,
+               'model_config': {'encoder_depth': 34, 'pretrained': True,
+                                'use_hypercolumn': True, 'pool0': False
+                                },
+               }
 }

@@ -164,6 +169,7 @@ def set_loss(self):
         elif self.activation_func == 'sigmoid':
             loss_function = lovasz_loss
             # loss_function = DiceLoss()
+            # loss_function = FocalWithLogitsLoss()
             # loss_function = nn.BCEWithLogitsLoss()
         else:
             raise Exception('Only softmax and sigmoid activations are allowed')
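
Registering the architecture makes model selection a dictionary lookup. A sketch of that lookup; the registry's variable name is not visible in this hunk, so ARCHITECTURES is a stand-in:

entry = ARCHITECTURES['PSPNet']                    # the dict extended above
network = entry['model'](**entry['model_config'])  # PSPNet(encoder_depth=34, pretrained=True, ...)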
