Add support for absolute paths, fix detach problem, refactor

gordicaleksa · gordicaleksa · commit 1719b37e3267 · 2020-08-02T10:58:09.000+02:00
diff --git a/neural_style_transfer.py b/neural_style_transfer.py
@@ -141,16 +141,18 @@ def closure():
     # sorted so that the ones on the top are more likely to be changed than the ones on the bottom
     #
     parser = argparse.ArgumentParser()
-    parser.add_argument("--content_img_name", type=str, help="content image name", default='tubingen.png')
-    parser.add_argument("--style_img_name", type=str, help="style image name", default='kandinsky.jpg')
+    parser.add_argument("--content_img_name", type=str, help="content image name", default='figures.jpg')
+    parser.add_argument("--style_img_name", type=str, help="style image name", default='vg_starry_night.jpg')
     parser.add_argument("--height", type=int, help="height of content and style images", default=400)
+
     parser.add_argument("--content_weight", type=float, help="weight factor for content loss", default=1e5)
     parser.add_argument("--style_weight", type=float, help="weight factor for style loss", default=3e4)
     parser.add_argument("--tv_weight", type=float, help="weight factor for total variation loss", default=1e0)
-    parser.add_argument("--saving_freq", type=int, help="saving frequency for intermediate images (-1 means only final)", default=-1)
+
     parser.add_argument("--optimizer", type=str, choices=['lbfgs', 'adam'], default='lbfgs')
-    parser.add_argument("--init_method", type=str, choices=['random', 'content', 'style'], default='content')
     parser.add_argument("--model", type=str, choices=['vgg16', 'vgg19'], default='vgg19')
+    parser.add_argument("--init_method", type=str, choices=['random', 'content', 'style'], default='content')
+    parser.add_argument("--saving_freq", type=int, help="saving frequency for intermediate images (-1 means only final)", default=-1)
     args = parser.parse_args()
 
     # some values of weights that worked for figures.jpg, vg_starry_night.jpg (starting point for finding good images)
diff --git a/reconstruct_image_from_representation.py b/reconstruct_image_from_representation.py
@@ -44,7 +44,7 @@ def reconstruct_image_from_representation(config):
     should_reconstruct_content = config['should_reconstruct_content']
     should_visualize_representation = config['should_visualize_representation']
     dump_path = os.path.join(config['output_img_dir'], ('c' if should_reconstruct_content else 's') + '_reconstruction_' + config['optimizer'])
-    dump_path = os.path.join(dump_path, config['content_img_name'].split('.')[0] if should_reconstruct_content else config['style_img_name'].split('.')[0])
+    dump_path = os.path.join(dump_path, os.path.basename(config['content_img_name']).split('.')[0] if should_reconstruct_content else os.path.basename(config['style_img_name']).split('.')[0])
     os.makedirs(dump_path, exist_ok=True)
 
     content_img_path = os.path.join(config['content_images_dir'], config['content_img_name'])
diff --git a/utils/utils.py b/utils/utils.py
@@ -13,26 +13,35 @@
 IMAGENET_STD_NEUTRAL = [1, 1, 1]
 
 
+#
+# Image manipulation util functions
+#
+
 def load_image(img_path, target_shape=None):
     if not os.path.exists(img_path):
         raise Exception(f'Path does not exist: {img_path}')
-    img = cv.imread(img_path)[:, :, ::-1].astype(np.float32)  # [:, :, ::-1] converts rgb into bgr (opencv contraint...)
-    img /= 255.0  # get to [0, 1] range
-    if target_shape is not None:
+    img = cv.imread(img_path)[:, :, ::-1]  # [:, :, ::-1] converts BGR (opencv format...) into RGB
+
+    if target_shape is not None:  # resize section
         if isinstance(target_shape, int) and target_shape != -1:  # scalar -> implicitly setting the height
-            ratio = target_shape / img.shape[0]
-            width = int(img.shape[1] * ratio)
-            img = cv.resize(img, (width, target_shape), interpolation=cv.INTER_CUBIC)
+            current_height, current_width = img.shape[:2]
+            new_height = target_shape
+            new_width = int(current_width * (new_height / current_height))
+            img = cv.resize(img, (new_width, new_height), interpolation=cv.INTER_CUBIC)
         else:  # set both dimensions to target shape
             img = cv.resize(img, (target_shape[1], target_shape[0]), interpolation=cv.INTER_CUBIC)
+
+    # this needs to go after resizing - otherwise cv.resize will push values outside of [0,1] range
+    img = img.astype(np.float32)  # convert from uint8 to float32
+    img /= 255.0  # get to [0, 1] range
     return img
 
 
 def prepare_img(img_path, target_shape, device):
     img = load_image(img_path, target_shape=target_shape)
 
-    # normalize using ImageNet's mean and std (VGG was trained on images normalized this way)
-    # [0, 255] range works much better than [0, 1] range (VGG was again trained that way)
+    # normalize using ImageNet's mean
+    # [0, 255] range works much better than [0, 1] range
     transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.Lambda(lambda x: x.mul(255)),
@@ -44,24 +53,14 @@ def prepare_img(img_path, target_shape, device):
     return img
 
 
-def get_uint8_range(x):
-    if isinstance(x, np.ndarray):
-        x -= np.min(x)
-        x /= np.max(x)
-        x *= 255
-        return x
-    else:
-        raise ValueError(f'Expected numpy array got {type(x)}')
-
-
 def save_image(img, img_path):
     if len(img.shape) == 2:
         img = np.stack((img,) * 3, axis=-1)
     cv.imwrite(img_path, img[:, :, ::-1])  # [:, :, ::-1] converts rgb into bgr (opencv contraint...)
 
 
 def generate_out_img_name(config):
-    prefix = config['content_img_name'].split('.')[0] + '_' + config['style_img_name'].split('.')[0]
+    prefix = os.path.basename(config['content_img_name']).split('.')[0] + '_' + os.path.basename(config['style_img_name']).split('.')[0]
     # called from the reconstruction script
     if 'reconstruct_script' in config:
         suffix = f'_o_{config["optimizer"]}_h_{str(config["height"])}_m_{config["model"]}{config["img_format"][1]}'
@@ -72,7 +71,7 @@ def generate_out_img_name(config):
 
 def save_and_maybe_display(optimizing_img, dump_path, config, img_id, num_of_iterations, should_display=False):
     saving_freq = config['saving_freq']
-    out_img = optimizing_img.squeeze(axis=0).to('cpu').numpy()
+    out_img = optimizing_img.squeeze(axis=0).to('cpu').detach().numpy()
     out_img = np.moveaxis(out_img, 0, 2)  # swap channel from 1st to 3rd position: ch, _, _ -> _, _, chr
 
     # for saving_freq == -1 save only the final result (otherwise save with frequency saving_freq and save the last pic)
@@ -83,11 +82,27 @@ def save_and_maybe_display(optimizing_img, dump_path, config, img_id, num_of_ite
         dump_img += np.array(IMAGENET_MEAN_255).reshape((1, 1, 3))
         dump_img = np.clip(dump_img, 0, 255).astype('uint8')
         cv.imwrite(os.path.join(dump_path, out_img_name), dump_img[:, :, ::-1])
+
     if should_display:
         plt.imshow(np.uint8(get_uint8_range(out_img)))
         plt.show()
 
 
+def get_uint8_range(x):
+    if isinstance(x, np.ndarray):
+        x -= np.min(x)
+        x /= np.max(x)
+        x *= 255
+        return x
+    else:
+        raise ValueError(f'Expected numpy array got {type(x)}')
+
+
+#
+# End of image manipulation util functions
+#
+
+
 # initially it takes some time for PyTorch to download the models into local cache
 def prepare_model(model, device):
     # we are not tuning model weights -> we are only tuning optimizing_img's pixels! (that's why requires_grad=False)