
Commit f53d18b

Add vizualization code to the repository (#56)
1 parent c878bef commit f53d18b

File tree: 3 files changed, +228 −0 lines changed

README.rst

Lines changed: 36 additions & 0 deletions
@@ -13,6 +13,7 @@ torch-optimizer
**torch-optimizer** -- collection of optimizers for PyTorch_.



Simple example
--------------

@@ -65,6 +66,38 @@ Supported Optimizers
+-------------+-------------------------------------------------------------------------------+


Visualisations
--------------
Visualisations help us to see how different algorithms deal with simple
situations like saddle points, local minima, and valleys, and may provide
interesting insights into the inner workings of an algorithm. The Rosenbrock_
and Rastrigin_ benchmark_ functions were selected (their definitions are given
below) because:

* Rosenbrock_ (also known as the banana function) is a non-convex function
  with one global minimum at `(1.0, 1.0)`. The global minimum lies inside a
  long, narrow, parabolic, flat valley. Finding the valley is trivial;
  converging to the global minimum, however, is difficult. Optimization
  algorithms may pay a lot of attention to one coordinate and struggle to
  follow the valley, which is relatively flat.

.. image:: https://upload.wikimedia.org/wikipedia/commons/3/32/Rosenbrock_function.svg

* The Rastrigin_ function is non-convex and has one global minimum at
  `(0.0, 0.0)`. Finding this minimum is a fairly difficult problem due to the
  function's large search space and its large number of local minima.

.. image:: https://upload.wikimedia.org/wikipedia/commons/8/8b/Rastrigin_function.png

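For reference, the definitions implemented in examples/viz_optimizers.py below
are (Rastrigin with A = 10):

.. math::

    f_{\text{Rosenbrock}}(x, y) = (1 - x)^2 + 100\,(y - x^2)^2

    f_{\text{Rastrigin}}(x, y) = 2A + \bigl(x^2 - A\cos(2\pi x)\bigr) + \bigl(y^2 - A\cos(2\pi y)\bigr)
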
Each optimizer performs `501` optimization steps. The learning rate is the best
one found by a hyperparameter search algorithm; the rest of the tuning
parameters are left at their defaults. It is easy to extend the script and tune
other optimizer parameters (see the sketch below).


.. code::

    python examples/viz_optimizers.py

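As one such extension (a hypothetical sketch, not part of the shipped script),
tuning weight_decay alongside the learning rate needs only an extra entry in
the hyperopt search space, plus forwarding it into optimizer_config inside the
objective function:

.. code:: python

    from hyperopt import hp

    import torch_optimizer as optim

    space = {
        'optimizer_class': hp.choice('optimizer_class', [optim.RAdam]),
        'lr': hp.loguniform('lr', -8, 0.7),
        # hypothetical extra dimension; the objective would then build
        # optimizer_config = dict(lr=params['lr'],
        #                         weight_decay=params['weight_decay'])
        'weight_decay': hp.loguniform('weight_decay', -10, -2),
    }
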
AccSGD
------

@@ -322,3 +355,6 @@ learning rate control, and has similar theoretical guarantees on convergence as

.. _Python: https://www.python.org
.. _PyTorch: https://github.com/pytorch/pytorch
.. _Rastrigin: https://en.wikipedia.org/wiki/Rastrigin_function
.. _Rosenbrock: https://en.wikipedia.org/wiki/Rosenbrock_function
.. _benchmark: https://en.wikipedia.org/wiki/Test_functions_for_optimization

examples/requirements-examples.txt

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
torch==1.4.0
hyperopt==0.2.3
torchvision==0.5.0
matplotlib==3.1.3

examples/viz_optimizers.py

Lines changed: 188 additions & 0 deletions
@@ -0,0 +1,188 @@
import math
import numpy as np
import torch_optimizer as optim
import torch
from hyperopt import fmin, tpe, hp
import matplotlib.pyplot as plt


plt.style.use('seaborn-white')


def rosenbrock(tensor):
    # https://en.wikipedia.org/wiki/Test_functions_for_optimization
    x, y = tensor
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2


def rastrigin(tensor, lib=torch):
    # https://en.wikipedia.org/wiki/Test_functions_for_optimization
    x, y = tensor
    A = 10
    f = (
        A * 2
        + (x ** 2 - A * lib.cos(x * math.pi * 2))
        + (y ** 2 - A * lib.cos(y * math.pi * 2))
    )
    return f


def execute_steps(
    func, initial_state, optimizer_class, optimizer_config, num_iter=500
):
    x = torch.Tensor(initial_state).requires_grad_(True)
    optimizer = optimizer_class([x], **optimizer_config)
    # record the full trajectory, including the starting point
    steps = np.zeros((2, num_iter + 1))
    steps[:, 0] = np.array(initial_state)
    for i in range(1, num_iter + 1):
        optimizer.zero_grad()
        f = func(x)
        f.backward(retain_graph=True)
        optimizer.step()
        steps[:, i] = x.detach().numpy()
    return steps


def objective_rastrigin(params):
    lr = params['lr']
    optimizer_class = params['optimizer_class']
    initial_state = (-2.0, 3.5)
    minimum = (0, 0)
    optimizer_config = dict(lr=lr)
    num_iter = 100
    steps = execute_steps(
        rastrigin, initial_state, optimizer_class, optimizer_config, num_iter
    )
    # squared distance of the final iterate from the known global minimum
    return (steps[0][-1] - minimum[0]) ** 2 + (steps[1][-1] - minimum[1]) ** 2


def objective_rosenbrok(params):
    lr = params['lr']
    optimizer_class = params['optimizer_class']
    minimum = (1.0, 1.0)
    initial_state = (-2.0, 2.0)
    optimizer_config = dict(lr=lr)
    num_iter = 100
    steps = execute_steps(
        rosenbrock, initial_state, optimizer_class, optimizer_config, num_iter
    )
    return (steps[0][-1] - minimum[0]) ** 2 + (steps[1][-1] - minimum[1]) ** 2


def plot_rastrigin(grad_iter, optimizer_name, lr):
    x = np.linspace(-4.5, 4.5, 250)
    y = np.linspace(-4.5, 4.5, 250)
    minimum = (0, 0)

    X, Y = np.meshgrid(x, y)
    Z = rastrigin([X, Y], lib=np)

    iter_x, iter_y = grad_iter[0, :], grad_iter[1, :]

    fig = plt.figure(figsize=(8, 8))

    ax = fig.add_subplot(1, 1, 1)
    ax.contour(X, Y, Z, 20, cmap='jet')
    ax.plot(iter_x, iter_y, color='r', marker='x')
    ax.set_title(
        f'Rastrigin func: {optimizer_name} with '
        f'{len(iter_x)} iterations, lr={lr:.6}'
    )
    plt.plot(*minimum, 'gD')
    plt.plot(iter_x[-1], iter_y[-1], 'rD')
    plt.savefig(f'rastrigin_{optimizer_name}.png')


def plot_rosenbrok(grad_iter, optimizer_name, lr):
    x = np.linspace(-2, 2, 250)
    y = np.linspace(-1, 3, 250)
    minimum = (1.0, 1.0)

    X, Y = np.meshgrid(x, y)
    Z = rosenbrock([X, Y])

    iter_x, iter_y = grad_iter[0, :], grad_iter[1, :]

    fig = plt.figure(figsize=(8, 8))

    ax = fig.add_subplot(1, 1, 1)
    ax.contour(X, Y, Z, 90, cmap='jet')
    ax.plot(iter_x, iter_y, color='r', marker='x')

    ax.set_title(
        f'Rosenbrock func: {optimizer_name} with {len(iter_x)} '
        f'iterations, lr={lr:.6}'
    )
    plt.plot(*minimum, 'gD')
    plt.plot(iter_x[-1], iter_y[-1], 'rD')
    plt.savefig(f'rosenbrock_{optimizer_name}.png')


def execute_experiments(
    optimizers, objective, func, plot_func, initial_state, seed=1
):
    for item in optimizers:
        optimizer_class, lr_low, lr_hi = item
        # search only over the learning rate; other parameters stay at defaults
        space = {
            'optimizer_class': hp.choice('optimizer_class', [optimizer_class]),
            'lr': hp.loguniform('lr', lr_low, lr_hi),
        }
        best = fmin(
            fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=200,
            rstate=np.random.RandomState(seed),
        )
        print(best['lr'], optimizer_class)

        steps = execute_steps(
            func,
            initial_state,
            optimizer_class,
            {'lr': best['lr']},
            num_iter=500,
        )
        plot_func(steps, optimizer_class.__name__, best['lr'])


if __name__ == '__main__':
    # python examples/viz_optimizers.py

    # Each optimizer has a tweaked search space to produce better plots and
    # to help converge on a better lr faster.
    optimizers = [
        (optim.AccSGD, -8, -0.1),
        (optim.AdaBound, -8, 0.7),
        (optim.AdaMod, -8, 1.2),
        (optim.DiffGrad, -8, 0.7),
        (optim.Lamb, -8, 0.7),
        (optim.NovoGrad, -6, -2.0),
        (optim.RAdam, -8, 0.7),
        (optim.SGDW, -8, -0.9),
        (optim.Yogi, -8, 0.1),
    ]
    execute_experiments(
        optimizers, objective_rastrigin, rastrigin, plot_rastrigin, (-2.0, 3.5)
    )

    optimizers = [
        (optim.AccSGD, -8, -0.1),
        (optim.AdaBound, -8, 0.7),
        (optim.AdaMod, -4, 1.0),
        (optim.DiffGrad, -8, 0.2),
        (optim.Lamb, -8, -0.5),
        (optim.NovoGrad, -8, -1.0),
        (optim.RAdam, -8, 0.7),
        (optim.SGDW, -8, 0.7),
        (optim.Yogi, -8, 0.1),
    ]
    execute_experiments(
        optimizers,
        objective_rosenbrok,
        rosenbrock,
        plot_rosenbrok,
        (-2.0, 2.0),
    )
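
A minimal usage sketch (not part of the commit): assuming the script is run
from the examples/ directory so it is importable, a single optimizer can be
traced and plotted directly, with a hand-picked learning rate instead of a
hyperopt-found one:

.. code:: python

    import torch_optimizer as optim

    from viz_optimizers import execute_steps, plot_rastrigin, rastrigin

    # Trace RAdam on Rastrigin from the script's starting point and write
    # rastrigin_RAdam.png, skipping the full hyperopt sweep.
    steps = execute_steps(
        rastrigin, (-2.0, 3.5), optim.RAdam, {'lr': 0.01}, num_iter=500
    )
    plot_rastrigin(steps, 'RAdam', 0.01)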
