
Commit 49715b2

line search but it's not fast...
1 parent 5ee3fd4 commit 49715b2

File tree

1 file changed: +103 -16 lines changed

src/numericalnim/optimize.nim

Lines changed: 103 additions & 16 deletions
@@ -133,15 +133,67 @@ proc eye[T](n: int): Tensor[T] =
   for i in 0 ..< n:
     result[i, i] = 1
 
-proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+type LineSearchCriterion = enum
+  Armijo, Wolfe, WolfeStrong, None
+
+proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Tensor[U]): T, criterion: LineSearchCriterion, fastMode: bool = false) =
+  # note: set initial alpha for the methods as 1 / sqrt(dot(grad, grad)) so the first step has length 1.
+  if criterion == None:
+    return
+  var gradient = tensorGradient(f, x0, fastMode=fastMode)
+  let dirDerivInit = dot(gradient, p)
+
+  if 0 < dirDerivInit:
+    # p is pointing uphill, use whatever alpha we currently have.
+    return
+
+  let fInit = f(x0)
+  var counter = 0
+  alpha = 1
+  while counter < 20:
+    let x = x0 + alpha * p
+    let fx = f(x)
+    gradient = tensorGradient(f, x, fastMode=fastMode)
+    counter += 1
+
+    if fx > fInit + 1e-4*alpha * dirDerivInit: # c1 = 1e-4 multiply as well? Doesn't seem to work
+      alpha *= 0.5
+    else:
+      if criterion == Armijo:
+        return
+      let dirDeriv = dot(gradient, p)
+      if dirDeriv < 0.9 * dirDerivInit:
+        alpha *= 2.1
+      else:
+        if criterion == Wolfe:
+          return
+        if dirDeriv > -0.9*dirDerivInit:
+          alpha *= 0.5
+        else:
+          return
+    if alpha < 1e-3:
+      alpha = 1e-3
+      return
+    elif alpha > 1e2:
+      alpha = 1e2
+      return
+
+
+proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
   ## Minimize scalar-valued function f.
+  var alpha = alpha
   var x = x0.clone()
   var fNorm = abs(f(x0))
   var gradient = tensorGradient(f, x0, fastMode=fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
   while gradNorm > tol*(1 + fNorm) and iters < 10000:
     let p = -gradient
+    line_search(alpha, p, x, f, criterion, fastMode)
     x += alpha * p
     let fx = f(x)
     fNorm = abs(fx)
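
For reference, the acceptance tests in line_search above correspond to the textbook sufficient-decrease and curvature conditions, with both constants hard-coded in the loop (c1 = 1e-4 in the decrease test, c2 = 0.9 in the curvature tests; dirDerivInit is the initial directional derivative):

  f(x_0 + \alpha p) \le f(x_0) + c_1 \alpha \nabla f(x_0)^T p        (Armijo)
  \nabla f(x_0 + \alpha p)^T p \ge c_2 \nabla f(x_0)^T p             (Wolfe)
  |\nabla f(x_0 + \alpha p)^T p| \le c_2 |\nabla f(x_0)^T p|         (WolfeStrong)

In the code, the alpha *= 2.1 branch grows the step when the Wolfe lower bound fails (dirDeriv still too negative), while alpha *= 0.5 shrinks it when the Armijo test or the strong upper bound fails.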
@@ -218,8 +270,9 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   #echo iters, " iterations done!"
   result = x
 
-proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
   # Use gemm and gemv with preallocated Tensors and setting beta = 0
+  var alpha = alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
@@ -240,6 +293,7 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
     #echo "p iter ", iters, ": ", p
     #echo "x iter ", iters, ": ", x
     #echo "gradient iter ", iters, ": ", gradient
+    line_search(alpha, p, x, f, criterion, fastMode)
     x += alpha * p
     let newGradient = tensorGradient(f, x, fastMode=fastMode)
     let sk = alpha * p.reshape(xLen, 1)
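
The comment on bfgs_optimized ("Use gemm and gemv with preallocated Tensors and setting beta = 0") is the whole point of this variant: BLAS-style calls write their result into a buffer allocated once outside the loop, and beta = 0 makes the call overwrite that buffer instead of accumulating into it, so no zeroing pass is needed. A minimal sketch of the pattern, assuming arraymancer's gemm(alpha, A, B, beta, C) overload that the comment refers to; H, g and buf are illustrative names, not from the diff:

  import arraymancer

  let n = 4
  let H = ones[float](n, n)     # stand-in for the inverse-Hessian approximation
  let g = ones[float](n, 1)     # stand-in for the gradient column vector
  var buf = zeros[float](n, 1)  # preallocated once, reused every iteration

  # buf = 1.0 * H * g + 0.0 * buf; with beta = 0 the old contents of buf are ignored
  gemm(1.0, H, g, 0.0, buf)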
@@ -296,14 +350,15 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
   #echo iters, " iterations done!"
   result = x
 
-proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, m: int = 10, criterion: LineSearchCriterion = None): Tensor[U] =
+  var alpha = alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
   var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
-  let m = 10 # number of past iterations to save
+  #let m = 10 # number of past iterations to save
   var sk_queue = initDeque[Tensor[U]](m)
   var yk_queue = initDeque[Tensor[T]](m)
   # the problem is the first iteration as the gradient is huge and no adjustments are made
@@ -330,6 +385,7 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U
     z = -z
     let p = z
     #echo "q: ", q
+    line_search(alpha, p, x, f, criterion, fastMode)
     x += alpha * p
     sk_queue.addFirst alpha*p
     let newGradient = tensorGradient(f, x, fastMode=fastMode)
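
After these changes the three drivers share one call shape: only criterion, and for lbfgs the history length m, vary. A minimal usage sketch, assuming it runs inside this module (the LineSearchCriterion enum carries no export marker in this commit, so outside callers would first need LineSearchCriterion*), reusing the f1 test function from the isMainModule block below:

  let start = ones[float](100) * -1
  echo steepestDescent(f1, start, tol = 1e-8, alpha = 0.001, criterion = Wolfe)
  echo bfgs_optimized(f1, start, tol = 1e-8, criterion = Armijo)
  echo lbfgs(f1, start, tol = 1e-8, m = 10, criterion = WolfeStrong)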
@@ -402,40 +458,71 @@ when isMainModule:
     result += (ix - 1)^2
 
   #let x0 = [-0.5, 2.0].toTensor
-  let x0 = ones[float](500) * -1
+  let x0 = ones[float](100) * -1
   #[ let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
   echo sol1
   echo f1(sol1)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
-  echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false) ]#
+  echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
   echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
-  echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
+  echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=true) ]#
 
-  timeIt "steepest slow mode":
-    keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=false)
-  timeIt "steepest fast mode":
-    keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
-  timeIt "newton slow mode":
+  #[ echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=None)
+  echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Armijo)
+  echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe)
+  echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong)
+  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=None)
+  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Armijo)
+  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe)
+  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong) ]#
+
+  timeIt "bfgs_optimized slow mode None":
+    keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None)
+
+  timeIt "bfgs_optimized slow mode Armijo":
+    keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Armijo)
+
+  timeIt "bfgs_optimized slow mode Wolfe":
+    keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe)
+
+  timeIt "bfgs_optimized slow mode WolfeStrong":
+    keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong)
+
+  timeIt "lbfgs slow mode None":
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None)
+
+  timeIt "lbfgs slow mode Armijo":
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Armijo)
+
+  timeIt "lbfgs slow mode Wolfe":
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe)
+
+  timeIt "lbfgs slow mode WolfeStrong":
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong)
+
+  #[ timeIt "newton slow mode":
     keep newton(f1, x0, tol=1e-8, fastMode=false)
   timeIt "newton fast mode":
     keep newton(f1, x0, tol=1e-8, fastMode=true)
   timeIt "bfgs slow mode":
     keep bfgs(f1, x0, tol=1e-8, fastMode=false)
   timeIt "bfgs fast mode":
     keep bfgs(f1, x0, tol=1e-8, fastMode=true)
-  timeIt "optimized bfgs slow mode":
-    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
-  timeIt "optimized bfgs fast mode":
-    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
   timeIt "lbfgs slow mode":
     keep lbfgs(f1, x0, tol=1e-8, fastMode=false)
   timeIt "lbfgs fast mode":
     keep lbfgs(f1, x0, tol=1e-8, fastMode=true)
+  timeIt "optimized bfgs slow mode":
+    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
+  timeIt "optimized bfgs fast mode":
+    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
+  timeIt "steepest fast mode":
+    keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true) ]#
+
 
 # Lev-Marq:
 #[ proc fFit(params: Tensor[float], x: float): float =