
Commit 772097c

add OptimOptions type
1 parent 2725dfc commit 772097c

File tree

1 file changed: +45 -29 lines changed


src/numericalnim/optimize.nim

Lines changed: 45 additions & 29 deletions
@@ -123,6 +123,23 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
 ## Multidimensional methods ##
 ##############################

+type LineSearchCriterion = enum
+  Armijo, Wolfe, WolfeStrong, NoLineSearch
+
+type OptimOptions*[U] = object
+  tol*, alpha*, lambda0*: U
+  fastMode*: bool
+  maxIterations*: int
+  lineSearchCriterion*: LineSearchCriterion
+
+proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U] =
+  result.tol = tol
+  result.alpha = alpha
+  result.lambda0 = lambda0
+  result.fastMode = fastMode
+  result.maxIterations = maxIterations
+  result.lineSearchCriterion = lineSearchCriterion
+
 proc vectorNorm*[T](v: Tensor[T]): T =
   ## Calculates the norm of the vector, ie the sqrt(Σ vᵢ²)
   assert v.rank == 1, "v must be a 1d vector!"
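The hunk above bundles the solvers' loose keyword arguments into a single OptimOptions[U] object with an optimOptions constructor in which every field has a default. A minimal sketch of how a caller might build such an object, assuming the module is imported directly as numericalnim/optimize; the overridden values are illustrative and the doAssert lines simply restate the defaults declared in the constructor above:

import numericalnim/optimize

# Override only the fields of interest; the rest keep the constructor
# defaults (tol = 1e-6, alpha = 1, lambda0 = 1, fastMode = false,
# maxIterations = 10000, lineSearchCriterion = NoLineSearch).
let opts = optimOptions[float](tol = 1e-8, fastMode = true)
doAssert opts.alpha == 1.0
doAssert opts.maxIterations == 10000

Note that LineSearchCriterion itself is not marked for export in this hunk, so code outside the module cannot name Wolfe or Armijo and is effectively limited to the NoLineSearch default for that field.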
@@ -133,12 +150,9 @@ proc eye[T](n: int): Tensor[T] =
   for i in 0 ..< n:
     result[i, i] = 1

-type LineSearchCriterion = enum
-  Armijo, Wolfe, WolfeStrong, None
-
 proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Tensor[U]): T, criterion: LineSearchCriterion, fastMode: bool = false) =
   # note: set initial alpha for the methods as 1 / sqrt(dot(grad, grad)) so first step has length 1.
-  if criterion == None:
+  if criterion == NoLineSearch:
     return
   var gradient = tensorGradient(f, x0, fastMode=fastMode)
   let dirDerivInit = dot(gradient, p)
@@ -183,41 +197,43 @@ proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Te



-proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.001), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
+proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U](alpha = U(0.001))): Tensor[U] =
   ## Minimize scalar-valued function f.
-  var alpha = alpha
+  var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x0))
-  var gradient = tensorGradient(f, x0, fastMode=fastMode)
+  var gradient = tensorGradient(f, x0, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     let p = -gradient
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     let fx = f(x)
     fNorm = abs(fx)
-    gradient = tensorGradient(f, x, fastMode=fastMode)
+    gradient = tensorGradient(f, x, fastMode=options.fastMode)
     gradNorm = vectorNorm(gradient)
     iters += 1
   if iters >= 10000:
     discard "Limit of 10000 iterations reached!"
   #echo iters, " iterations done!"
   result = x

-proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
+  var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x))
-  var gradient = tensorGradient(f, x, fastMode=fastMode)
+  var gradient = tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var hessian = tensorHessian(f, x)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     let p = -solve(hessian, gradient)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     let fx = f(x)
     fNorm = abs(fx)
-    gradient = tensorGradient(f, x, fastMode=fastMode)
+    gradient = tensorGradient(f, x, fastMode=options.fastMode)
     gradNorm = vectorNorm(gradient)
     hessian = tensorHessian(f, x)
     iters += 1
@@ -226,7 +242,7 @@ proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U
   #echo iters, " iterations done!"
   result = x

-proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
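With steepestDescent and newton now taking an options object, a call that previously spelled out alpha, tol and fastMode passes one OptimOptions value instead. A hedged usage sketch, assuming import via numericalnim/optimize; the quadratic objective f and the starting point x0 are illustrative and not part of the commit:

import arraymancer
import numericalnim/optimize

# Toy objective: a quadratic bowl with its minimum at the origin.
proc f(x: Tensor[float]): float = dot(x, x)
let x0 = [1.0, -2.0].toTensor()

# Previously: steepestDescent(f, x0, alpha = 0.001, tol = 1e-8, fastMode = false)
echo steepestDescent(f, x0, optimOptions[float](alpha = 0.001, tol = 1e-8))
echo newton(f, x0, optimOptions[float](tol = 1e-8))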
@@ -270,20 +286,20 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   #echo iters, " iterations done!"
   result = x

-proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
+proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
   # Use gemm and gemv with preallocated Tensors and setting beta = 0
-  var alpha = alpha
+  var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
-  var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
+  var gradient = 0.01*tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var hessianB = eye[T](xLen) # inverse of the approximated hessian
   var p = newTensor[U](xLen)
   var tempVector1 = newTensor[U](xLen, 1)
   var tempVector2 = newTensor[U](1, xLen)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     # We are using hessianB in calculating it so we are modifying it prior to its use!


@@ -293,9 +309,9 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
     #echo "p iter ", iters, ": ", p
     #echo "x iter ", iters, ": ", x
     #echo "gradient iter ", iters, ": ", gradient
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
-    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let newGradient = tensorGradient(f, x, fastMode=options.fastMode)
     let sk = alpha * p.reshape(xLen, 1)

     let yk = (newGradient - gradient).reshape(xLen, 1)
@@ -350,19 +366,19 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
   #echo iters, " iterations done!"
   result = x

-proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, m: int = 10, criterion: LineSearchCriterion = None): Tensor[U] =
-  var alpha = alpha
+proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
+  var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
-  var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
+  var gradient = 0.01*tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
   #let m = 10 # number of past iterations to save
   var sk_queue = initDeque[Tensor[U]](m)
   var yk_queue = initDeque[Tensor[T]](m)
   # the problem is the first iteration as the gradient is huge and no adjustments are made
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     #echo "grad: ", gradient
     #echo "x: ", x
     var q = gradient.clone()
@@ -385,10 +401,10 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U
     z = -z
     let p = z
     #echo "q: ", q
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     sk_queue.addFirst alpha*p
-    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let newGradient = tensorGradient(f, x, fastMode=options.fastMode)
     let yk = newGradient - gradient
     yk_queue.addFirst yk
     gradient = newGradient
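For lbfgs the history length m stays a separate parameter while the remaining settings move into the options object. A sketch under the same assumptions as above; the Rosenbrock objective is illustrative only:

import std/math
import arraymancer
import numericalnim/optimize

# 2-D Rosenbrock function; its minimum is at (1, 1).
proc rosen(x: Tensor[float]): float =
  (1.0 - x[0])^2 + 100.0 * (x[1] - x[0]^2)^2

let x0 = [-1.2, 1.0].toTensor()
# m is the history length; everything else comes from the options object.
echo lbfgs(rosen, x0, m = 5, options = optimOptions[float](tol = 1e-8))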
@@ -483,7 +499,7 @@ when isMainModule:
  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe)
  echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong) ]#

-  timeIt "steepest slow mode None":
+  #[ timeIt "steepest slow mode None":
     keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None)

   timeIt "steepest slow mode Armijo":
@@ -505,7 +521,7 @@ when isMainModule:
     keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe)

   timeIt "steepest slow mode WolfeStrong":
-    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong)
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong) ]#
   #[ timeIt "newton slow mode":
     keep newton(f1, x0, tol=1e-8, fastMode=false)
   timeIt "newton fast mode":

0 commit comments