@@ -123,6 +123,23 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision:
 ## Multidimensional methods ##
 ##############################
 
+type LineSearchCriterion = enum
+  Armijo, Wolfe, WolfeStrong, NoLineSearch
+
+type OptimOptions*[U] = object
+  tol*, alpha*, lambda0*: U
+  fastMode*: bool
+  maxIterations*: int
+  lineSearchCriterion*: LineSearchCriterion
+
+proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U] =
+  result.tol = tol
+  result.alpha = alpha
+  result.lambda0 = lambda0
+  result.fastMode = fastMode
+  result.maxIterations = maxIterations
+  result.lineSearchCriterion = lineSearchCriterion
+
 proc vectorNorm*[T](v: Tensor[T]): T =
   ## Calculates the norm of the vector, ie the sqrt(Σ vᵢ²)
   assert v.rank == 1, "v must be a 1d vector!"
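Note (not part of the patch): a minimal usage sketch of the new options API, assuming U = float and illustrative tolerance values; `f` and `x0` stand for any scalar-valued objective proc and starting Tensor. The idea is to build one OptimOptions value with optimOptions and hand it to any of the refactored solvers below:

  let opts = optimOptions[float](tol = 1e-8, alpha = 0.001, lineSearchCriterion = Wolfe)
  let xMin = steepestDescent(f, x0, options = opts)   # same opts can be reused
  let xMinBfgs = bfgs(f, x0, options = opts)          # for the other solvers too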
@@ -133,12 +150,9 @@ proc eye[T](n: int): Tensor[T] =
   for i in 0 ..< n:
     result[i, i] = 1
 
-type LineSearchCriterion = enum
-  Armijo, Wolfe, WolfeStrong, None
-
 proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Tensor[U]): T, criterion: LineSearchCriterion, fastMode: bool = false) =
   # note: set initial alpha for the methods as 1 / sqrt(dot(grad, grad)) so first step has length 1.
-  if criterion == None:
+  if criterion == NoLineSearch:
     return
   var gradient = tensorGradient(f, x0, fastMode=fastMode)
   let dirDerivInit = dot(gradient, p)
@@ -183,41 +197,43 @@ proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Te
 
 
 
-proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.001), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
+proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U](alpha = U(0.001))): Tensor[U] =
   ## Minimize scalar-valued function f.
-  var alpha = alpha
+  var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x0))
-  var gradient = tensorGradient(f, x0, fastMode=fastMode)
+  var gradient = tensorGradient(f, x0, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     let p = -gradient
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     let fx = f(x)
     fNorm = abs(fx)
-    gradient = tensorGradient(f, x, fastMode=fastMode)
+    gradient = tensorGradient(f, x, fastMode=options.fastMode)
     gradNorm = vectorNorm(gradient)
     iters += 1
   if iters >= 10000:
     discard "Limit of 10000 iterations reached!"
   # echo iters, " iterations done!"
   result = x
 
-proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
+  var alpha = options.alpha
   var x = x0.clone()
   var fNorm = abs(f(x))
-  var gradient = tensorGradient(f, x, fastMode=fastMode)
+  var gradient = tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var hessian = tensorHessian(f, x)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     let p = -solve(hessian, gradient)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     let fx = f(x)
     fNorm = abs(fx)
-    gradient = tensorGradient(f, x, fastMode=fastMode)
+    gradient = tensorGradient(f, x, fastMode=options.fastMode)
     gradNorm = vectorNorm(gradient)
     hessian = tensorHessian(f, x)
     iters += 1
@@ -226,7 +242,7 @@ proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U
   # echo iters, " iterations done!"
   result = x
 
-proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
@@ -270,20 +286,20 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   # echo iters, " iterations done!"
   result = x
 
-proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, criterion: LineSearchCriterion = None): Tensor[U] =
+proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
   # Use gemm and gemv with preallocated Tensors and setting beta = 0
-  var alpha = alpha
+  var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
-  var gradient = 0.01 * tensorGradient(f, x, fastMode=fastMode)
+  var gradient = 0.01 * tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var hessianB = eye[T](xLen) # inverse of the approximated hessian
   var p = newTensor[U](xLen)
   var tempVector1 = newTensor[U](xLen, 1)
   var tempVector2 = newTensor[U](1, xLen)
   var iters: int
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     # We are using hessianB in calculating it so we are modifying it prior to its use!
 
 
@@ -293,9 +309,9 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
     # echo "p iter ", iters, ": ", p
     # echo "x iter ", iters, ": ", x
     # echo "gradient iter ", iters, ": ", gradient
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
-    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let newGradient = tensorGradient(f, x, fastMode=options.fastMode)
     let sk = alpha * p.reshape(xLen, 1)
 
     let yk = (newGradient - gradient).reshape(xLen, 1)
@@ -350,19 +366,19 @@ proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U],
   # echo iters, " iterations done!"
   result = x
 
-proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, m: int = 10, criterion: LineSearchCriterion = None): Tensor[U] =
-  var alpha = alpha
+proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U] = optimOptions[U]()): Tensor[U] =
+  var alpha = options.alpha
   var x = x0.clone()
   let xLen = x.shape[0]
   var fNorm = abs(f(x))
-  var gradient = 0.01 * tensorGradient(f, x, fastMode=fastMode)
+  var gradient = 0.01 * tensorGradient(f, x, fastMode=options.fastMode)
   var gradNorm = vectorNorm(gradient)
   var iters: int
   # let m = 10 # number of past iterations to save
   var sk_queue = initDeque[Tensor[U]](m)
   var yk_queue = initDeque[Tensor[T]](m)
   # the problem is the first iteration as the gradient is huge and no adjustments are made
-  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+  while gradNorm > options.tol*(1 + fNorm) and iters < 10000:
     # echo "grad: ", gradient
     # echo "x: ", x
     var q = gradient.clone()
@@ -385,10 +401,10 @@ proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U
     z = -z
     let p = z
     # echo "q: ", q
-    line_search(alpha, p, x, f, criterion, fastMode)
+    line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode)
     x += alpha * p
     sk_queue.addFirst alpha*p
-    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let newGradient = tensorGradient(f, x, fastMode=options.fastMode)
     let yk = newGradient - gradient
     yk_queue.addFirst yk
     gradient = newGradient
@@ -483,7 +499,7 @@ when isMainModule:
   echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe)
   echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong) ]#
 
-  timeIt "steepest slow mode None":
+  #[ timeIt "steepest slow mode None":
     keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None)
 
   timeIt "steepest slow mode Armijo":
@@ -505,7 +521,7 @@ when isMainModule:
     keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe)
 
   timeIt "steepest slow mode WolfeStrong":
-    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong)
+    keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong) ]#
   #[ timeIt "newton slow mode":
     keep newton(f1, x0, tol=1e-8, fastMode=false)
   timeIt "newton fast mode":
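Follow-up sketch (an assumption, not in this commit): before the commented-out benchmarks can be re-enabled, their calls would have to move from the old keyword arguments (tol, alpha, criterion) to the new options object, assuming U = float for f1 and x0, along the lines of:

  timeIt "bfgs Wolfe":
    keep bfgs(f1, x0, options = optimOptions[float](tol = 1e-8, lineSearchCriterion = Wolfe))
  timeIt "lbfgs Wolfe":
    keep lbfgs(f1, x0, options = optimOptions[float](tol = 1e-8, lineSearchCriterion = Wolfe))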