@@ -135,7 +135,7 @@ proc eye[T](n: int): Tensor[T] =
   for i in 0 ..< n:
     result[i, i] = 1
 
-proc steepestDescent*[U, T](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
   ## Minimize scalar-valued function f.
   var x = x0.clone()
   var fNorm = abs(f(x0))
@@ -155,7 +155,7 @@ proc steepestDescent*[U, T](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   # echo iters, " iterations done!"
   result = x
 
-proc newton*[U, T](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
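+  ## Minimize scalar-valued function f using Newton's method.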
   var x = x0.clone()
   var fNorm = abs(f(x))
   var gradient = tensorGradient(f, x, fastMode=fastMode)
@@ -176,7 +176,36 @@ proc newton*[U, T](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(0.1), t
   # echo iters, " iterations done!"
   result = x
 
-proc levmarq*[U, T](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], alpha = U(1), tol: U = U(1e-6), lambda0: U = U(1), fastMode = false): Tensor[U] =
+proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
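+  ## Minimize scalar-valued function f using the BFGS quasi-Newton method
+  ## (maintains an approximation of the inverse Hessian).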
+  var x = x0.clone()
+  let xLen = x.shape[0]
+  var fNorm = abs(f(x))
+  var gradient = tensorGradient(f, x, fastMode=fastMode)
+  var gradNorm = vectorNorm(gradient)
+  var hessianB = eye[T](xLen) # inverse of the approximated hessian
+  var iters: int
+  while gradNorm > tol*(1 + fNorm) and iters < 10000:
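+    # Search direction: p = -B * gradient, with B the current inverse Hessian approximation.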
+    let p = -hessianB * gradient.reshape(xLen, 1)
+    x += alpha * p
+    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let sk = alpha * p.reshape(xLen, 1)
+    let yk = (newGradient - gradient).reshape(xLen, 1)
+    let sk_yk_dot = dot(sk.squeeze, yk.squeeze)
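+    # BFGS update of the inverse Hessian approximation B (' denotes transpose):
+    # B += (s'y + y'By)/(s'y)^2 * (s*s') - (B*y*s' + s*y'*B)/(s'y)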
+    hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+
+    gradient = newGradient
+    let fx = f(x)
+    fNorm = abs(fx)
+    gradNorm = vectorNorm(gradient)
+    iters += 1
+  if iters >= 10000:
+    discard "Limit of 10000 iterations reached!"
+  # echo iters, " iterations done!"
+  result = x
+
+
+
+proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], alpha = U(1), tol: U = U(1e-6), lambda0: U = U(1), fastMode = false): Tensor[U] =
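+  ## Fit params so that f(params, x) approximates yData at the points in xData
+  ## using the Levenberg-Marquardt method (lambda0 is the initial damping).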
   assert xData.rank == 1
   assert yData.rank == 1
   assert params0.rank == 1
@@ -225,13 +254,18 @@ when isMainModule:
   proc f1(x: Tensor[float]): float =
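+    # Test objective: 2D Rosenbrock term plus sum of (x_i - 1)^2 over all components; minimum at all ones.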
     # result = -20*exp(-0.2*sqrt(0.5 * (x[0]*x[0] + x[1]*x[1]))) - exp(0.5*(cos(2*PI*x[0]) + cos(2*PI*x[1]))) + E + 20 # Ackley
     result = (1 - x[0])^2 + 100 * (x[1] - x[0]^2)^2
+    for ix in x:
+      result += (ix - 1)^2
 
-  let x0 = [-1.0, -1.0].toTensor
+  # let x0 = [-1.0, -1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0].toTensor
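+  # Start from a 500-dimensional vector with every component set to -1.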
+  let x0 = ones[float](500) * -1
   let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
   echo sol1
   echo f1(sol1)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true)
+  echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
+  echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
 
   timeIt "steepest slow mode":
     keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=false)
@@ -241,6 +275,10 @@ when isMainModule:
     keep newton(f1, x0, tol=1e-8, fastMode=false)
   timeIt "newton fast mode":
     keep newton(f1, x0, tol=1e-8, fastMode=true)
+  timeIt "bfgs slow mode":
+    keep bfgs(f1, x0, tol=1e-8, fastMode=false)
+  timeIt "bfgs fast mode":
+    keep bfgs(f1, x0, tol=1e-8, fastMode=true)
 
   # Lev-Marq:
   #[ proc fFit(params: Tensor[float], x: float): float =