@@ -183,13 +183,30 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   var hessianB = eye[T](xLen) # inverse of the approximated hessian
   var iters: int
   while gradNorm > tol*(1 + fNorm) and iters < 10000:
+    # echo "Hessian iter ", iters, ": ", hessianB
     let p = -hessianB * gradient.reshape(xLen, 1)
+    # echo "p iter ", iters, ": ", p
     x += alpha * p
     let newGradient = tensorGradient(f, x, fastMode=fastMode)
     let sk = alpha * p.reshape(xLen, 1)
+    # gemm(1.0, hessianB, hessianB, 0.0, newGradient)
     let yk = (newGradient - gradient).reshape(xLen, 1)
     let sk_yk_dot = dot(sk.squeeze, yk.squeeze)
-    hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+    let prefactor1 = (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot)
+    let prefactor2 = 1 / sk_yk_dot
+    let m1 = (sk * sk.transpose)
+    let m2_1 = (hessianB * yk) * sk.transpose
+    let m2_2 = sk * (yk.transpose * hessianB)
+    # echo "prefactor2: ", prefactor2
+    hessianB += prefactor1 * m1
+    # echo "hessian2: ", hessianB
+    # echo "hessian: ", hessianB
+    # echo "yk: ", yk
+    hessianB -= prefactor2 * m2_1
+    # echo "checkpoint1: ", (hessianB * yk)
+    # echo "hessian2.5: ", hessianB
+    hessianB -= prefactor2 * m2_2
+    # echo "hessian3: ", hessianB

     gradient = newGradient
     let fx = f(x)
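For reference while reading the refactor above: the deleted one-liner and the new prefactor1/prefactor2, m1/m2_1/m2_2 steps compute the same quantity, the standard BFGS update of the inverse Hessian approximation. In LaTeX, with s_k = sk (the step) and y_k = yk (the gradient difference):

    B_{k+1}^{-1} = B_k^{-1}
                 + \frac{s_k^\top y_k + y_k^\top B_k^{-1} y_k}{(s_k^\top y_k)^2} \, s_k s_k^\top
                 - \frac{B_k^{-1} y_k s_k^\top + s_k y_k^\top B_k^{-1}}{s_k^\top y_k}

prefactor1 is the first fraction, prefactor2 = 1 / (s_k^\top y_k), and m1, m2_1, m2_2 are the three outer-product matrices. Since all of them are evaluated before hessianB is mutated, the split changes nothing numerically; it only isolates each BLAS-sized piece for the optimized version below.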
@@ -201,6 +218,84 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
   # echo iters, " iterations done!"
   result = x

+proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+  # Use gemm and gemv with preallocated Tensors and setting beta = 0
+  var x = x0.clone()
+  let xLen = x.shape[0]
+  var fNorm = abs(f(x))
+  var gradient = 0.01 * tensorGradient(f, x, fastMode=fastMode)
+  var gradNorm = vectorNorm(gradient)
+  var hessianB = eye[T](xLen) # inverse of the approximated hessian
+  var p = newTensor[U](xLen)
+  var tempVector1 = newTensor[U](xLen, 1)
+  var tempVector2 = newTensor[U](1, xLen)
+  var iters: int
+  while gradNorm > tol*(1 + fNorm) and iters < 10000:
+    # Careful: hessianB is read while computing its own update, so it must not be overwritten before every term that uses the old value has been evaluated!
+
+
+    # echo "Hessian iter ", iters, ": ", hessianB
+    # let p = -hessianB * gradient.reshape(xLen, 1)
+    gemv(-1.0, hessianB, gradient.reshape(xLen, 1), 0.0, p)
+    # echo "p iter ", iters, ": ", p
+    # echo "x iter ", iters, ": ", x
+    # echo "gradient iter ", iters, ": ", gradient
+    x += alpha * p
+    let newGradient = tensorGradient(f, x, fastMode=fastMode)
+    let sk = alpha * p.reshape(xLen, 1)
+
+    let yk = (newGradient - gradient).reshape(xLen, 1)
+    let sk_yk_dot = dot(sk.squeeze, yk.squeeze)
+    # gemm(1.0, hessianB, hessianB, 0.0, newGradient)
+    # Do the calculation in steps, minimizing allocations
+    # sk * sk.transpose is matrix × matrix
+    # How to deal with the outer product vector * vector.transpose = matrix? Use gemm? gemm() can act as += if beta is set to 1.0
253+ # echo "aim: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
254+ # echo "real Hessian: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose)
255+ let prefactor1 = (sk_yk_dot + dot (yk.squeeze, squeeze (hessianB * yk))) / (sk_yk_dot * sk_yk_dot)
256+ let prefactor2 = 1 / sk_yk_dot
257+ gemv (1.0 , hessianB, yk, 0.0 , tempVector1) # temp1 = hessianB * yk
258+ gemm (1.0 , yk.transpose, hessianB, 0.0 , tempVector2) # temp2 = yk.transpose * hessianB
259+
260+ gemm (prefactor1, sk, sk.transpose, 1.0 , hessianB) # hessianB += prefactor1 * sk * sk.transpose
261+
262+ gemm (- prefactor2, tempVector1, sk.transpose, 1.0 , hessianB) # hessianB -= prefactor2 * temp1 * sk.transpose
263+
264+ gemm (- prefactor2, sk, tempVector2, 1.0 , hessianB) # hessianB -= prefactor2 * sk * temp2
265+ # echo "hessian2: ", hessianB # This is correct
266+
267+ # somewhere down here the error occurs!↓
268+
269+ # reuse vector p:
270+ # gemv(1.0, hessianB, yk, 0.0, tempVector1) # temp1 = hessianB * yk
271+ # echo "checkpoint1: ", tempVector1 # this is incorrect!!!
272+
273+ #
274+ #
275+ # Rewrite with transposes as gemv instead!
276+ # (A * B)^T = B^T * A^T → yk.transpose * hessianB = transpose(hessianB.transpose * yk)
277+ #
278+
279+ # gemm(1.0, yk.transpose, hessianB, 0.0, p)
280+ # gemv(1.0, hessianB.transpose, yk, 0.0, tempVector) # p = yk.transpose * hessianB
281+ # tempVector = tempVector.transpose
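+    # Note, continuing the identity above (an observation, not tested code):
+    # the BFGS update keeps hessianB symmetric, so
+    # yk.transpose * hessianB == (hessianB * yk).transpose,
+    # i.e. tempVector2 could be obtained as tempVector1.transpose and the
+    # second gemm skipped entirely.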
+
+
+    # echo "hessian3: ", hessianB
+
+    # hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+    # tempVector = tempVector.transpose # reverse it back to column vector
+
+    gradient = newGradient
+    let fx = f(x)
+    fNorm = abs(fx)
+    gradNorm = vectorNorm(gradient)
+    iters += 1
+  if iters >= 10000:
+    discard "Limit of 10000 iterations reached!"
+  # echo iters, " iterations done!"
+  result = x
+
 proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
   var x = x0.clone()
   let xLen = x.shape[0]
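Since the whole point of bfgs_optimized is driving gemm/gemv through their beta parameter, here is a minimal self-contained sketch of those in-place semantics, assuming Arraymancer's wrappers follow standard BLAS behavior (C = alpha*A*B + beta*C) with the same argument order as the calls above; the matrices are made-up illustrations, not part of the commit:

import arraymancer

let a = [[1.0, 2.0],
         [3.0, 4.0]].toTensor
let b = [[0.5, 0.0],
         [0.0, 0.5]].toTensor
var c = zeros[float](2, 2)   # preallocated once, reused by every call

gemm(1.0, a, b, 0.0, c)      # beta = 0.0: overwrite, c = a * b
gemm(2.0, a, b, 1.0, c)      # beta = 1.0: accumulate, c += 2 * (a * b)
echo c                       # == 3 * (a * b), with no intermediate tensors

# The caveat flagged inside the loop: BLAS assumes the output does not alias
# an input, so something like gemm(1.0, c, b, 0.0, c) is invalid. That is why
# the old hessianB is read into tempVector1/tempVector2 first, before
# hessianB becomes the output of the three accumulating gemm calls.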
@@ -302,19 +397,24 @@ when isMainModule:
   proc f1(x: Tensor[float]): float =
     # result = -20*exp(-0.2*sqrt(0.5 * (x[0]*x[0] + x[1]*x[1]))) - exp(0.5*(cos(2*PI*x[0]) + cos(2*PI*x[1]))) + E + 20 # Ackley
     result = (1 - x[0])^2 + 100 * (x[1] - x[0]^2)^2
-    for ix in x:
-      result += (ix - 1)^2
+    if x.shape[0] > 2:
+      for ix in x[2 .. _]:
+        result += (ix - 1)^2

   # let x0 = [-0.5, 2.0].toTensor
-  let x0 = ones[float](500) * 0
-  let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
+  let x0 = ones[float](500) * -1
+  #[ let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
   echo sol1
   echo f1(sol1)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false)
   echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
   echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
-  echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false)
+  echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false) ]#
+  echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
+  echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
+  echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
+  echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)

   timeIt "steepest slow mode":
     keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=false)
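On the reworked test function: the loop over x[2 .. _] keeps x[0] and x[1], which already carry the Rosenbrock penalty, from being penalized a second time, and the minimum of the extended function sits exactly at the all-ones vector. A standalone restatement (a sketch, not part of the commit) that checks this:

import arraymancer, math

proc f1(x: Tensor[float]): float =
  result = (1 - x[0])^2 + 100 * (x[1] - x[0]^2)^2  # 2-D Rosenbrock part
  if x.shape[0] > 2:
    for ix in x[2 .. _]:      # skip x[0] and x[1], penalized above
      result += (ix - 1)^2

doAssert f1(ones[float](500)) == 0.0  # minimum stays at x = (1, 1, ..., 1)

The new start x0 = ones * -1 is a deliberately harder initial guess than the old zero vector, since it sits in the flat valley on the far side of the Rosenbrock ridge.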
@@ -328,6 +428,10 @@ when isMainModule:
     keep bfgs(f1, x0, tol=1e-8, fastMode=false)
   timeIt "bfgs fast mode":
     keep bfgs(f1, x0, tol=1e-8, fastMode=true)
+  timeIt "optimized bfgs slow mode":
+    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
+  timeIt "optimized bfgs fast mode":
+    keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
   timeIt "lbfgs slow mode":
     keep lbfgs(f1, x0, tol=1e-8, fastMode=false)
   timeIt "lbfgs fast mode":