
Commit 5ee3fd4

optimized version of bfgs (remove old one after test)
1 parent ef63387 commit 5ee3fd4

File tree

1 file changed: 110 additions, 6 deletions

src/numericalnim/optimize.nim

Lines changed: 110 additions & 6 deletions
@@ -183,13 +183,30 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
     var hessianB = eye[T](xLen) # inverse of the approximated hessian
     var iters: int
     while gradNorm > tol*(1 + fNorm) and iters < 10000:
+        #echo "Hessian iter ", iters, ": ", hessianB
         let p = -hessianB * gradient.reshape(xLen, 1)
+        #echo "p iter ", iters, ": ", p
         x += alpha * p
         let newGradient = tensorGradient(f, x, fastMode=fastMode)
         let sk = alpha * p.reshape(xLen, 1)
+        #gemm(1.0, hessianB, hessianB, 0.0, newGradient)
         let yk = (newGradient - gradient).reshape(xLen, 1)
         let sk_yk_dot = dot(sk.squeeze, yk.squeeze)
-        hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+        let prefactor1 = (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot)
+        let prefactor2 = 1 / sk_yk_dot
+        let m1 = (sk * sk.transpose)
+        let m2_1 = (hessianB * yk) * sk.transpose
+        let m2_2 = sk * (yk.transpose * hessianB)
+        #echo "prefactor2: ", prefactor2
+        hessianB += prefactor1 * m1
+        #echo "hessian2: ", hessianB
+        #echo "hessian: ", hessianB
+        #echo "yk: ", yk
+        hessianB -= prefactor2 * m2_1
+        #echo "checkpoint1: ", (hessianB * yk)
+        #echo "hessian2.5: ", hessianB
+        hessianB -= prefactor2 * m2_2
+        #echo "hessian3: ", hessianB
 
         gradient = newGradient
         let fx = f(x)
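
The expression split into prefactor1, prefactor2, m1, m2_1 and m2_2 above is the standard BFGS update of the inverse Hessian approximation B⁻¹ (hessianB), with step s_k = alpha * p and gradient difference y_k = ∇f(x_{k+1}) − ∇f(x_k):

    B_{k+1}^{-1} = B_k^{-1}
                 + \frac{s_k^T y_k + y_k^T B_k^{-1} y_k}{(s_k^T y_k)^2} \, s_k s_k^T
                 - \frac{B_k^{-1} y_k s_k^T + s_k y_k^T B_k^{-1}}{s_k^T y_k}

so prefactor1 and prefactor2 are the two scalar fractions, m1 = s_k s_kᵀ, m2_1 = B_k⁻¹ y_k s_kᵀ and m2_2 = s_k y_kᵀ B_k⁻¹.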
@@ -201,6 +218,84 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
     #echo iters, " iterations done!"
     result = x
 
+proc bfgs_optimized*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
+    # Use gemm and gemv with preallocated Tensors and setting beta = 0
+    var x = x0.clone()
+    let xLen = x.shape[0]
+    var fNorm = abs(f(x))
+    var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
+    var gradNorm = vectorNorm(gradient)
+    var hessianB = eye[T](xLen) # inverse of the approximated hessian
+    var p = newTensor[U](xLen)
+    var tempVector1 = newTensor[U](xLen, 1)
+    var tempVector2 = newTensor[U](1, xLen)
+    var iters: int
+    while gradNorm > tol*(1 + fNorm) and iters < 10000:
+        # We are using hessianB in calculating it so we are modifying it prior to its use!
+
+
+        #echo "Hessian iter ", iters, ": ", hessianB
+        #let p = -hessianB * gradient.reshape(xLen, 1)
+        gemv(-1.0, hessianB, gradient.reshape(xLen, 1), 0.0, p)
+        #echo "p iter ", iters, ": ", p
+        #echo "x iter ", iters, ": ", x
+        #echo "gradient iter ", iters, ": ", gradient
+        x += alpha * p
+        let newGradient = tensorGradient(f, x, fastMode=fastMode)
+        let sk = alpha * p.reshape(xLen, 1)
+
+        let yk = (newGradient - gradient).reshape(xLen, 1)
+        let sk_yk_dot = dot(sk.squeeze, yk.squeeze)
+        # gemm(1.0, hessianB, hessianB, 0.0, newGradient)
+        # Do the calculation in steps, minimizing allocations
+        # sk * sk.transpose is matrix × matrix
+        # how to deal with the vector.T × vector = Matrix? Use gemm? gemm() can be used as += if beta is set to 1.0
+        #echo "aim: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+        #echo "real Hessian: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose)
+        let prefactor1 = (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot)
+        let prefactor2 = 1 / sk_yk_dot
+        gemv(1.0, hessianB, yk, 0.0, tempVector1) # temp1 = hessianB * yk
+        gemm(1.0, yk.transpose, hessianB, 0.0, tempVector2) # temp2 = yk.transpose * hessianB
+
+        gemm(prefactor1, sk, sk.transpose, 1.0, hessianB) # hessianB += prefactor1 * sk * sk.transpose
+
+        gemm(-prefactor2, tempVector1, sk.transpose, 1.0, hessianB) # hessianB -= prefactor2 * temp1 * sk.transpose
+
+        gemm(-prefactor2, sk, tempVector2, 1.0, hessianB) # hessianB -= prefactor2 * sk * temp2
+        #echo "hessian2: ", hessianB # This is correct
+
+        # somewhere down here the error occurs!↓
+
+        # reuse vector p:
+        #gemv(1.0, hessianB, yk, 0.0, tempVector1) # temp1 = hessianB * yk
+        #echo "checkpoint1: ", tempVector1 # this is incorrect!!!
+
+        #
+        #
+        # Rewrite with transposes as gemv instead!
+        # (A * B)^T = B^T * A^T → yk.transpose * hessianB = transpose(hessianB.transpose * yk)
+        #
+
+        #gemm(1.0, yk.transpose, hessianB, 0.0, p)
+        #gemv(1.0, hessianB.transpose, yk, 0.0, tempVector) # p = yk.transpose * hessianB
+        #tempVector = tempVector.transpose
+
+
+        #echo "hessian3: ", hessianB
+
+        #hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot
+        #tempVector = tempVector.transpose # reverse it back to column vector
+
+        gradient = newGradient
+        let fx = f(x)
+        fNorm = abs(fx)
+        gradNorm = vectorNorm(gradient)
+        iters += 1
+    if iters >= 10000:
+        discard "Limit of 10000 iterations reached!"
+    #echo iters, " iterations done!"
+    result = x
+
 proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
     var x = x0.clone()
     let xLen = x.shape[0]
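
The idiom bfgs_optimized relies on is that the BLAS-style gemm(alpha, A, B, beta, C) and gemv(alpha, A, x, beta, y) calls used above overwrite the destination when beta = 0.0 and accumulate into it when beta = 1.0, so each term of the update can be written straight into the preallocated hessianB, tempVector1 and tempVector2. A minimal standalone sketch of that idiom, assuming the same Arraymancer gemm/gemv overloads that the diff calls (not part of the commit):

import arraymancer

# Toy 2-parameter example of the "beta" trick used in bfgs_optimized.
let sk = [1.0, 2.0].toTensor.reshape(2, 1)        # step, as a column vector
let yk = [0.5, 1.5].toTensor.reshape(2, 1)        # gradient difference
var hessianB = [[1.0, 0.0], [0.0, 1.0]].toTensor  # inverse Hessian approximation

var temp = newTensor[float](2)
gemv(1.0, hessianB, yk.squeeze, 0.0, temp)   # beta = 0.0: temp is overwritten with hessianB * yk
gemm(0.1, sk, sk.transpose, 1.0, hessianB)   # beta = 1.0: hessianB += 0.1 * (sk * sk.transpose)

echo temp      # hessianB * yk, i.e. [0.5, 1.5]
echo hessianB  # identity plus 0.1 * sk * sk^T, i.e. [[1.1, 0.2], [0.2, 1.4]]

Because hessianB appears in its own update, tempVector1 and tempVector2 must be computed from the old hessianB before any of the accumulating gemm calls touch it, which is what the comment at the top of the loop is warning about.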
@@ -302,19 +397,24 @@ when isMainModule:
     proc f1(x: Tensor[float]): float =
         # result = -20*exp(-0.2*sqrt(0.5 * (x[0]*x[0] + x[1]*x[1]))) - exp(0.5*(cos(2*PI*x[0]) + cos(2*PI*x[1]))) + E + 20 # Ackley
         result = (1 - x[0])^2 + 100*(x[1] - x[0]^2)^2
-        for ix in x:
-            result += (ix - 1)^2
+        if x.shape[0] > 2:
+            for ix in x[2 .. _]:
+                result += (ix - 1)^2
 
     #let x0 = [-0.5, 2.0].toTensor
-    let x0 = ones[float](500) * 0
-    let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
+    let x0 = ones[float](500) * -1
+    #[ let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
     echo sol1
     echo f1(sol1)
     echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false)
     echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true)
     echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
     echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
-    echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false)
+    echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false) ]#
+    echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
+    echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
+    echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
+    echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
 
     timeIt "steepest slow mode":
         keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=false)
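
With the f1 change above, the benchmark function is the 2-D Rosenbrock function extended by a quadratic bowl over the remaining coordinates (the previous version also added (x_0 − 1)^2 and (x_1 − 1)^2 on top of the Rosenbrock term, which the new guard avoids):

    f_1(x) = (1 - x_0)^2 + 100\,(x_1 - x_0^2)^2 + \sum_{i=2}^{n-1} (x_i - 1)^2

evaluated here in n = 500 dimensions from the start point x0 = (-1, ..., -1).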
@@ -328,6 +428,10 @@ when isMainModule:
         keep bfgs(f1, x0, tol=1e-8, fastMode=false)
     timeIt "bfgs fast mode":
         keep bfgs(f1, x0, tol=1e-8, fastMode=true)
+    timeIt "optimized bfgs slow mode":
+        keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=false)
+    timeIt "optimized bfgs fast mode":
+        keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=true)
     timeIt "lbfgs slow mode":
         keep lbfgs(f1, x0, tol=1e-8, fastMode=false)
     timeIt "lbfgs fast mode":
