Skip to content

Commit ef63387

Browse files
committed
somewhat working lbfgs function
1 parent 351c4bd commit ef63387

File tree

1 file changed

+59
-6
lines changed

1 file changed

+59
-6
lines changed

src/numericalnim/optimize.nim

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import strformat
1+
import std/[strformat, sequtils, math, deques]
22
import arraymancer
3-
import sequtils
4-
import math
53
import ./differentiate
64

75
proc steepest_descent*(deriv: proc(x: float64): float64, start: float64, gamma: float64 = 0.01, precision: float64 = 1e-5, max_iters: Natural = 1000):float64 {.inline.} =
@@ -180,7 +178,7 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
180178
var x = x0.clone()
181179
let xLen = x.shape[0]
182180
var fNorm = abs(f(x))
183-
var gradient = tensorGradient(f, x, fastMode=fastMode)
181+
var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
184182
var gradNorm = vectorNorm(gradient)
185183
var hessianB = eye[T](xLen) # inverse of the approximated hessian
186184
var iters: int
@@ -203,7 +201,57 @@ proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U =
203201
#echo iters, " iterations done!"
204202
result = x
205203

204+
proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false): Tensor[U] =
    ## Minimizes `f` starting from `x0` with limited-memory BFGS (L-BFGS).
    ## Instead of a dense inverse-Hessian approximation (as in `bfgs`), only
    ## the `m` most recent curvature pairs (s_k, y_k) are kept and the search
    ## direction is produced by the standard two-loop recursion.
    ##
    ## - `f`: scalar-valued objective evaluated on a rank-1 tensor.
    ## - `x0`: initial guess; not mutated (a clone is used internally).
    ## - `alpha`: fixed step length applied to the search direction.
    ## - `tol`: relative tolerance; stops when ||grad|| <= tol * (1 + |f(x)|).
    ## - `fastMode`: forwarded to `tensorGradient`.
    ##
    ## Returns the last iterate when the tolerance or the iteration cap is hit.
    const
        maxIters = 10000 # hard safety cap on the number of iterations
        m = 10           # number of past curvature pairs to retain
    var x = x0.clone()
    var fNorm = abs(f(x))
    # NOTE(review): the 0.01 damping works around an overly large first step
    # taken before any curvature information exists (same hack as in `bfgs`) —
    # confirm whether a line search should replace it.
    var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode)
    var gradNorm = vectorNorm(gradient)
    var iters: int
    # Newest pair lives at index 0 (addFirst); the oldest falls off the back.
    var sk_queue = initDeque[Tensor[U]](m)
    var yk_queue = initDeque[Tensor[T]](m)
    while gradNorm > tol*(1 + fNorm) and iters < maxIters:
        # Two-loop recursion, first pass: newest -> oldest stored pair.
        var q = gradient.clone()
        var alphas: seq[U]
        for i in 0 ..< sk_queue.len:
            # rho_i is finite because only pairs with dot(s, y) > 0 are stored.
            let rho_i = 1 / dot(sk_queue[i], yk_queue[i])
            let alpha_i = rho_i * dot(sk_queue[i], q)
            q -= alpha_i * yk_queue[i]
            alphas.add alpha_i
        # Initial Hessian scaling gamma = s·y / y·y from the newest pair
        # (identity scaling on the very first iteration).
        let gamma = if sk_queue.len == 0: U(1) else: dot(sk_queue[0], yk_queue[0]) / dot(yk_queue[0], yk_queue[0])
        var z = gamma * q
        # Second pass: oldest -> newest; alphas[i] pairs with deque index i.
        for i in countdown(sk_queue.len - 1, 0):
            let rho_i = 1 / dot(sk_queue[i], yk_queue[i])
            let beta_i = rho_i * dot(yk_queue[i], z)
            z += sk_queue[i] * (alphas[i] - beta_i)
        let p = -z # descent direction
        let sk = alpha * p
        x += sk
        let newGradient = tensorGradient(f, x, fastMode=fastMode)
        let yk = newGradient - gradient
        # Only store pairs satisfying the curvature condition dot(s, y) > 0:
        # this prevents a division by zero in rho and keeps the implicit
        # inverse-Hessian approximation positive definite.
        if dot(sk, yk) > U(0):
            sk_queue.addFirst sk
            yk_queue.addFirst yk
            if sk_queue.len > m: discard sk_queue.popLast
            if yk_queue.len > m: discard yk_queue.popLast
        gradient = newGradient
        fNorm = abs(f(x))
        gradNorm = vectorNorm(gradient)
        iters += 1
    if iters >= maxIters:
        # TODO(review): surface this as a proper warning/log instead of a no-op.
        discard "Limit of 10000 iterations reached!"
    result = x
207255

208256
proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], alpha = U(1), tol: U = U(1e-6), lambda0: U = U(1), fastMode = false): Tensor[U] =
209257
assert xData.rank == 1
@@ -257,15 +305,16 @@ when isMainModule:
257305
for ix in x:
258306
result += (ix - 1)^2
259307

260-
#let x0 = [-1.0, -1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0].toTensor
261-
let x0 = ones[float](500) * -1
308+
#let x0 = [-0.5, 2.0].toTensor
309+
let x0 = ones[float](500) * 0
262310
let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true)
263311
echo sol1
264312
echo f1(sol1)
265313
echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false)
266314
echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true)
267315
echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false)
268316
echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true)
317+
echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false)
269318

270319
timeIt "steepest slow mode":
271320
keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=false)
@@ -279,6 +328,10 @@ when isMainModule:
279328
keep bfgs(f1, x0, tol=1e-8, fastMode=false)
280329
timeIt "bfgs fast mode":
281330
keep bfgs(f1, x0, tol=1e-8, fastMode=true)
331+
timeIt "lbfgs slow mode":
332+
keep lbfgs(f1, x0, tol=1e-8, fastMode=false)
333+
timeIt "lbfgs fast mode":
334+
keep lbfgs(f1, x0, tol=1e-8, fastMode=true)
282335

283336
# Lev-Marq:
284337
#[ proc fFit(params: Tensor[float], x: float): float =

0 commit comments

Comments
 (0)