In 2011 the authors of the L-BFGSB program published a correction and
update to their 1995 code. The latter is the basis of the L-BFGS-B
method of the optim()
function in base-R. The package
lbfgsb3
wrapped the updated code using a
.Fortran
call after removing a very large number of Fortran
output statements. Matthew Fidler used this Fortran code and an
Rcpp
interface to produce package lbfgsb3c
where the function lbfgsb3c()
returns an object similar to
that of base-R optim()
and that of
optimx::optimr()
. Subsequently, in a fine example of the
collaborations that have made R so useful, we have
merged the functionality of package lbfgsb3
into
lbfgsb3c
, as explained in this vignette. Note that this
document is intended primarily to document our efforts to check the
differences in variants of the code rather than be expository.
lbfgsb3c
There is really only one optimizer function in the package, but it may be called by four (4) names:
lbfgsb3c()
uses Rcpp (Eddelbuettel (2013), Eddelbuettel and François (2011), Eddelbuettel and Balamuta (2017)) to streamline
the call to the underlying Fortran. This is the base function used.lbfgsb3x()
is an alias of lbfgsb3c()
. We
were using this name for a while, and have kept the alias to avoid
having to edit test scripts.lbfgsb3
, which imitates a .Fortran
call of
the compiled 2011 Fortran code. The object returned by this routine is
NOT equivalent to the object returned by base-R optim()
or
by optimx::optimr()
. Instead, it includes a structure
info
which contains the detailed diagnostic information of
the Fortran code. For most users, this is not of interest, and I only
recommend use of this function for those needing to examine how the
optimization has been carried out.lbfgsb3f()
is an alias of lbfsgb3()
.We recommend using the lbfsgb3c()
call for most
uses.
# candlestick function
# J C Nash 2011-2-3
cstick.f<-function(x,alpha=100){
x<-as.vector(x)
r2<-crossprod(x)
f<-as.double(r2+alpha/r2)
return(f)
}
cstick.g<-function(x,alpha=100){
x<-as.vector(x)
r2<-as.numeric(crossprod(x))
g1<-2*x
g2 <- (-alpha)*2*x/(r2*r2)
g<-as.double(g1+g2)
return(g)
}
library(lbfgsb3c)
nn <- 2
x0 <- c(10,10)
lo <- c(1, 1)
up <- c(10,10)
print(x0)
## [1] 10 10
## c2o <- opm(x0, cstick.f, cstick.g, lower=lo, upper=up, method=meths, control=list(trace=0))
## print(summary(c2o, order=value))
c2l1 <- lbfgsb3c(x0, cstick.f, cstick.g, lower=lo, upper=up)
c2l1
## $par
## [1] 2.236068 2.236068
##
## $grad
## [1] -1.121272e-06 -1.121272e-06
##
## $value
## [1] 20
##
## $counts
## [1] 14 14
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
## meths <- c("L-BFGS-B", "lbfgsb3c", "Rvmmin", "Rcgmin", "Rtnmin")
## require(optimx)
## cstick2a <- opm(x0, cstick.f, cstick.g, method=meths, upper=up, lower=lo, control=list(kkt=FALSE))
## print(summary(cstick2a, par.select=1:2, order=value))
lo <- c(4, 4)
## c2ob <- opm(x0, cstick.f, cstick.g, lower=lo, upper=up, method=meths, control=list(trace=0))
## print(summary(c2ob, order=value))
c2l2 <- lbfgsb3c(x0, cstick.f, cstick.g, lower=lo, upper=up)
c2l2
## $par
## [1] 4 4
##
## $grad
## [1] 7.21875 7.21875
##
## $value
## [1] 35.125
##
## $counts
## [1] 2 2
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL"
## cstick2b <- opm(x0, cstick.f, cstick.g, method=meths, upper=up, lower=lo, control=list(kkt=FALSE))
## print(summary(cstick2b, par.select=1:2, order=value))
## nn <- 100
## x0 <- rep(10, nn)
## up <- rep(10, nn)
## lo <- rep(1e-4, nn)
## cco <- opm(x0, cstick.f, cstick.g, lower=lo, upper=up, method=meths, control=list(trace=0, kkt=FALSE))
## print(summary(cco, par.select=1:4, order=value))
# require(funconstrain) ## not in CRAN, so explicit inclusion of this function
# exrosen <- ex_rosen()
# exrosenf <- exrosen$fn
exrosenf <- function (par) {
n <- length(par)
if (n%%2 != 0) {
stop("Extended Rosenbrock: n must be even")
}
fsum <- 0
for (i in 1:(n/2)) {
p2 <- 2 * i
p1 <- p2 - 1
f_p1 <- 10 * (par[p2] - par[p1]^2)
f_p2 <- 1 - par[p1]
fsum <- fsum + f_p1 * f_p1 + f_p2 * f_p2
}
fsum
}
# exroseng <- exrosen$gr
exroseng <- function (par) {
n <- length(par)
if (n%%2 != 0) {
stop("Extended Rosenbrock: n must be even")
}
grad <- rep(0, n)
for (i in 1:(n/2)) {
p2 <- 2 * i
p1 <- p2 - 1
xx <- par[p1] * par[p1]
yx <- par[p2] - xx
f_p1 <- 10 * yx
f_p2 <- 1 - par[p1]
grad[p1] <- grad[p1] - 400 * par[p1] * yx - 2 * f_p2
grad[p2] <- grad[p2] + 200 * yx
}
grad
}
exrosenx0 <- function (n = 20) {
if (n%%2 != 0) {
stop("Extended Rosenbrock: n must be even")
}
rep(c(-1.2, 1), n/2)
}
require(lbfgsb3c)
## require(optimx)
## require(optimx)
for (n in seq(2,12, by=2)) {
cat("ex_rosen try for n=",n,"\n")
x0 <- exrosenx0(n)
lo <- rep(-1.5, n)
up <- rep(3, n)
print(x0)
cat("optim L-BFGS-B\n")
eo <- optim(x0, exrosenf, exroseng, lower=lo, upper=up, method="L-BFGS-B", control=list(trace=0))
print(eo)
cat("lbfgsb3c\n")
el <- lbfgsb3c(x0, exrosenf, exroseng, lower=lo, upper=up, control=list(trace=0))
print(el)
## erfg <- opm(x0, exrosenf, exroseng, method=meths, lower=lo, upper=up)
## print(summary(erfg, par.select=1:2, order=value))
}
## ex_rosen try for n= 2
## [1] -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1
##
## $value
## [1] 3.844416e-14
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1
##
## $grad
## [1] -8.65458e-07 6.26284e-07
##
## $value
## [1] 3.844419e-14
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
##
## ex_rosen try for n= 4
## [1] -1.2 1.0 -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1 1 1
##
## $value
## [1] 7.688835e-14
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1 1 1
##
## $grad
## [1] -8.65458e-07 6.26284e-07 -8.65458e-07 6.26284e-07
##
## $value
## [1] 7.688829e-14
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
##
## ex_rosen try for n= 6
## [1] -1.2 1.0 -1.2 1.0 -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1 1 1 1 1
##
## $value
## [1] 1.153325e-13
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1 1 1 1 1
##
## $grad
## [1] -8.654581e-07 6.262840e-07 -8.654581e-07 6.262840e-07 -8.654581e-07
## [6] 6.262840e-07
##
## $value
## [1] 1.153324e-13
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
##
## ex_rosen try for n= 8
## [1] -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1 1 1 1 1 1 1
##
## $value
## [1] 1.537767e-13
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1 1 1 1 1 1 1
##
## $grad
## [1] -8.65458e-07 6.26284e-07 -8.65458e-07 6.26284e-07 -8.65458e-07
## [6] 6.26284e-07 -8.65458e-07 6.26284e-07
##
## $value
## [1] 1.537766e-13
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
##
## ex_rosen try for n= 10
## [1] -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1 1 1 1 1 1 1 1 1
##
## $value
## [1] 1.922208e-13
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1 1 1 1 1 1 1 1 1
##
## $grad
## [1] -8.654582e-07 6.262840e-07 -8.654582e-07 6.262840e-07 -8.654582e-07
## [6] 6.262840e-07 -8.654582e-07 6.262840e-07 -8.654582e-07 6.262840e-07
##
## $value
## [1] 1.922207e-13
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
##
## ex_rosen try for n= 12
## [1] -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0 -1.2 1.0
## optim L-BFGS-B
## $par
## [1] 1 1 1 1 1 1 1 1 1 1 1 1
##
## $value
## [1] 2.306652e-13
##
## $counts
## function gradient
## 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
##
## lbfgsb3c
## $par
## [1] 1 1 1 1 1 1 1 1 1 1 1 1
##
## $grad
## [1] -8.654581e-07 6.262840e-07 -8.654581e-07 6.262840e-07 -8.654581e-07
## [6] 6.262840e-07 -8.654581e-07 6.262840e-07 -8.654581e-07 6.262840e-07
## [11] -8.654581e-07 6.262840e-07
##
## $value
## [1] 2.306649e-13
##
## $counts
## [1] 51 51
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH"
While you may use the same interface as described in the writing R extensions to interface compiled code with this function, see L-BFGS-B, it is sometimes more convenient to use your own compiled code.
The following example shows how this is done using the file
jrosen.f
. We have unfortunately found that compilation is
not always portable across systems, so this example is presented without
execution.
subroutine rosen(n, x, fval)
double precision x(n), fval, dx
integer n, i
fval = 0.0D0
do 10 i=1,(n-1)
dx = x(i + 1) - x(i) * x(i)
fval = fval + 100.0 * dx * dx
dx = 1.0 - x(i)
fval = fval + dx * dx
10 continue
return
end
Here is the example script. Note that we must have the file
jrosen.f
available. Because the executable files on
different systems use different conventions and structures, we have
turned evaluation off here so this vignette can be built on multiple
platforms. However, we wished to provide examples of how compiled code
could be used.
system("R CMD SHLIB jrosen.f")
dyn.load("jrosen.so")
is.loaded("rosen")
x0 <- as.double(c(-1.2,1))
fv <- as.double(-999)
n <- as.double(2)
testf <- .Fortran("rosen", n=as.integer(n), x=as.double(x0), fval=as.double(fv))
testf
rrosen <- function(x) {
fval <- 0.0
for (i in 1:(n-1)) {
dx <- x[i + 1] - x[i] * x[i]
fval <- fval + 100.0 * dx * dx
dx <- 1.0 - x[i]
fval <- fval + dx * dx
}
fval
}
(rrosen(x0))
frosen <- function(x){
nn <- length(x)
if (nn > 100) { stop("max number of parameters is 100")}
fv <- -999.0
val <- .Fortran("rosen", n=as.integer(nn), x=as.double(x), fval=as.double(fv))
val$fval # NOTE--need ONLY function value returned
}
# Test the funcion
tval <- frosen(x0)
str(tval)
cat("Run with Nelder-Mead using R function\n")
mynm <- optim(x0, rrosen, control=list(trace=0))
print(mynm)
cat("\n\n Run with Nelder-Mead using Fortran function")
mynmf <- optim(x0, frosen, control=list(trace=0))
print(mynmf)
library(lbfgsb3c)
library(microbenchmark)
cat("try lbfgsb3c, no Gradient \n")
cat("R function\n")
tlR<-microbenchmark(myopR <- lbfgsb3c(x0, rrosen, gr=NULL, control=list(trace=0)))
print(tlR)
print(myopR)
cat("Fortran function\n")
tlF<-microbenchmark(myop <- lbfgsb3c(x0, frosen, gr=NULL, control=list(trace=0)))
print(tlF)
print(myop)
In this example, Fortran execution was actually SLOWER than plain R on the system where it was run.