require(copula)
doExtras <- FALSEThis vignette visualizes (log) likelihood functions of Archimedean
copulas, some of which are numerically challenging to compute. Because
of this computational challenge, we also check for equivalence of some
of the several computational methods, testing for numerical
near-equality using all.equal(L1, L2).
We start by defining the following auxiliary functions.
##' @title [m]inus Log-Likelihood for Archimedean Copulas ("fast version")
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##'        the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return negative log-likelihood
##' @author Martin Maechler (Marius originally)
mLogL <- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
    -sum(acop@dacopula(u, theta, n.MC=n.MC, log=TRUE, ...))
}##' @title Plotting the Negative Log-Likelihood for Archimedean Copulas
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d  data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return invisible()
##' @author Martin Maechler
curveLogL <- function(cop, u, xlim, main, XtrArgs=list(), ...) {
    unam <- deparse(substitute(u))
    stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
              (d <- ncol(u)) >= 2, d == dim(cop),
              length(cop@childCops) == 0# not yet *nested* A.copulas
              )
    acop <- cop@copula
    th. <- acop@theta # the true theta
    acop <- setTheta(acop, NA) # so it's clear, the true theta is not used below
    if(missing(main)) {
        tau. <- cop@copula@tau(th.)
        main <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
               FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
               list(UU = unam,
                TXT= sprintf("; n=%d, d=%d;  A.cop",
                         nrow(u), d),
                FUN = acop@name,
                Th = format(th.,digits=3),
                Tau = format(tau., digits=3)))
    }
    r <- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
               xlim=xlim, main=main,
               xlab = expression(theta),
               ylab = substitute(- log(L(theta, u, ~~ COP)), list(COP=acop@name)),
               ...)
    if(is.finite(th.))
        axis(1, at = th., labels=expression(theta["*"]),
             lwd=2, col="dark gray", tck = -1/30)
    else warning("non-finite cop@copula@theta = ", th.)
    axis(1, at = initOpt(acop@name),
         labels = FALSE, lwd = 2, col = 2, tck = 1/20)
    invisible(r)
}Ensure that we are told about it, if the numerical algorithms choose
methods using Rmpfr (R package interfacing to multi
precision arithmetic MPFR):
op <- options("copula:verboseUsingRmpfr"=TRUE) # see when "Rmpfr" methods are chosen automaticallyn <- 200
d <- 100
tau <- 0.2
theta <- copJoe@iTau(tau)
cop <- onacopulaL("Joe", list(theta,1:d))
theta## [1] 1.443824Here, the three different methods work “the same”:
set.seed(1)
U1 <- rnacopula(n,cop)
enacopula(U1, cop, "mle") # 1.432885 --  fine## [1] 1.432898th4 <- 1 + (1:4)/4
mL.tr <- c(-3558.5, -3734.4, -3299.5, -2505.)
mLt1 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log.poly")) # default
mLt2 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log1p"))
mLt3 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="poly"))
stopifnot(all.equal(mLt1, mL.tr, tolerance=5e-5),
          all.equal(mLt2, mL.tr, tolerance=5e-5),
          all.equal(mLt3, mL.tr, tolerance=5e-5))
system.time(r1l  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log.poly")))##        User      System verstrichen 
##       0.322       0.002       0.307mtext("all three polyJ() methods on top of each other")
system.time({
    r1J <- curveLogL(cop, U1, c(1, 2.5), X=list(method="poly"),
                     add=TRUE, col=adjustcolor("red", .4))
    r1m  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log1p"),
                      add=TRUE, col=adjustcolor("blue",.5))
})##        User      System verstrichen 
##       0.575       0.000       0.577U2 <- rnacopula(n,cop)
summary(dCopula(U2, cop)) # => density for the *correct* parameter looks okay##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  4.900e+01  6.430e+02 2.777e+175  1.932e+04 5.553e+177## hmm: max = 5.5e177
if(doExtras)
    system.time(r2 <- curveLogL(cop, U2, c(1, 2.5)))
stopifnot(all.equal(enacopula(U2, cop, "mle"), 1.43992755, tolerance=1e-5),
          all.equal(mLogL(1.8, cop@copula, U2), -4070.1953,tolerance=1e-5)) # (was -Inf)U3 <- rnacopula(n,cop)
(th. <- enacopula(U3, cop, "mle")) # 1.4495## [1] 1.449569system.time(r3 <- curveLogL(cop, U3, c(1, 2.5)))##        User      System verstrichen 
##       0.296       0.000       0.297axis(1, at = th., label = quote(hat(theta)))U4 <- rnacopula(n,cop)
enacopula(U4, cop, "mle") # 1.4519  (prev. was 2.351 : "completely wrong")## [1] 1.451916summary(dCopula(U4, cop)) # ok (had one Inf)##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  7.500e+01  9.080e+02 1.981e+259  1.434e+04 3.961e+261if(doExtras)
    system.time(r4 <- curveLogL(cop, U4, c(1, 2.5)))
mLogL(2.2351, cop@copula, U4)## [1] -1789.59mLogL(1.5,    cop@copula, U4)## [1] -3882.819mLogL(1.2,    cop@copula, U4)## [1] -3517.366if(doExtras) # each curve takes almost 2 sec
    system.time({
        curveLogL(cop, U4, c(1, 1.01))
        curveLogL(cop, U4, c(1, 1.0001))
        curveLogL(cop, U4, c(1, 1.000001))
    })
## --> limit goes *VERY* steeply up to  0
## --> theta 1.164 is about the boundary:
stopifnot(identical(setTheta(cop, 1.164), onacopula(cop@copula, C(1.164, 1:100))),
      all.equal(600.59577,
            cop@copula@dacopula(U4[118,,drop=FALSE],
                    theta=1.164, log = TRUE), tolerance=1e-5)) # was "Inf"n <- 200
d <- 150
tau <- 0.3
(theta <- copJoe@iTau(tau))## [1] 1.772108cop <- onacopulaL("Joe",list(theta,1:d))set.seed(47)
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 1.784578## [1] 1.78459system.time(r. <- curveLogL(cop, U., c(1.1, 3)))##        User      System verstrichen 
##       0.395       0.000       0.397## => still looks very goodd <- 180
tau <- 0.4
(theta <- copJoe@iTau(tau))## [1] 2.219066cop <- onacopulaL("Joe",list(theta,1:d))U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 2.217582## [1] 2.217593if(doExtras)
system.time(r. <- curveLogL(cop, U., c(1.1, 4)))
## => still looks very goodn <- 200
d <- 50 # smaller 'd' -- so as to not need 'Rmpfr' here
tau <- 0.2
(theta <- copGumbel@iTau(tau))## [1] 1.25cop <- onacopulaL("Gumbel",list(theta,1:d))set.seed(1)
U1 <- rnacopula(n,cop)
if(doExtras) {
    U2 <- rnacopula(n,cop)
    U3 <- rnacopula(n,cop)
}
enacopula(U1, cop, "mle") # 1.227659 (was 1.241927)## [1] 1.227659##--> Plots with "many" likelihood evaluations
system.time(r1 <- curveLogL(cop, U1, c(1, 2.1)))##        User      System verstrichen 
##       0.408       0.000       0.409if(doExtras) system.time({
    mtext("and two other generated samples")
    r2 <- curveLogL(cop, U2, c(1, 2.1), add=TRUE)
    r3 <- curveLogL(cop, U3, c(1, 2.1), add=TRUE)
})d <- 150
tau <- 0.6
(theta <- copGumbel@iTau(tau))## [1] 2.5cG.5 <- onacopulaL("Gumbel",list(theta,1:d))set.seed(17)
U4 <- rnacopula(n,cG.5)
U5 <- rnacopula(n,cG.5)
U6 <- rnacopula(n,cG.5)
if(doExtras) { ## "Rmpfr" is used {2012-06-21}: -- therefore about 18 seconds!
 tol <- if(interactive()) 1e-12 else 1e-8
 print(system.time(
 ee. <- c(enacopula(U4, cG.5, "mle", tol=tol),
          enacopula(U5, cG.5, "mle", tol=tol),
          enacopula(U6, cG.5, "mle", tol=tol))))
dput(ee.)# in case the following fails
## tol=1e-12 Linux nb-mm3 3.2.0-25-generic x86_64 (2012-06-23):
##   c(2.47567251789004, 2.48424484287686, 2.50410767129408)
##   c(2.475672518,      2.484244763,      2.504107671),
stopifnot(all.equal(ee., c(2.475672518, 2.484244763, 2.504107671),
            tolerance= max(1e-7, 16*tol)))
}
## --> Plots with "many" likelihood evaluations
th. <- seq(1, 3, by= 1/4)
if(doExtras) # "default2012" (polyG default) partly uses Rmpfr here:
system.time(r4   <- sapply(th., mLogL, acop=cG.5@copula, u=U4))## 25.6 sec
## whereas this (polyG method) is very fast {and still ok}:
system.time(r4.p <- sapply(th., mLogL, acop=cG.5@copula, u=U4, method="pois"))##        User      System verstrichen 
##       0.094       0.000       0.095r4. <- c(0, -18375.33, -21948.033, -24294.995, -25775.502,
         -26562.609, -26772.767, -26490.809, -25781.224)
stopifnot(!doExtras ||
          all.equal(r4,   r4., tolerance = 8e-8),
          all.equal(r4.p, r4., tolerance = 8e-8))
## --> use fast method here as well:
system.time(r5.p <- sapply(th., mLogL, acop=cG.5@copula, u=U5, method="pois"))##        User      System verstrichen 
##       0.096       0.000       0.096system.time(r6.p <- sapply(th., mLogL, acop=cG.5@copula, u=U6, method="pois"))##        User      System verstrichen 
##       0.092       0.000       0.092if(doExtras) {
    if(FALSE) # for speed analysis, etc
        debug(copula:::polyG)
    mLogL(1.65, cG.5@copula, U4) # -23472.96
}
dd <- dCopula(U4, setTheta(cG.5, 1.64), log = TRUE,
              method = if(doExtras)"default" else "pois")
summary(dd)##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.59   53.30   81.09  116.91  137.54  707.13stopifnot(!is.na(dd), # no NaN's anymore
      40 < range(dd), range(dd) < 710)n <- 64
d <- 5
tau <- 0.8
(theta <- copFrank@iTau(tau))## [1] 18.19154cop <- onacopulaL("Frank", list(theta,1:d))set.seed(11) # these seeds give no problems: 101, 41, 21
U. <- rnacopula(n,cop)
cop@copula <- setTheta(cop@copula, NA) # forget the true theta
system.time(f.ML <- emle(U., cop)); f.ML # --> fine: theta = 18.033, Log-lik = 314.01##        User      System verstrichen 
##       0.012       0.000       0.013## 
## Call:
## bbmle::mle2(minuslogl = nLL, start = start, optimizer = "optimize", 
##     lower = interval[1], upper = interval[2])
## 
## Coefficients:
##   theta 
## 18.0333 
## 
## Log-likelihood: 314.01if(doExtras)
    system.time(f.mlMC <- emle(U., cop, n.MC = 1e4)) # with MC
stopifnot(all.equal(unname(coef(f.ML)), 18.03331, tolerance= 1e-6),
      all.equal(f.ML@min, -314.0143, tolerance=1e-6),
      !doExtras || ## Simulate MLE (= SMLE) is "extra" random,  hmm...
      all.equal(unname(coef(f.mlMC)), 17.8, tolerance= 0.01)
      ##           64-bit ubuntu: 17.817523
      ##         ? 64-bit Mac:    17.741
     )
cop@copula <- setTheta(cop@copula, theta)
r. <- curveLogL(cop, U., c(1, 200)) # => now looks finetail(as.data.frame(r.), 15)##          x        y
## 87  172.14 2105.690
## 88  174.13 2143.642
## 89  176.12 2181.637
## 90  178.11 2219.675
## 91  180.10 2257.754
## 92  182.09 2295.874
## 93  184.08 2334.034
## 94  186.07 2372.232
## 95  188.06 2410.468
## 96  190.05 2448.742
## 97  192.04 2487.051
## 98  194.03 2525.396
## 99  196.02 2563.776
## 100 198.01 2602.189
## 101 200.00 2640.636stopifnot( is.finite( r.$y ),
      ## and is convex (everywhere):
      diff(r.$y, d=2) > 0)
options(op) # revert to previous state## R version 4.3.2 Patched (2023-12-01 r85659)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora Linux 38 (Thirty Eight)
## 
## Matrix products: default
## BLAS:   /u/maechler/R/D/r-patched/F38-64-inst/lib/libRblas.so 
## LAPACK: /usr/lib64/liblapack.so.3.11.0
## 
## attached base packages:
##  [1] parallel  grid      stats4    tools     stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
## [1] rugarch_1.5-1  gsl_2.1-8      mev_1.16       lattice_0.22-5 bbmle_1.0.25  
## [6] copula_1.1-3  
## 
## loaded via a namespace (and not attached):
##  [1] gmp_0.7-3                   ks_1.14.1                  
##  [3] sass_0.4.8                  KernSmooth_2.23-22         
##  [5] SkewHyperbolic_0.4-2        pracma_2.4.4               
##  [7] digest_0.6.33               evaluate_0.23              
##  [9] nleqslv_3.3.5               mvtnorm_1.2-4              
## [11] fastmap_1.1.1               jsonlite_1.8.7             
## [13] Matrix_1.6-4                mclust_6.0.1               
## [15] truncnorm_1.0-9             stabledist_0.7-1           
## [17] spd_2.0-1                   numDeriv_2016.8-1.1        
## [19] jquerylib_0.1.4             Rdpack_2.6                 
## [21] cli_3.6.1                   rlang_1.1.2                
## [23] rbibutils_2.2.16            pspline_1.0-19             
## [25] cachem_1.0.8                yaml_2.3.7                 
## [27] polynom_1.4-1               nloptr_2.0.3               
## [29] bdsmatrix_1.3-6             mathjaxr_1.6-0             
## [31] Runuran_0.38                partitions_1.10-7          
## [33] R6_2.5.1                    zoo_1.8-12                 
## [35] lifecycle_1.0.4             ADGofTest_0.3              
## [37] MASS_7.3-60                 Rsolnp_1.16                
## [39] pcaPP_2.0-3                 bslib_0.6.1                
## [41] GeneralizedHyperbolic_0.8-6 Rcpp_1.0.11                
## [43] xfun_0.41                   highr_0.10                 
## [45] knitr_1.45                  htmltools_0.5.7            
## [47] rmarkdown_2.25              xts_0.13.1                 
## [49] compiler_4.3.2              alabama_2023.1.0           
## [51] DistributionUtils_0.6-1