We first load all the required libraries.

library(devtools)
library(gRim)
library(igraph)
install_github("pzwiernik/golazo")
library(golazo)

Loading the data

Load the Body Fat dataset.

data(BodyFat)
head(BodyFat)

Modify the data as it contains errors and make all measurements.

BodyFat <- BodyFat[-c(31,42,48,76,86,96,159,169,175,182,206),]

Transforming measurements to a similar scale and removing variables Density and Age.

#BodyFat$Age <- sqrt(BodyFat$Age)
BodyFat$Weight <- sqrt(BodyFat$Weight)
gRbodyfat <- BodyFat[,-c(1,3)]
n <- nrow(gRbodyfat)

Calculate covariance matrix and correlation matrix.

S.body <- cov.wt(gRbodyfat, method ="ML")$cov
R.body <- cov2cor(S.body)

Positive graphical lasso

We first compute the positive glasso estimate for a number of possible penalty parameters. For each \(\rho\) we compute the EBIC score.

d <- nrow(R.body)
L <- matrix(0,d,d)
U <- matrix(1,d,d)
diag(U) <- 0
ebic <- EBICgolazo(R.body,n=n,L=L,U=U,rhomin=0.001,rhomax=1,nrhos=20,gamma=0.5)
plot(ebic$rhos,ebic$ebic,type="l",xlab="rho",ylab="EBIC",main="EBIC for gamma=0.5")

rho <- ebic$opt.rho

Here \(\rho=0.11\) gives the lowest value of EBIC. The algorithm converges in a couple of steps.

res <- positive.golazo(R.body,rho)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The algorithm is initiated at the input matrix.
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  0.3229601 
## 2      |  0.02903326 
## 3      |  0.004478887 
## 4      |  0.0007829846 
## 5      |  0.0001191888 
## 6      |  1.759889e-05 
## 7      |  2.592005e-06 
## 8      |  3.431388e-07 
## 9      |  9.724048e-09
Khat <- res$K
print(round(Khat,3))
##         BodyFat Weight Height   Neck  Chest Abdomen    Hip  Thigh   Knee  Ankle
## BodyFat   3.186  0.000  0.400  0.000  0.000  -2.926  0.000  0.000  0.000  0.289
## Weight    0.000 16.492 -1.516 -1.317 -2.953  -2.273 -5.595 -0.206 -2.068 -0.711
## Height    0.400 -1.516  1.674  0.000  0.508   0.396  0.000  0.000 -0.292 -0.177
## Neck      0.000 -1.317  0.000  3.803 -0.733   0.000  0.000  0.000  0.000  0.000
## Chest     0.000 -2.953  0.508 -0.733  7.694  -4.007  0.000  0.000  0.000  0.000
## Abdomen  -2.926 -2.273  0.396  0.000 -4.007  10.421 -1.728  0.000  0.000  0.000
## Hip       0.000 -5.595  0.000  0.000  0.000  -1.728 10.707 -3.298 -0.043  0.000
## Thigh     0.000 -0.206  0.000  0.000  0.000   0.000 -3.298  5.496 -0.800 -0.238
## Knee      0.000 -2.068 -0.292  0.000  0.000   0.000 -0.043 -0.800  4.269 -0.923
## Ankle     0.289 -0.711 -0.177  0.000  0.000   0.000  0.000 -0.238 -0.923  2.866
## Biceps    0.000 -0.981  0.000 -0.238  0.000   0.000  0.000 -0.761  0.000  0.000
## Forearm   0.006 -0.413  0.000 -0.432  0.000   0.000  0.000  0.000  0.000  0.000
## Wrist     0.069 -0.301  0.000 -0.898  0.000   0.000  0.000  0.000  0.000 -0.745
##         Biceps Forearm  Wrist
## BodyFat  0.000   0.006  0.069
## Weight  -0.981  -0.413 -0.301
## Height   0.000   0.000  0.000
## Neck    -0.238  -0.432 -0.898
## Chest    0.000   0.000  0.000
## Abdomen  0.000   0.000  0.000
## Hip      0.000   0.000  0.000
## Thigh   -0.761   0.000  0.000
## Knee     0.000   0.000  0.000
## Ankle    0.000   0.000 -0.745
## Biceps   3.755  -1.541  0.000
## Forearm -1.541   3.339 -0.710
## Wrist    0.000  -0.710  2.860

The graph of the underlying partial correlations looks as follows.

G <- qgraph::qgraph(-cov2cor(res$K),layout="spring",palette="colorblind",theme="colorblind")

layout <- G$layout

We run the graphical lasso for the same data. The optimal penalty is chosen again using EBIC. The penalty is chosen the minimal possible.

L <- matrix(-1,d,d)
U <- matrix(1,d,d)
diag(U) <- diag(L) <- 0
gebic <- EBICgolazo(R.body,n=n,L=L,U=U,rhomin=0.001,rhomax=1,nrhos=20,gamma=0.5)
plot(gebic$rhos,gebic$ebic,type="l",xlab="rho",ylab="EBIC",main="EBIC for gamma=0.5")

grho <- gebic$opt.rho

And again rerun GOLAZO for this optimal rho.

L <- matrix(-grho,d,d)
U <- matrix(grho,d,d)
diag(U) <- diag(L) <- 0
res2 <- golazo(R.body,L=L,U=U)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The algorithm is initiated at the input matrix.
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  0.0002576488 
## 2      |  5.224596e-07 
## 3      |  -4.163336e-15
Kglasso <- res2$K
print(round(Kglasso,3))
##         BodyFat  Weight Height   Neck   Chest Abdomen     Hip  Thigh   Knee
## BodyFat   3.689   0.000  0.292  0.605   0.339  -4.584   1.032 -0.215 -0.135
## Weight    0.000  47.011 -8.881 -3.602 -11.105  -8.379 -11.321 -4.187 -0.998
## Height    0.292  -8.881  3.331  0.589   2.448   2.052   1.318  1.285 -0.649
## Neck      0.605  -3.602  0.589  4.236   0.009  -0.769   1.089 -0.342  0.252
## Chest     0.339 -11.105  2.448  0.009  10.358  -3.865   1.960  2.266  0.196
## Abdomen  -4.584  -8.379  2.052 -0.769  -3.865  16.609  -3.091  0.963 -0.723
## Hip       1.032 -11.321  1.318  1.089   1.960  -3.091  12.988 -3.587 -0.246
## Thigh    -0.215  -4.187  1.285 -0.342   2.266   0.963  -3.587  7.100 -1.077
## Knee     -0.135  -0.998 -0.649  0.252   0.196  -0.723  -0.246 -1.077  4.439
## Ankle     0.218  -3.218  0.349  0.621   0.262   1.257   0.296 -0.683 -0.894
## Biceps   -0.088  -1.969  0.374 -0.167  -0.162   0.705   0.056 -1.002  0.043
## Forearm  -0.427  -2.838  0.269 -0.440  -0.558   2.197   1.336 -0.528 -0.079
## Wrist     0.532  -0.323  0.042 -1.127   0.382  -1.194   0.035  1.284 -0.364
##          Ankle Biceps Forearm  Wrist
## BodyFat  0.218 -0.088  -0.427  0.532
## Weight  -3.218 -1.969  -2.838 -0.323
## Height   0.349  0.374   0.269  0.042
## Neck     0.621 -0.167  -0.440 -1.127
## Chest    0.262 -0.162  -0.558  0.382
## Abdomen  1.257  0.705   2.197 -1.194
## Hip      0.296  0.056   1.336  0.035
## Thigh   -0.683 -1.002  -0.528  1.284
## Knee    -0.894  0.043  -0.079 -0.364
## Ankle    3.346  0.492   0.121 -1.168
## Biceps   0.492  3.966  -1.541 -0.129
## Forearm  0.121 -1.541   3.996 -0.962
## Wrist   -1.168 -0.129  -0.962  3.406

And the partial correlation graph is

qgraph::qgraph(-cov2cor(Kglasso),layout=layout,palette="colorblind",theme="colorblind")

The glasso estimate is dense so the likelihood is higher for that point.

c(log(det(Khat))-sum(diag(R.body%*%Khat)),log(det(Kglasso))-sum(diag(R.body%*%Kglasso)))
## [1] 3.05566 4.37581

However, penalizing for size of the graph, the EBIC criterion prefers the positive glasso estimate.

# number of edges
cat("The positive glasso EBIC: ",ebic$ebic[which(ebic$rhos==rho)],"\n")
## The positive glasso EBIC:  -364.8998
cat("The glasso EBIC: ",gebic$ebic[which(gebic$rhos==grho)],"\n")
## The glasso EBIC:  -226.6239

Dual MLE estimate

Now we take \(\hat K\) computed in the previous step and maximize the dual likelihood subject to edge positivity constraints on the underlying graph with edges \(\hat E\).

resDLE <- la.golazo(Khat)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The algorithm is initiated at the input matrix.
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  4.80742e-09

The positive graphical lasso procedure already gave an edge positive distribution so the second step does not do anything.