We first load all the required libraries.

library(devtools)
library(gRim)
library(igraph)
install_github("pzwiernik/golazo")
library(golazo)

Loading the data

Download FIR upregulated genes.

download.file("https://functionalgenomics.upf.edu/supplements/FIRinELGANs/TableS6.csv",
              "FIRupregulatedgenes.csv")
upgenes <- read.csv("FIRupregulatedgenes.csv", sep="\t", stringsAsFactors=FALSE)
dim(upgenes)
## [1] 592   7

Download genes with a role in the innate immune response.

download.file("https://www.innatedb.com/download/innatedb_curated_genes.xls",
              "innatedb_curated_genes.xls")
library(xlsx)
innatedb <- read.xlsx(file="innatedb_curated_genes.xls", sheetIndex=1,
                      stringsAsFactors=FALSE)
dim(innatedb)
## [1] 3713    5

Subset innatedb for human genes with a role in the innate immune response human is taxon 9606 https://www.ncbi.nlm.nih.gov/taxonomy/9606

iigenes.human <- unique(innatedb[innatedb$Species == 9606, "Gene.Symbol"])
length(iigenes.human)
## [1] 1052

Read the gene expression dataset.

temp <- tempfile()
download.file("https://functionalgenomics.upf.edu/supplements/FIRinELGANs/CostaCastelo2016data.zip",temp)
y <- read.csv(unz(temp, "CostaCastelo2016normalizedGeneExp.csv"), row.names=1)
unlink(temp)
dim(y)
## [1] 12093    43
stopifnot(all(upgenes$Symbol %in% rownames(y))) ## QC

Further subset the innate immune genes to those present in our data

iigenes.human <- iigenes.human[iigenes.human %in% rownames(y)]
length(iigenes.human)
## [1] 704

fetch the intersecting set of 136 upregulated genes with a role in the innate immune response

iiupgenes <- intersect(upgenes$Symbol, iigenes.human)
length(iiupgenes)
## [1] 136

Now we are finally ready to begin our analysis. Compute the sample correlation matrix

S <- cov.wt(t(y[iiupgenes,]), method ="ML")$cov
n <- length(iiupgenes)
R <- cov2cor(S)
d <- nrow(R)

Select the penalty parameter using BIC/EBIC. For illustration, we check only 5 potential rhos.

L <- matrix(0,d,d)
U <- matrix(1,d,d)
diag(U) <- 0
ebic <- EBICgolazo(R,n=n,L=L,U=U,rhomin = 0.01,rhomax=1,nrhos=10)
plot(ebic$rhos,ebic$ebic,type="l")

rho <- ebic$opt.rho

Rerun the analysis for the optimal rho.

res <- positive.golazo(R,rho)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The input matrix is not positive definite. Computing the starting point.. DONE
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  78.9456 
## 2      |  16.39732 
## 3      |  7.256931 
## 4      |  4.26599 
## 5      |  2.858529 
## 6      |  2.004468 
## 7      |  1.432673 
## 8      |  1.040407 
## 9      |  0.7629886 
## 10     |  0.5644676 
## 11     |  0.420677 
## 12     |  0.3164019 
## 13     |  0.2391608 
## 14     |  0.1816402 
## 15     |  0.1382717 
## 16     |  0.1053131 
## 17     |  0.08023786 
## 18     |  0.06112197 
## 19     |  0.04655535 
## 20     |  0.03546659 
## 21     |  0.02702336 
## 22     |  0.0205944 
## 23     |  0.01569752 
## 24     |  0.01196517 
## 25     |  0.009118848 
## 26     |  0.006951403 
## 27     |  0.005299603 
## 28     |  0.004037011 
## 29     |  0.003073707 
## 30     |  0.00233886 
## 31     |  0.001775927 
## 32     |  0.001346277 
## 33     |  0.001019749 
## 34     |  0.0007664926 
## 35     |  0.0005742699 
## 36     |  0.0004284788 
## 37     |  0.0003170495 
## 38     |  0.0002319242 
## 39     |  0.0001690118 
## 40     |  0.0001237473 
## 41     |  8.608429e-05 
## 42     |  5.862505e-05 
## 43     |  3.894448e-05 
## 44     |  2.517995e-05 
## 45     |  1.420998e-05 
## 46     |  6.744628e-06 
## 47     |  2.494403e-06 
## 48     |  -1.386148e-07

The estimated graph is sparse. We can play around with it using the igraph package.

G <- qgraph::qgraph(-cov2cor(res$K),layout="spring",palette="colorblind",theme="colorblind",threshold=1e-5)
## Registered S3 methods overwritten by 'huge':
##   method    from   
##   plot.sim  BDgraph
##   print.sim BDgraph

layout <- G$layout
G <- as.igraph(G)
edge_density(G, loops=F)
## [1] 0.06851852
diameter(G, directed=F, weights=NA)
## [1] 5

Or do some clustering.

GG <- G
E(GG)$weights <- 1
ceb <- cluster_edge_betweenness(G,directed=FALSE,weights=NULL) 
plot(ceb, G,vertex.label.cex=.2,layout=layout)

We will now compare our results with graphical lasso.

L <- matrix(-1,d,d)
U <- matrix(1,d,d)
diag(U) <- diag(L)  <- 0
gebic <- EBICgolazo(R,n=n,L=L,U=U,rhomin = 0.01,rhomax=1,nrhos=10)
grho <- gebic$opt.rho

Recompute for the optimal rho.

L <- matrix(-rho,d,d)
U <- matrix(rho,d,d)
diag(U) <- diag(L)  <- 0
resgl <- golazo(R,L=L,U=U)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The input matrix is not positive definite. Computing the starting point.. DONE
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  22.68689 
## 2      |  1.728408 
## 3      |  0.3878756 
## 4      |  0.1137225 
## 5      |  0.03512693 
## 6      |  0.01114183 
## 7      |  0.00357517 
## 8      |  0.001110542 
## 9      |  0.0003062499 
## 10     |  6.271978e-05 
## 11     |  9.176186e-06 
## 12     |  2.132126e-06 
## 13     |  7.218843e-07 
## 14     |  2.459596e-07 
## 15     |  8.422813e-08

Check the corresponding EBIC score.

Kgl <- resgl$K
Rgl <- abs(cov2cor(resgl$K))
nedggl = length(which(Rgl[upper.tri(abs(Rgl),diag=FALSE)]> 1e-6))
cat("Edge density: ",nedggl/(d*(d-1)/2),"\n")
## Edge density:  0.2020697
ebic$ebic[which(ebic$rhos==rho)]
## [1] -8758.366
gebic$ebic[which(gebic$rhos==grho)]
## [1] 4610.703

To complete the analysis we run the second step of our procedure.

resDLE <- la.golazo(res$K)
## ** The function maximizes the log-likelihood function penalized with the general GOLAZO penalty.
## The algorithm is initiated at the input matrix.
## 
##  The algorithm will stop when the dual gap is below:  1e-07 .
## 
## Iteration | Dual Gap
## 1      |  -3.488211e-08