The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Network inference and analysis of CLL data

Frédéric Bertrand and Myriam Maumy-Bertrand

Université de Strasbourg et CNRS, IRMA, labex IRMIA
frederic.bertrand@utt.fr

2025-07-02

Data preparation

Retrieve the full CLL dataset.

# Attach Patterns. library() stops with an error when the package is missing,
# whereas require() only returns FALSE and lets later calls fail obscurely.
library(Patterns)

# Download the CLL .RData file to a temporary location, load the objects it
# contains (the CLL object used below is expected to be among them), then
# delete the temporary copy.
CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData"
cll_tmp <- file.path(tempdir(), "downloadData.RData")
# mode = "wb": .RData is a binary format and must not be transferred as text.
download.file(CLLfile, destfile = cll_tmp, method = "libcurl", mode = "wb")
load(cll_tmp)
unlink(cll_tmp)

# Show the first 10 rows and first 5 columns of CLL.
CLL[seq_len(10), seq_len(5)]

Split the CLL dataset into unstimulated and stimulated subsets, for both the healthy and the aggressive subjects.

# Within every run of 8 consecutive sample columns, positions 1-4 go to the
# unstimulated (US) subset and positions 5-8 to the stimulated (S) subset.
hea_is_unstim <- ((1:48) %% 8) %in% 1:4
agg_is_unstim <- ((1:40) %% 8) %in% 1:4

# Healthy samples: 48 columns located after the first 2 columns of CLL.
hea_US <- CLL[, 2 + which(hea_is_unstim)]
hea_S <- CLL[, 2 + which(!hea_is_unstim)]

# Aggressive samples: 40 columns located after the first 98 columns of CLL.
agg_US <- CLL[, 98 + which(agg_is_unstim)]
agg_S <- CLL[, 98 + which(!agg_is_unstim)]

# Wrap each subset as an omics_array. The same time vector is used for all
# four; the third argument is 6 for the healthy arrays and 5 for the
# aggressive ones. Names and gene IDs come from the first two CLL columns.
time_points <- c(60, 90, 210, 390)

m_hea_US <- as.omics_array(hea_US, time_points, 6,
                           name = CLL[, 1], gene_ID = CLL[, 2])
m_hea_S <- as.omics_array(hea_S, time_points, 6,
                          name = CLL[, 1], gene_ID = CLL[, 2])

m_agg_US <- as.omics_array(agg_US, time_points, 5,
                           name = CLL[, 1], gene_ID = CLL[, 2])
m_agg_S <- as.omics_array(agg_S, time_points, 5,
                          name = CLL[, 1], gene_ID = CLL[, 2])

Focus on EGR1: run the code below to plot the expression values (pasted together for all the subjects) of all the probesets tagged as EGR1.

# Select the rows of agg_S whose entry in the second CLL column is "EGR1",
# take their log, and transpose so that matplot() draws one solid line
# (type="l", lty=1) per selected row.
# NOTE(review): matplot() uses the deparsed argument expression as the default
# axis label, so editing the expression below changes the label on the figure.
matplot(t(log(agg_S[which(CLL[,2] %in% "EGR1"),])),type="l",lty=1)

Select genes according to their profiles.

# The same pair of aggressive arrays (unstimulated first, stimulated second)
# is compared in all four selections.
agg_pair <- list(m_agg_US, m_agg_S)

# Each call uses the "condition&time" contrast between arrays 1 and 2; only
# the last element of the contrast list changes, from c(1, 1) to c(4, 4).
# The first two calls use -1 and alpha = 0.1, the last two use 50 and
# alpha = 0.005.
selection1 <- geneSelection(agg_pair,
                            list("condition&time", c(1, 2), c(1, 1)),
                            -1, alpha = 0.1)
selection2 <- geneSelection(agg_pair,
                            list("condition&time", c(1, 2), c(2, 2)),
                            -1, alpha = 0.1)
selection3 <- geneSelection(agg_pair,
                            list("condition&time", c(1, 2), c(3, 3)),
                            50, alpha = 0.005)
selection4 <- geneSelection(agg_pair,
                            list("condition&time", c(1, 2), c(4, 4)),
                            50, alpha = 0.005)

Merge the four selections into a single one.

# Gather the four selections, merge them with unionOmics(), and summarise
# the merged object.
selection_parts <- list(selection1, selection2, selection3, selection4)
selection <- Patterns::unionOmics(selection_parts)
summary(selection)

Number of genes in the merged selection.

# Number of entries in the gene_ID slot of the merged selection.
merged_gene_ids <- selection@gene_ID
length(merged_gene_ids)

Translate the probesets’ names for the selection.

# Attach biomaRt. library() fails immediately when the package is missing,
# whereas require() would only return FALSE.
library(biomaRt)

# Example probeset identifiers; kept from the original code but not used by
# the query below.
affyids <- c("202763_at", "209310_s_at", "207500_at")

# Connect to the Ensembl mart for the human gene dataset.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# Query the annotation of every probeset of the merged selection, filtering
# on the affy_hg_u133_plus_2 probeset identifier.
infos <- getBM(
  attributes = c("affy_hg_u133_plus_2", "ensembl_gene_id", "hgnc_symbol",
                 "chromosome_name", "start_position", "end_position", "band"),
  filters = "affy_hg_u133_plus_2",
  values = CLL[CLL[, 1] %in% selection@name, 1],
  mart = ensembl,
  uniqueRows = TRUE,
  checkFilters = TRUE
)

# Replace the gene IDs of the selection by the distinct HGNC symbols returned
# for each probeset. The result is a list with one element per probeset; an
# element is empty when the query returned no row for that probeset.
selection@gene_ID <- lapply(selection@name, function(x) {
  unique(infos[infos$affy_hg_u133_plus_2 == x, "hgnc_symbol"])
})

Network inference

Add grouping information, based on which pre-merge selection each probeset belongs to, in order to perform network inference.

# Build the group vector outside the object: start from all NA, named by
# probeset, then label each probeset with the selection it belongs to.
# Groups are written from 4 down to 1, so a probeset present in several
# selections ends up with the lowest group number.
group_of <- setNames(rep(NA, length(selection@name)), selection@name)
group_of[selection@name %in% selection4@name] <- 4
group_of[selection@name %in% selection3@name] <- 3
group_of[selection@name %in% selection2@name] <- 2
group_of[selection@name %in% selection1@name] <- 1
selection@group <- group_of
plot(selection)

Check the length of the group slot of the selection object.

# Number of entries in the group slot of the selection.
selection_groups <- selection@group
length(selection_groups)

Perform a lasso-based inference of the network, then print the structure of the network object.

# Build the initial F matrix and the F shape with the Cascade helpers, both
# called with (4, 4), then run the inference with the "LASSO2" fitting
# function and display the structure of the result.
f_init <- CascadeFinit(4, 4)
f_shape <- CascadeFshape(4, 4)
network <- inference(
  selection,
  fitfun = "LASSO2",
  Finit = f_init,
  Fshape = f_shape
)
str(network)

Plot the inferred F matrix.

# Pass the F slot of the inferred network to plotF().
# NOTE(review): choice='F' presumably selects the display of the F matrix
# itself; confirm against the Patterns documentation of plotF().
plotF(network@F, choice='F')

Save results.

# Write the merged selection and the biomaRt annotation table to two .RData
# files in the current working directory.
save(selection, file = "selection.RData")
save(infos, file = "infos.RData")

Focus on transcription factors.

Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. Nucl. Acids Res. (2018).

# Read the human transcription factor table and keep its "Symbol" column as a
# sorted character vector (missing values, if any, stay at the end).
doc <- read.delim(
  "http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",
  encoding = "UTF-8",
  header = TRUE
)
TF <- sort(as.character(doc[, "Symbol"]), na.last = TRUE)

The TF object holds the sorted gene symbols of the human transcription factors. We retrieve those that are present in the selection object.

# Annotation rows (from the biomaRt query) for the probesets of the selection.
infos_selection <- infos[infos$affy_hg_u133_plus_2 %in% selection@name, ]

# Probesets annotated with at least one transcription factor symbol.
tf_probesets <- unique(
  infos_selection[infos_selection[, "hgnc_symbol"] %in% TF,
                  "affy_hg_u133_plus_2"]
)

# Positions of those probesets within the selection itself. The previous code
# returned row numbers of infos_selection, whose row order and row count come
# from the annotation query (possibly several rows per probeset), so they
# could not be used to index the rows of selection@omicsarray.
# NOTE(review): assumes the rows of selection@omicsarray follow the order of
# selection@name - confirm against the Patterns omics_array class.
tfs <- which(selection@name %in% tf_probesets)

Some plots of the TF found in the selection.

# One solid line per transcription factor row of the selection.
matplot(t(selection@omicsarray[tfs,]),type="l",lty=1)

# kmeans() draws its starting centres at random; fix the seed so that the
# 10 cluster centres, and the figure below, are the same on every run.
set.seed(1234)
kk <- kmeans(selection@omicsarray[tfs, ], 10)

# One solid line per cluster centre.
matplot(t(kk$centers),type="l",lty=1)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.