Inference and analysis of the Transcriptional Regulatory Network (TRN) in Miscanthus genotypes

true

setwd("~/analysis/miscanthus-rnaseq-Msin/miscanthus_transcriptional_regulatory_coexpression_network")

library(RTN)
library(RedeR)

## ***This is RedeR 1.34.0! For a quick start, please type 'vignette('RedeR')'.
##    Supporting information is available at Genome Biology 13:R29, 2012,
##    (doi:10.1186/gb-2012-13-4-r29).

#library(rJava)
#library(pheatmap)
#library(RColorBrewer)

sessionInfo()

## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] RedeR_1.34.0 RTN_2.10.1  
## 
## loaded via a namespace (and not attached):
##  [1] XVector_0.26.0              GenomeInfoDb_1.22.1        
##  [3] compiler_3.6.2              zlibbioc_1.32.0            
##  [5] bitops_1.0-6                mixtools_1.2.0             
##  [7] class_7.3-17                tools_3.6.2                
##  [9] minet_3.44.1                digest_0.6.27              
## [11] evaluate_0.14               lattice_0.20-41            
## [13] pkgconfig_2.0.3             rlang_0.4.10               
## [15] Matrix_1.3-2                igraph_1.2.6               
## [17] DelayedArray_0.12.3         yaml_2.2.1                 
## [19] parallel_3.6.2              xfun_0.20                  
## [21] GenomeInfoDbData_1.2.2      e1071_1.7-4                
## [23] stringr_1.4.0               knitr_1.30                 
## [25] S4Vectors_0.24.4            IRanges_2.20.2             
## [27] stats4_3.6.2                segmented_1.3-1            
## [29] grid_3.6.2                  data.table_1.13.6          
## [31] Biobase_2.46.0              snow_0.4-3                 
## [33] BiocParallel_1.20.1         survival_3.2-7             
## [35] rmarkdown_2.6               limma_3.42.2               
## [37] kernlab_0.9-29              viper_1.20.0               
## [39] magrittr_2.0.1              matrixStats_0.57.0         
## [41] GenomicRanges_1.38.0        htmltools_0.5.1            
## [43] MASS_7.3-53                 BiocGenerics_0.32.0        
## [45] splines_3.6.2               SummarizedExperiment_1.16.1
## [47] KernSmooth_2.23-18          stringi_1.5.3              
## [49] RCurl_1.98-1.2

vst <- read.delim("input/deseq2_vst.csv", header = T, sep = ',', row.names = 1)
vst <- as.matrix(vst)

tfs_list <- read.delim("input/Msin_TFs_list.txt", header = FALSE)
tfs_list <- as.vector(unlist(tfs_list))


# my.rtni <- tni.constructor(expData = vst, 
#                         regulatoryElements = tfs_list)
# 
# 
# my.rtni <- tni.permutation(my.rtni) #pval default 0.01
# my.rtni <- tni.bootstrap(my.rtni)
# my.rtni <- tni.dpi.filter(my.rtni) #default eps=0
# save(my.rtni, file="my.rtni_RTN.Rdata")
load("my_rtni_RTN.Rdata")

tni.regulon.summary(my.rtni)

## This regulatory network comprised of 4936 regulons.

## -- DPI-filtered network:

## regulatoryElements            Targets              Edges 
##               4936              42630             156465 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     6.0    26.0    31.7    48.0   262.0

## -- Reference network:

## regulatoryElements            Targets              Edges 
##               4936              42630            2888818 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       8      97     585     694    4933 
## ---

#tni.regulon.summary(my.rtni, regulatoryElements = "Misin01G019100.1")


my.tnet <- tni.get(my.rtni, what = "tnet") 
#write.csv(my.tnet,file="RTN_Mutual_Information_regulators_targets.csv")
my.refnet <- tni.get(my.rtni, what = "refnet") 
my.regulons <- tni.get(my.rtni, what = "regulons.and.mode")

###NEEDED TO PLOT A NETWORK FOR ONE TF (NEXT BLOCK)
export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)

###PLOT A NETWORK FOR ONE TF

# #export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
# rdp <- RedPort()
# calld(rdp, checkcalls=TRUE)
# g <- tni.graph(my.rtni, tfs = "Misin01G000800.1") 
# #replace tfs with regulatoryElements in newer versions of the library
# 
# rdp <- RedPort()
# calld(rdp)
# addGraph(rdp, g, layout=NULL)
# addLegend.color(rdp, g, type="edge")
# addLegend.shape(rdp, g)
# relax(rdp, ps = TRUE)

#LEAF
leaf.few.many <- read.csv(file = "input/leaf_few_many.csv", header = T, sep = ',')
foldchange.leaf.few.many <- setNames(unlist(leaf.few.many$log2FoldChange), unlist(leaf.few.many$X))
#this creates an "named numeric vector"
# foldchange.leaf.few.many[1:10]
# MisinT562900.1 MisinT352600.1 MisinT352700.1 MisinT352800.1 MisinT352900.1 MisinT526100.1 MisinT287300.1 MisinT287400.1 
# 0.011637136    0.011268459   -0.049725130   -0.018087613   -0.018192030   -0.105505799    0.006179712    0.000000000 
# MisinT287500.1 MisinT287600.1 
# 0.001096978    0.057356938 
# length(foldchange.leaf.few.many)
# [1] 67789
leaf.few.many.005 <- read.csv(file = "input/leaf_few_many_padj005_list.csv", header = T, sep = ',')
leaf.few.many.005 <- as.vector(unlist(leaf.few.many.005$X))


#root
root.few.many <- read.csv(file = "input/roots_few_many.csv", header = T, sep = ',')
foldchange.root.few.many <- setNames(unlist(root.few.many$log2FoldChange), unlist(root.few.many$X))
#this creates an "named numeric vector"
root.few.many.005 <- read.csv(file = "input/root_few_many_padj005_list.csv", header = T, sep = ',')
root.few.many.005 <- as.vector(unlist(root.few.many.005$X))

#stem
stem.few.many <- read.csv(file = "input/stem_few_many.csv", header = T, sep = ',')
foldchange.stem.few.many <- setNames(unlist(stem.few.many$log2FoldChange), unlist(stem.few.many$X))
#this creates an "named numeric vector"
stem.few.many.005 <- read.csv(file = "input/stem_few_many_padj005_list.csv", header = T, sep = ',')
stem.few.many.005 <- as.vector(unlist(stem.few.many.005$X))

###SLOW CALCULATIONS

# leaf.rtna <- tni2tna.preprocess(object = my.rtni, 
#                            phenotype = foldchange.leaf.few.many, 
#                            hits = leaf.few.many.005) 
# 
# leaf.rtna <- tna.mra(leaf.rtna, minRegulonSize = 5, verbose = T)
# leaf.rtna <- tna.gsea1(leaf.rtna, minRegulonSize = 5, verbose = T)
# leaf.rtna <- tna.gsea2(leaf.rtna, minRegulonSize = 5, verbose = T)
#save(leaf.rtna, file="leaf.rtna_RTN.Rdata")





#root
# root.rtna <- tni2tna.preprocess(object = my.rtni, 
#                                 phenotype = foldchange.root.few.many, 
#                                 hits = root.few.many.005) 
# root.rtna <- tna.mra(root.rtna, minRegulonSize = 5, verbose = T)
# root.rtna <- tna.gsea1(root.rtna, minRegulonSize = 5, verbose = T)
# root.rtna <- tna.gsea2(root.rtna, minRegulonSize = 5, verbose = T)
#save(root.rtna, file="root.rtna_RTN.Rdata")

#stem
# stem.rtna <- tni2tna.preprocess(object = my.rtni, 
#                                 phenotype = foldchange.stem.few.many, 
#                                 hits = stem.few.many.005) 
# stem.rtna <- tna.mra(stem.rtna, minRegulonSize = 5, verbose = T)
# stem.rtna <- tna.gsea1(stem.rtna, minRegulonSize = 5, verbose = T)
# stem.rtna <- tna.gsea2(stem.rtna, minRegulonSize = 5, verbose = T)
#save(stem.rtna, file="stem.rtna_RTN.Rdata")

load(file="root.rtna_RTN.Rdata")
load(file="stem.rtna_RTN.Rdata")
load(file="leaf.rtna_RTN.Rdata")

#####
#CONTINUE TO EXTRACT RESULTS INTO TABLES
####




#extract mra
write.csv(tna.get(stem.rtna, what="mra", ntop=-1),file="RTN_MRA-results_stem.csv")
write.csv(tna.get(root.rtna, what="mra", ntop=-1),file="RTN_MRA-results_root.csv")
write.csv(tna.get(leaf.rtna, what="mra", ntop=-1),file="RTN_MRA-results_leaf.csv")

#join analysis to MRA_analysis.xlsx

###significant_regulons_list.txt is a concatenated list of the TFs names from all three tissues from the MRA analysis

my.regulons <- tni.get(my.rtni, what = "regulons.and.mode") 
regulons.list <- tni.get(my.rtni, what = "regulatoryElements") 

###TAKES SEVERAL HOURS
# regulons.members <- data.frame(TF=character(),MEMBERS=character())
# names(regulons.members) <- c("TF","MEMBERS")
# for(tf in as.vector(regulons.list) ){
#   #print(names(my.regulons[[tf]]) )
#   my.string <- paste("'", as.character(names(my.regulons[[tf]])),"\"",collapse=", ",sep="")
#   tmp <- data.frame(TF=as.vector(tf), MEMBERS=my.string )
#   regulons.members <- rbind(regulons.members, tmp)
# }
# write.csv(regulons.members, file="TRN_TF_regulon_members_list.txt")
regulons.members <- read.delim("TRN_TF_regulon_members_list.txt", header = T, row.names = 1)

# #create a mapping (2 column file) between TFs and targets:
# tf.target <- data.frame(TF=character(),TARGET=character())
# #
# names(tf.target) <- c("TF","TARGET")
# #
# for(tf in regulons.list){
#   for(target in names(my.regulons[[tf]])){
#     tmp <- data.frame(TF=tf, MEMBERS=target)
#     tf.target <- rbind(tf.target, tmp)
#   }
# }
# write.csv(tf.target, file="map_TF_to_target_2cols.csv")
tf.target <- read.delim("map_TF_to_target_2cols.csv", header = T, row.names = 1)

#extract gsea1
#GSEA-1T uses a rank-based scoring metric in order to test the association between the gene set and the phenotype
#*ranked list of genes generated from a differential gene expression*
write.csv(tna.get(stem.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_stem.csv")
write.csv(tna.get(root.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_root.csv")
write.csv(tna.get(leaf.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_leaf.csv")

#extract gsea2
#separates the regulon into positive and negative targets 
#and then assesses the distribution of the targets across the *ranked list of genes*.
write.csv(tna.get(stem.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_stem.csv")
write.csv(tna.get(root.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_root.csv")
write.csv(tna.get(leaf.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_leaf.csv")

#using the targets for a given TF as test-group against the GO annotation


###########
#enrichment analysis of GO terms with TOPGO
###########
#using the targets for a given TF as test-group against the GO annotation
#all this code is from my notebook https://jjdevega.github.io/miscanthus_starch_rnaseq/

#PREFLIGHT (load dependencies and functions)
library(topGO,quietly = T)

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min

## 
## Attaching package: 'graph'

## The following object is masked from 'package:RedeR':
## 
##     updateGraph

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Warning: package 'S4Vectors' was built under R version 3.6.3

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:base':
## 
##     expand.grid

##

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 
## groupGOTerms:    GOBPTerm, GOMFTerm, GOCCTerm environments built.

## 
## Attaching package: 'topGO'

## The following object is masked from 'package:IRanges':
## 
##     members

library(data.table,quietly = T)

## 
## Attaching package: 'data.table'

## The following object is masked from 'package:IRanges':
## 
##     shift

## The following objects are masked from 'package:S4Vectors':
## 
##     first, second

run.topgo.pipeline.BP <- function(mytemp) {
  #PIPE starts
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  #BP
  fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  #  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
  #add genes to dataframe
  results.fdata.mytemp.BP$genes <-   sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
    genes<- genesInTerm(fdata.mytemp.BP, x)  
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.BP  
}

#MF
run.topgo.pipeline.MF <- function(mytemp) {
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  #  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
  #add genes to dataframe
  results.fdata.mytemp.MF$genes <-   sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
    genes<-genesInTerm(fdata.mytemp.MF, x) 
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.MF
}


full.topgo.pipeline.BP <- function(mytemp) {
  #PIPE starts
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  #BP
  fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  #  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=300)
  #add genes to dataframe
  results.fdata.mytemp.BP$genes <-   sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
    genes<- genesInTerm(fdata.mytemp.BP, x)  
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.BP  
}

#MF
full.topgo.pipeline.MF <- function(mytemp) {
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  #  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=100)
  #add genes to dataframe
  results.fdata.mytemp.MF$genes <-   sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
    genes<-genesInTerm(fdata.mytemp.MF, x) 
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.MF
}

##END FUNCTIONS###



#COMPUTE ENRICHMENT ANALYSIS
library(topGO,quietly = T)
library(data.table,quietly = T)
library(dplyr,quietly = T)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following object is masked from 'package:AnnotationDbi':
## 
##     select

## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union

## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union

## The following object is masked from 'package:Biobase':
## 
##     combine

## The following object is masked from 'package:graph':
## 
##     union

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

topgo.file <- read.delim("input/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/slim_annot_TOPGO.annot")
allgenes <- topgo.file$V1 #66789

library(dplyr)
library(data.table)

#extract list of targets for each significant MRA
regulons.list <- tni.get(my.rtni, what = "regulatoryElements") 
length(regulons.list) #4936

## [1] 4936

my.regulons <- tni.get(my.rtni, what = "regulons.and.mode") 

everyone <- data.frame()

#loop that runs the BP analysisi for each TF in the list:
#for(i in as.vector(unlist(regulons.list))){

#loop that runs the BP analysisi for the first 100 TFs in the list:
for(i in as.vector(unlist(regulons.list[1:100]))){
  try({ 
      #the try is a hack to prevent breaking the loop when no gene in the geneset has a GO annotation
   myset <- names(my.regulons[[i]])
   myBP <- run.topgo.pipeline.BP(myset)  %>% filter (Significant>=1) 
   myBP$ID <- as.character(i)
   myBP <- apply(myBP,2,as.character) #prevents error unimplemented type 'list' in 'EncodeElement'
   everyone <- rbind(everyone,myBP)
  } , silent=TRUE) 
  dim(everyone)
}

#write.csv(everyone,file="topgo_tfs_slim-enrichment_analysis_everyTFs.csv")

#cat all these rows and filter for a FDR value (0.05) 
#join analysis to xlsx

#list every TF with a DE target (or DE TF itslef):
cat map_TF_to_target_2cols.csv | fgrep -wf input/list_DEs_all.txt | cut -f2 -d ','| sort | uniq > map_TF_to_target_2cols-GREP-WITH-DE-LIST.csv

export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)

# library(RedeR)
# load("my_rtni_RTN.Rdata")
# # export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
# 
# 
# 
# all1 <- read.csv("map_TF_to_target_2cols-GREP-WITH-DE-LIST.csv", header = F)
# all1 <- as.vector(unlist(all1))
# length (all1)
# #[1] 1247
# 
# rdp <- RedPort()
# calld(rdp, checkcalls=TRUE)
# g <- tni.graph(my.rtni, tfs = all1) #tfs with regulatoryElements in newer versions 
# #addGraph(rdp, g, layout=NULL) #SLOW!
# 
# #and export as xmggl