true
setwd("~/analysis/miscanthus-rnaseq-Msin/miscanthus_transcriptional_regulatory_coexpression_network")
library(RTN)
library(RedeR)
## ***This is RedeR 1.34.0! For a quick start, please type 'vignette('RedeR')'.
## Supporting information is available at Genome Biology 13:R29, 2012,
## (doi:10.1186/gb-2012-13-4-r29).
#library(rJava)
#library(pheatmap)
#library(RColorBrewer)
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RedeR_1.34.0 RTN_2.10.1
##
## loaded via a namespace (and not attached):
## [1] XVector_0.26.0 GenomeInfoDb_1.22.1
## [3] compiler_3.6.2 zlibbioc_1.32.0
## [5] bitops_1.0-6 mixtools_1.2.0
## [7] class_7.3-17 tools_3.6.2
## [9] minet_3.44.1 digest_0.6.27
## [11] evaluate_0.14 lattice_0.20-41
## [13] pkgconfig_2.0.3 rlang_0.4.10
## [15] Matrix_1.3-2 igraph_1.2.6
## [17] DelayedArray_0.12.3 yaml_2.2.1
## [19] parallel_3.6.2 xfun_0.20
## [21] GenomeInfoDbData_1.2.2 e1071_1.7-4
## [23] stringr_1.4.0 knitr_1.30
## [25] S4Vectors_0.24.4 IRanges_2.20.2
## [27] stats4_3.6.2 segmented_1.3-1
## [29] grid_3.6.2 data.table_1.13.6
## [31] Biobase_2.46.0 snow_0.4-3
## [33] BiocParallel_1.20.1 survival_3.2-7
## [35] rmarkdown_2.6 limma_3.42.2
## [37] kernlab_0.9-29 viper_1.20.0
## [39] magrittr_2.0.1 matrixStats_0.57.0
## [41] GenomicRanges_1.38.0 htmltools_0.5.1
## [43] MASS_7.3-53 BiocGenerics_0.32.0
## [45] splines_3.6.2 SummarizedExperiment_1.16.1
## [47] KernSmooth_2.23-18 stringi_1.5.3
## [49] RCurl_1.98-1.2
vst <- read.delim("input/deseq2_vst.csv", header = T, sep = ',', row.names = 1)
vst <- as.matrix(vst)
tfs_list <- read.delim("input/Msin_TFs_list.txt", header = FALSE)
tfs_list <- as.vector(unlist(tfs_list))
# my.rtni <- tni.constructor(expData = vst,
# regulatoryElements = tfs_list)
#
#
# my.rtni <- tni.permutation(my.rtni) #pval default 0.01
# my.rtni <- tni.bootstrap(my.rtni)
# my.rtni <- tni.dpi.filter(my.rtni) #default eps=0
# save(my.rtni, file="my.rtni_RTN.Rdata")
load("my_rtni_RTN.Rdata")
tni.regulon.summary(my.rtni)
## This regulatory network comprised of 4936 regulons.
## -- DPI-filtered network:
## regulatoryElements Targets Edges
## 4936 42630 156465
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 6.0 26.0 31.7 48.0 262.0
## -- Reference network:
## regulatoryElements Targets Edges
## 4936 42630 2888818
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 8 97 585 694 4933
## ---
#tni.regulon.summary(my.rtni, regulatoryElements = "Misin01G019100.1")
my.tnet <- tni.get(my.rtni, what = "tnet")
#write.csv(my.tnet,file="RTN_Mutual_Information_regulators_targets.csv")
my.refnet <- tni.get(my.rtni, what = "refnet")
my.regulons <- tni.get(my.rtni, what = "regulons.and.mode")
###NEEDED TO PLOT A NETWORK FOR ONE TF (NEXT BLOCK)
export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
###PLOT A NETWORK FOR ONE TF
# #export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
# rdp <- RedPort()
# calld(rdp, checkcalls=TRUE)
# g <- tni.graph(my.rtni, tfs = "Misin01G000800.1")
# #replace tfs with regulatoryElements in newer versions of the library
#
# rdp <- RedPort()
# calld(rdp)
# addGraph(rdp, g, layout=NULL)
# addLegend.color(rdp, g, type="edge")
# addLegend.shape(rdp, g)
# relax(rdp, ps = TRUE)
#LEAF
leaf.few.many <- read.csv(file = "input/leaf_few_many.csv", header = T, sep = ',')
foldchange.leaf.few.many <- setNames(unlist(leaf.few.many$log2FoldChange), unlist(leaf.few.many$X))
#this creates an "named numeric vector"
# foldchange.leaf.few.many[1:10]
# MisinT562900.1 MisinT352600.1 MisinT352700.1 MisinT352800.1 MisinT352900.1 MisinT526100.1 MisinT287300.1 MisinT287400.1
# 0.011637136 0.011268459 -0.049725130 -0.018087613 -0.018192030 -0.105505799 0.006179712 0.000000000
# MisinT287500.1 MisinT287600.1
# 0.001096978 0.057356938
# length(foldchange.leaf.few.many)
# [1] 67789
leaf.few.many.005 <- read.csv(file = "input/leaf_few_many_padj005_list.csv", header = T, sep = ',')
leaf.few.many.005 <- as.vector(unlist(leaf.few.many.005$X))
#root
root.few.many <- read.csv(file = "input/roots_few_many.csv", header = T, sep = ',')
foldchange.root.few.many <- setNames(unlist(root.few.many$log2FoldChange), unlist(root.few.many$X))
#this creates an "named numeric vector"
root.few.many.005 <- read.csv(file = "input/root_few_many_padj005_list.csv", header = T, sep = ',')
root.few.many.005 <- as.vector(unlist(root.few.many.005$X))
#stem
stem.few.many <- read.csv(file = "input/stem_few_many.csv", header = T, sep = ',')
foldchange.stem.few.many <- setNames(unlist(stem.few.many$log2FoldChange), unlist(stem.few.many$X))
#this creates an "named numeric vector"
stem.few.many.005 <- read.csv(file = "input/stem_few_many_padj005_list.csv", header = T, sep = ',')
stem.few.many.005 <- as.vector(unlist(stem.few.many.005$X))
###SLOW CALCULATIONS
# leaf.rtna <- tni2tna.preprocess(object = my.rtni,
# phenotype = foldchange.leaf.few.many,
# hits = leaf.few.many.005)
#
# leaf.rtna <- tna.mra(leaf.rtna, minRegulonSize = 5, verbose = T)
# leaf.rtna <- tna.gsea1(leaf.rtna, minRegulonSize = 5, verbose = T)
# leaf.rtna <- tna.gsea2(leaf.rtna, minRegulonSize = 5, verbose = T)
#save(leaf.rtna, file="leaf.rtna_RTN.Rdata")
#root
# root.rtna <- tni2tna.preprocess(object = my.rtni,
# phenotype = foldchange.root.few.many,
# hits = root.few.many.005)
# root.rtna <- tna.mra(root.rtna, minRegulonSize = 5, verbose = T)
# root.rtna <- tna.gsea1(root.rtna, minRegulonSize = 5, verbose = T)
# root.rtna <- tna.gsea2(root.rtna, minRegulonSize = 5, verbose = T)
#save(root.rtna, file="root.rtna_RTN.Rdata")
#stem
# stem.rtna <- tni2tna.preprocess(object = my.rtni,
# phenotype = foldchange.stem.few.many,
# hits = stem.few.many.005)
# stem.rtna <- tna.mra(stem.rtna, minRegulonSize = 5, verbose = T)
# stem.rtna <- tna.gsea1(stem.rtna, minRegulonSize = 5, verbose = T)
# stem.rtna <- tna.gsea2(stem.rtna, minRegulonSize = 5, verbose = T)
#save(stem.rtna, file="stem.rtna_RTN.Rdata")
load(file="root.rtna_RTN.Rdata")
load(file="stem.rtna_RTN.Rdata")
load(file="leaf.rtna_RTN.Rdata")
#####
#CONTINUE TO EXTRACT RESULTS INTO TABLES
####
#extract mra
write.csv(tna.get(stem.rtna, what="mra", ntop=-1),file="RTN_MRA-results_stem.csv")
write.csv(tna.get(root.rtna, what="mra", ntop=-1),file="RTN_MRA-results_root.csv")
write.csv(tna.get(leaf.rtna, what="mra", ntop=-1),file="RTN_MRA-results_leaf.csv")
#join analysis to MRA_analysis.xlsx
###significant_regulons_list.txt is a concatenated list of the TFs names from all three tissues from the MRA analysis
my.regulons <- tni.get(my.rtni, what = "regulons.and.mode")
regulons.list <- tni.get(my.rtni, what = "regulatoryElements")
###TAKES SEVERAL HOURS
# regulons.members <- data.frame(TF=character(),MEMBERS=character())
# names(regulons.members) <- c("TF","MEMBERS")
# for(tf in as.vector(regulons.list) ){
# #print(names(my.regulons[[tf]]) )
# my.string <- paste("'", as.character(names(my.regulons[[tf]])),"\"",collapse=", ",sep="")
# tmp <- data.frame(TF=as.vector(tf), MEMBERS=my.string )
# regulons.members <- rbind(regulons.members, tmp)
# }
# write.csv(regulons.members, file="TRN_TF_regulon_members_list.txt")
regulons.members <- read.delim("TRN_TF_regulon_members_list.txt", header = T, row.names = 1)
# #create a mapping (2 column file) between TFs and targets:
# tf.target <- data.frame(TF=character(),TARGET=character())
# #
# names(tf.target) <- c("TF","TARGET")
# #
# for(tf in regulons.list){
# for(target in names(my.regulons[[tf]])){
# tmp <- data.frame(TF=tf, MEMBERS=target)
# tf.target <- rbind(tf.target, tmp)
# }
# }
# write.csv(tf.target, file="map_TF_to_target_2cols.csv")
tf.target <- read.delim("map_TF_to_target_2cols.csv", header = T, row.names = 1)
#extract gsea1
#GSEA-1T uses a rank-based scoring metric in order to test the association between the gene set and the phenotype
#*ranked list of genes generated from a differential gene expression*
write.csv(tna.get(stem.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_stem.csv")
write.csv(tna.get(root.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_root.csv")
write.csv(tna.get(leaf.rtna, what="gsea1", ntop=100),file="RTN_GSEA1-results_leaf.csv")
#extract gsea2
#separates the regulon into positive and negative targets
#and then assesses the distribution of the targets across the *ranked list of genes*.
write.csv(tna.get(stem.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_stem.csv")
write.csv(tna.get(root.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_root.csv")
write.csv(tna.get(leaf.rtna, what="gsea2", ntop=100),file="RTN_GSEA2-results_leaf.csv")
#using the targets for a given TF as test-group against the GO annotation
###########
#enrichment analysis of GO terms with TOPGO
###########
#using the targets for a given TF as test-group against the GO annotation
#all this code is from my notebook https://jjdevega.github.io/miscanthus_starch_rnaseq/
#PREFLIGHT (load dependencies and functions)
library(topGO,quietly = T)
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'graph'
## The following object is masked from 'package:RedeR':
##
## updateGraph
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Warning: package 'S4Vectors' was built under R version 3.6.3
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
library(data.table,quietly = T)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
run.topgo.pipeline.BP <- function(mytemp) {
#PIPE starts
list.mytemp <- factor(as.integer(allgenes %in% mytemp))
names(list.mytemp) <- allgenes
#BP
fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
# results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
#add genes to dataframe
results.fdata.mytemp.BP$genes <- sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
genes<- genesInTerm(fdata.mytemp.BP, x)
genes[[1]][genes[[1]] %in% mytemp]
})
results.fdata.mytemp.BP
}
#MF
run.topgo.pipeline.MF <- function(mytemp) {
list.mytemp <- factor(as.integer(allgenes %in% mytemp))
names(list.mytemp) <- allgenes
fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
# results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
#add genes to dataframe
results.fdata.mytemp.MF$genes <- sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
genes<-genesInTerm(fdata.mytemp.MF, x)
genes[[1]][genes[[1]] %in% mytemp]
})
results.fdata.mytemp.MF
}
full.topgo.pipeline.BP <- function(mytemp) {
#PIPE starts
list.mytemp <- factor(as.integer(allgenes %in% mytemp))
names(list.mytemp) <- allgenes
#BP
fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
# results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=300)
#add genes to dataframe
results.fdata.mytemp.BP$genes <- sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
genes<- genesInTerm(fdata.mytemp.BP, x)
genes[[1]][genes[[1]] %in% mytemp]
})
results.fdata.mytemp.BP
}
#MF
full.topgo.pipeline.MF <- function(mytemp) {
list.mytemp <- factor(as.integer(allgenes %in% mytemp))
names(list.mytemp) <- allgenes
fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
# results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=100)
#add genes to dataframe
results.fdata.mytemp.MF$genes <- sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
genes<-genesInTerm(fdata.mytemp.MF, x)
genes[[1]][genes[[1]] %in% mytemp]
})
results.fdata.mytemp.MF
}
##END FUNCTIONS###
#COMPUTE ENRICHMENT ANALYSIS
library(topGO,quietly = T)
library(data.table,quietly = T)
library(dplyr,quietly = T)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:graph':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
topgo.file <- read.delim("input/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/slim_annot_TOPGO.annot")
allgenes <- topgo.file$V1 #66789
library(dplyr)
library(data.table)
#extract list of targets for each significant MRA
regulons.list <- tni.get(my.rtni, what = "regulatoryElements")
length(regulons.list) #4936
## [1] 4936
my.regulons <- tni.get(my.rtni, what = "regulons.and.mode")
everyone <- data.frame()
#loop that runs the BP analysisi for each TF in the list:
#for(i in as.vector(unlist(regulons.list))){
#loop that runs the BP analysisi for the first 100 TFs in the list:
for(i in as.vector(unlist(regulons.list[1:100]))){
try({
#the try is a hack to prevent breaking the loop when no gene in the geneset has a GO annotation
myset <- names(my.regulons[[i]])
myBP <- run.topgo.pipeline.BP(myset) %>% filter (Significant>=1)
myBP$ID <- as.character(i)
myBP <- apply(myBP,2,as.character) #prevents error unimplemented type 'list' in 'EncodeElement'
everyone <- rbind(everyone,myBP)
} , silent=TRUE)
dim(everyone)
}
#write.csv(everyone,file="topgo_tfs_slim-enrichment_analysis_everyTFs.csv")
#cat all these rows and filter for a FDR value (0.05)
#join analysis to xlsx
#list every TF with a DE target (or DE TF itslef):
cat map_TF_to_target_2cols.csv | fgrep -wf input/list_DEs_all.txt | cut -f2 -d ','| sort | uniq > map_TF_to_target_2cols-GREP-WITH-DE-LIST.csv
export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
# library(RedeR)
# load("my_rtni_RTN.Rdata")
# # export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
#
#
#
# all1 <- read.csv("map_TF_to_target_2cols-GREP-WITH-DE-LIST.csv", header = F)
# all1 <- as.vector(unlist(all1))
# length (all1)
# #[1] 1247
#
# rdp <- RedPort()
# calld(rdp, checkcalls=TRUE)
# g <- tni.graph(my.rtni, tfs = all1) #tfs with regulatoryElements in newer versions
# #addGraph(rdp, g, layout=NULL) #SLOW!
#
# #and export as xmggl