**Miscanthus is a commercial lignocellulosic biomass crop owing to its high biomass productivityMiscanthus species are known for their high biomass yield and hence are potential candidates as bio-energy crop, particularly in the temperate regions. Thise present study was conducted to elucidate the physiological and molecular response of in miscanthus Miscanthus genotypes when subjected to well-watered and droughted greenhouse conditions. A significant biomass loss was observed under drought conditions for all genotypes. Among the species, a A sterile hybrid, M. x giganteus hybrid, gave the highestshowed a lower reduction in biomass yield under drought conditions compared to the control. Unexpectedly, biomass yield uUnder well-watered conditions, biomass yield was as good or better than control treatment conditions in all species . tested. M. sinensis was more tolerant than M. sacchariflorus in both water stress conditions. In comparative transcriptomics, a total of 67789 transcripts/unigenes were queried among the species. Among those, nearly 3% transcripts 4,389 of the 67,789 genes (6.4%) in the reference genome exhibited were significantly differential differentially expressed expression within and among all four Miscanthus species during drought conditions. Most of the genes were differentially expressed in a single species, but the . 16 of those differentially expressed genes (DEGs) were shared among all Miscanthus species. Geneenrichment analysis of gene ontology (GO) terms enrichment analysis revealed that the same drought-relevant gene biological processes categories were regulated in all the species during stress conditions. Namely, downregulated differentially expressed genes were significantly involved in protein modification and kinase activity, cell receptor signalling and ion binding; while upregulated differentially expressed genes were significantly involved in sucrose and starch metabolism, redox, and water and glycerol homeostasis and channel activity. Candidate genes with roles in these functional categories were identified in Miscanthus. including “phosphatase activity”, “kinase activity” and “oxidoreductase activity”. In addition, transcripts with biological processes gene ontology vocabulary such as amino acid and lipid metabolic processes were significantly depleted in most of the genotypes. This study provides the first transcriptome data on the induction of drought-related gene expression in differentacross a range of threefour Miscanthus species. Thus, the findings in the present study can play a key role in fast-tracking miscanthus breeding efforts and its full domestication.
require(DESeq2,quietly = T);require(ggplot2,quietly =T);require(dplyr,quietly = T);library(pheatmap,quietly = T);library(apeglm,quietly = T);library(UpSetR,quietly = T);library(ashr,quietly = T)
#set temporary working directory and load the dataset
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1, check.names = F)
dim(counts) #67789 35
## [1] 67789 35
pheno <- read.delim("input/pheno_without48.csv",header = TRUE,check.names=T, sep=',')
dim(pheno)
## [1] 35 3
######## Mxg ########
pheno.Mxg <- pheno %>% filter (spp == "Mxg") # selecting only mxg metadata
print(pheno.Mxg)
## sample spp treatment
## 1 33 Mxg waterlogging
## 2 37 Mxg drought
## 3 41 Mxg control
## 4 45 Mxg waterlogging
## 5 49 Mxg drought
## 6 53 Mxg control
## 7 57 Mxg waterlogging
## 8 61 Mxg drought
## 9 65 Mxg control
counts.Mxg <- counts %>% select(M33, M37, M41, M45, M49, M53, M57, M61, M65)# selecting only mxg counts table
rownames(pheno.Mxg) <- colnames(counts.Mxg)
dds.Mxg <- DESeqDataSetFromMatrix(countData = counts.Mxg, colData = pheno.Mxg, design = ~ treatment)
dds.Mxg <- DESeq(dds.Mxg)
resultsNames(dds.Mxg)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control"
counts(dds.Mxg) %>% str
## int [1:67789, 1:9] 0 0 2166 524 17 71 0 210 0 53 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:67789] "Misin18G283300.v7.1" "Misin10G028800.v7.1" "Misin02G445300.v7.1" "Misin08G137800.v7.1" ...
## ..$ : chr [1:9] "M33" "M37" "M41" "M45" ...
res1_mxg <- lfcShrink(dds.Mxg, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_mxg) # 67789 5
## [1] 67789 5
summary(res1_mxg)
##
## out of 51821 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 2236, 4.3%
## LFC < 0 (down) : 2585, 5%
## outliers [1] : 757, 1.5%
## low counts [2] : 9870, 19%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_mxg <- lfcShrink(dds.Mxg, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_mxg)
##
## out of 51821 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 61, 0.12%
## LFC < 0 (down) : 26, 0.05%
## outliers [1] : 757, 1.5%
## low counts [2] : 3953, 7.6%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_mxg) # control vs drought
plotMA(res2_mxg) # control vs well-watered
# save the output dataset in the same directory
write.table(res1_mxg,"output/DEG-Mxg-drought_Abel.txt")
write.table(res2_mxg,"output/DEG-Mxg-waterlog_Abel.txt")
##### Msac #######
pheno.Msac <- pheno %>% filter (spp == "Msac")
print(pheno.Msac)
## sample spp treatment
## 1 31 Msac waterlogging
## 2 35 Msac drought
## 3 39 Msac control
## 4 43 Msac waterlogging
## 5 47 Msac drought
## 6 51 Msac control
## 7 55 Msac waterlogging
## 8 59 Msac drought
## 9 63 Msac control
counts.Msac <- counts %>% select(M31, M35, M39, M43, M47, M51, M55, M59, M63)
rownames(pheno.Msac ) <- colnames(counts.Msac)
dds.Msac <- DESeqDataSetFromMatrix(countData = counts.Msac, colData = pheno.Msac, design = ~ treatment)
dds.Msac <- DESeq(dds.Msac)
resultsNames(dds.Msac)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control"
res1_Msac <- lfcShrink(dds.Msac, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Msac) # 67789 5
## [1] 67789 5
summary(res1_Msac)
##
## out of 49654 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 1870, 3.8%
## LFC < 0 (down) : 2172, 4.4%
## outliers [1] : 278, 0.56%
## low counts [2] : 9439, 19%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Msac <- lfcShrink(dds.Msac, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Msac)
##
## out of 49654 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 22, 0.044%
## LFC < 0 (down) : 29, 0.058%
## outliers [1] : 278, 0.56%
## low counts [2] : 1795, 3.6%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Msac) # control vs drought
plotMA(res2_Msac) # control vs well-watered
write.table(res1_Msac,"output/DEG-Msac-drought_Abel.txt")
write.table(res2_Msac,"output/DEG-Msac-waterlog_Abel.txt")
#### Hybrid3n #######
pheno.Hybrid3n <- pheno %>% filter (spp == "Hybrid3n")
print(pheno.Hybrid3n)
## sample spp treatment
## 1 34 Hybrid3n waterlogging
## 2 38 Hybrid3n drought
## 3 42 Hybrid3n control
## 4 46 Hybrid3n waterlogging
## 5 50 Hybrid3n drought
## 6 54 Hybrid3n control
## 7 58 Hybrid3n waterlogging
## 8 62 Hybrid3n drought
## 9 66 Hybrid3n control
counts.Hybrid3n <- counts %>% select(M34, M38, M42, M46, M50, M54, M58, M62, M66)
colnames(counts.Hybrid3n) <- rownames(pheno.Hybrid3n)
dds.Hybrid3n <- DESeqDataSetFromMatrix(countData = counts.Hybrid3n, colData = pheno.Hybrid3n, design = ~ treatment)
dds.Hybrid3n <- DESeq(dds.Hybrid3n)
resultsNames(dds.Hybrid3n)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control"
res1_Hybrid3n <- lfcShrink(dds.Hybrid3n, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Hybrid3n) # 67789 5
## [1] 67789 5
summary(res1_Hybrid3n)
##
## out of 49478 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 725, 1.5%
## LFC < 0 (down) : 977, 2%
## outliers [1] : 299, 0.6%
## low counts [2] : 8455, 17%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Hybrid3n <- lfcShrink(dds.Hybrid3n, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Hybrid3n)
##
## out of 49478 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 55, 0.11%
## LFC < 0 (down) : 64, 0.13%
## outliers [1] : 299, 0.6%
## low counts [2] : 7525, 15%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Hybrid3n) # control vs drought
plotMA(res2_Hybrid3n) # control vs well-watered
#
write.table(res1_Hybrid3n,"output/DEG-Hybrid3n-drought_Abel.txt")
write.table(res2_Hybrid3n,"output/DEG-Hybrid3n-waterlog_Abel.txt")
#### Msin #######
pheno.Msin <- pheno %>% filter (spp == "Msin")
print(pheno.Msin)
## sample spp treatment
## 1 32 Msin waterlogging
## 2 36 Msin drought
## 3 40 Msin control
## 4 44 Msin waterlogging
## 5 52 Msin control
## 6 56 Msin waterlogging
## 7 60 Msin drought
## 8 64 Msin control
counts.Msin <- counts %>% select(M32, M36, M40, M44, M52, M56, M60, M64)
dds.Msin <- DESeqDataSetFromMatrix(countData = counts.Msin, colData = pheno.Msin, design = ~ treatment)
dds.Msin <- DESeq(dds.Msin)
resultsNames(dds.Msin)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control"
res1_Msin <- lfcShrink(dds.Msin, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Msin) # 67789 5
## [1] 67789 5
summary(res1_Msin)
##
## out of 50018 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 202, 0.4%
## LFC < 0 (down) : 936, 1.9%
## outliers [1] : 96, 0.19%
## low counts [2] : 10464, 21%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Msin <- lfcShrink(dds.Msin, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Msin)
##
## out of 50018 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 4, 0.008%
## LFC < 0 (down) : 6, 0.012%
## outliers [1] : 96, 0.19%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Msin) # control vs drought
plotMA(res2_Msin) # control vs well-watered
write.table(res1_Msin,"output/DEG-Msin-drought_Abel.txt")
write.table(res2_Msin,"output/DEG-Msin-waterlog_Abel.txt")
###DESeq for all withoutM48
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
counts.WOM48 <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1,check.names=FALSE)
dim(counts.WOM48)
## [1] 67789 35
#pheno$name = as.factor(pheno$name)#enforcing rownames are factors not numerics
rownames(pheno) <- colnames(counts.WOM48)
all(rownames(pheno) == colnames(counts.WOM48))
## [1] TRUE
#ggplot(counts.WOM48)+ geom_histogram(aes(x = M31), stat = "bin", bins = 200) + xlab("Raw expression counts") + ylab ("Number of genes")
dds.WOM48 <- DESeqDataSetFromMatrix(countData = counts.WOM48, colData = pheno, design = ~ treatment + spp)
summary(dds.WOM48)
## [1] "DESeqDataSet object of length 67789 with 0 metadata columns"
keep.WOM48 <- rowSums(counts(dds.WOM48) >= 5) >= 4
table(keep.WOM48)
## keep.WOM48
## FALSE TRUE
## 18731 49058
dds.WOM48.filtered <- dds.WOM48[keep.WOM48,]
summary(dds.WOM48.filtered)
## [1] "DESeqDataSet object of length 49058 with 0 metadata columns"
dds.WOM48.filtered <- DESeq(dds.WOM48.filtered) ;resultsNames(dds.WOM48.filtered)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control" "spp_Msac_vs_Hybrid3n"
## [5] "spp_Msin_vs_Hybrid3n" "spp_Mxg_vs_Hybrid3n"
boxplot(log10(counts(dds.WOM48.filtered)+1))# before normalization
dds.WOM48.filtered <- estimateSizeFactors(dds.WOM48.filtered)
sizeFactors(dds.WOM48.filtered)
## M31 M32 M33 M34 M35 M36 M37 M38
## 0.9246360 0.8663091 1.1383236 0.9893500 0.8970502 0.8623480 1.1609429 1.0285116
## M39 M40 M41 M42 M43 M44 M45 M46
## 0.9072739 1.1005317 0.6702764 0.7223468 1.3292962 0.9450737 1.0454916 0.9776439
## M47 M49 M50 M51 M52 M53 M54 M55
## 0.9890113 1.6834196 1.5705317 1.3065587 1.6259665 1.3565735 1.4778934 0.8602344
## M56 M57 M58 M59 M60 M61 M62 M63
## 1.0113151 1.4543822 1.1355904 0.6893328 0.6704424 0.8798643 1.4957203 0.9405997
## M64 M65 M66
## 0.7141060 0.6623280 1.2380000
normalized_dds_counts <- counts(dds.WOM48.filtered, normalized=TRUE)
#boxplot(log10(counts(dds.WOM48.filtered,normalized=TRUE)+1))# after normalization
vsd_all.WOM48 <- vst(dds.WOM48.filtered, blind=TRUE)
head(vsd_all.WOM48, n=3)
## class: DESeqTransform
## dim: 3 35
## metadata(1): version
## assays(1): ''
## rownames(3): Misin10G028800.v7.1 Misin02G445300.v7.1
## Misin08G137800.v7.1
## rowData names(38): baseMean baseVar ... maxCooks dispFit
## colnames(35): M31 M32 ... M65 M66
## colData names(4): sample spp treatment sizeFactor
vsd_mat_all.WOM48 <- assay(vsd_all.WOM48)# Compute pairwise correlation values
vsd_cor_all.WOM48 <- cor(vsd_mat_all.WOM48)
#View(vsd_cor_all)
pheatmap(vsd_cor_all.WOM48, annotation = select(pheno, treatment))
pheatmap(vsd_cor_all.WOM48, annotation = select(pheno, spp))
resultsNames(dds.WOM48.filtered)
## [1] "Intercept" "treatment_drought_vs_control"
## [3] "treatment_waterlogging_vs_control" "spp_Msac_vs_Hybrid3n"
## [5] "spp_Msin_vs_Hybrid3n" "spp_Mxg_vs_Hybrid3n"
res_drought_vs_con_ape = lfcShrink(dds.WOM48.filtered, coef = 2, type = "apeglm")
res_drought_vs_con_norm = lfcShrink(dds.WOM48.filtered, coef = 2, type = "normal")
res_drought_vs_con_Ash = lfcShrink(dds.WOM48.filtered, coef = 2, type = "ashr")
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim = c(1, 1e5); ylim=c(-3,3)
png("output/drought_vs_con_feb 2020_apeglm.png");plotMA(res_drought_vs_con_ape, xlim=xlim, ylim=ylim, main= "drought_vs_con_feb 2020_apeglm"); dev.off()
## quartz_off_screen
## 2
png("output/drought_vs_con_feb 2020_normal.png");plotMA(res_drought_vs_con_norm , xlim=xlim, ylim=ylim, main= "drought_vs_con_feb 2020_normal"); dev.off()
## quartz_off_screen
## 2
png("output/drought_vs_cont_feb 2020_ashr.png");plotMA(res_drought_vs_con_Ash, xlim=xlim, ylim=ylim, main= "drought_vs_cont_feb 2020_ashr"); dev.off()
## quartz_off_screen
## 2
## well-watered vs control
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim = c(1, 1e5); ylim=c(-3,3)
res_water_vs_con_ape = lfcShrink(dds.WOM48.filtered, coef = 3, type = "apeglm")
res_water_vs_con_norm = lfcShrink(dds.WOM48.filtered, coef = 3, type = "normal")
res_water_vs_con_Ash = lfcShrink(dds.WOM48.filtered, coef = 3, type = "ashr")
png("output/water_vs_con_feb 2020_apeglm.png"); plotMA(res_water_vs_con_ape, xlim=xlim, ylim=ylim, main= "water_vs_con_feb 2020_apeglm"); dev.off()
## quartz_off_screen
## 2
png("output/water_vs_con_feb 2020_normal.png");plotMA(res_water_vs_con_norm , xlim=xlim, ylim=ylim, main= "water_vs_con_feb 2020_normal"); dev.off()
## quartz_off_screen
## 2
png("output/water_vs_cont_feb 2020_ashr.png");plotMA(res_water_vs_con_Ash, xlim=xlim, ylim=ylim, main= "water_vs_cont_feb 2020_ashr"); dev.off()
## quartz_off_screen
## 2
plotPCA1 <- function (dds.WOM48.filtered, intgroup = "treatment", ntop = 500, returnData = FALSE)
{
rv <- rowVars(assay(dds.WOM48.filtered))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca <- prcomp(t(assay(dds.WOM48.filtered)[select, ]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
if (!all(intgroup %in% names(colData(dds.WOM48.filtered)))) {
stop("the argument 'intgroup' should specify columns of colData(dds.filtered)")
}
intgroup.df <- as.data.frame(colData(dds.WOM48.filtered)[, intgroup,
drop = FALSE])
group <- if (length(intgroup) > 1) {
factor(apply(intgroup.df, 1, paste, collapse = " : "))
}
else {
colData(dds.WOM48.filtered)[[intgroup]]
}
d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], PC3 = pca$x[, 3], PC4 = pca$x[, 4], group = group, intgroup.df, name = colnames(dds.WOM48.filtered))
if (returnData) {
attr(d, "percentVar") <- percentVar[1:2]
return(d)
}
library(ggrepel)
ggplot(data = d, aes_string(x = "PC1", y = "PC2", color = "treatment", shape = "spp" )) + geom_point(size = 4) +
# geom_text_repel(aes(label = name), hjust = 0.6,vjust = -0.7, nudge_x = 0.09, size = 0,
# point.padding = 0.35, box.padding = 0.25, direction = "both") +
xlab(paste0("PC1: ", round(percentVar[1] *
100), "% variance")) + ylab(paste0("PC2: ", round(percentVar[2] *
100), "% variance")) + coord_fixed() + theme_bw()+ scale_shape_manual(values = c(21, 22, 23, 24)) +
# xlim(-40,40) + ylim(-40,40) +
theme(axis.text=element_text(size=14),legend.position = "right",plot.title = element_text(size=14),
axis.title=element_text(size=14,face="bold")) + scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a"))+
theme(legend.title = element_text(color = "white", size = 14),
legend.text = element_text(color = "black")) + theme (legend.key.size = unit(0.5, "cm"),
legend.key.width = unit(0.2,"cm") )
}
p_all_1 = plotPCA1(vsd_all.WOM48, intgroup=c("treatment", "spp")) # fig 1
p_all_1
# upset plot for well-watered DEGs 19/02/2020
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
mxg.waterlog <- read.delim("output/DEG-Mxg-waterlog_Abel.txt", header = T, sep = ' ')
msin.waterlog <- read.delim("output/DEG-Msin-waterlog_Abel.txt", header = T, sep = ' ')
msac.waterlog <- read.delim("output/DEG-Msac-waterlog_Abel.txt", header = T, sep = ' ')
hybrid3n.waterlog <- read.delim("output/DEG-Hybrid3n-waterlog_Abel.txt", header = T, sep = ' ')
list <- as.data.frame(allgenes)
rownames(list) <- list$allgenes #MOVED HERE
list$Mxg.well_watered <- as.integer(allgenes %in% rownames(mxg.waterlog[mxg.waterlog$padj<0.01,]))
list$Msin.well_watered <- as.integer(allgenes %in% rownames(msin.waterlog[msin.waterlog$padj<0.05,])) #!!!!
list$Msac.well_watered <- as.integer(allgenes %in% rownames(msac.waterlog[msac.waterlog$padj<0.01,]))
list$hybrid3n.well_watered <- as.integer(allgenes %in% rownames(hybrid3n.waterlog[hybrid3n.waterlog$padj<0.05,])) #!!!!
#rownames(list) <- list$allgenes
list <- list[,-1]
write.csv(list,file="output/upset_waterlog.csv")
upset <- upset(list, nsets = 4, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 1, 2, 2), order.by = "freq")
upset
## Upregulated DEGs in droughted conditions Vs. control
# upset plot for drought DEGs 19/02/2020
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
mxg.drought <- read.delim("output/DEG-Mxg-drought_Abel.txt", header = T, sep = ' ')
msin.drought <- read.delim("output/DEG-Msin-drought_Abel.txt", header = T, sep = ' ')
msac.drought <- read.delim("output/DEG-Msac-drought_Abel.txt", header = T, sep = ' ')
hybrid3n.drought <- read.delim("output/DEG-Hybrid3n-drought_Abel.txt", header = T, sep = ' ')
list <- as.data.frame(allgenes)
list$mxg.drought <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$padj<0.01,]))
list$msin.drought <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$padj<0.05,])) #!!!!
list$msac.drought <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$padj<0.01,]))
list$hybrid3n.drought <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$padj<0.05,])) #!!!!
rownames(list) <- list$allgenes
list <- list[,-1]
write.csv(list,file="output/upset_drought.csv")
myupset<-upset(list, nsets = 4, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")
myupset
##EA calculations with TopGP
#NEW:
require(DESeq2);require(ggplot2);require(dplyr);library(pheatmap);library(apeglm);library(UpSetR);library(topGO);library(data.table)
options(scipen=999) #disable scientific annotation
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/abel_topgo")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")
list$mxg.drought.UP <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$log2FoldChange>0,]))
list$mxg.drought.DOWN <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$log2FoldChange<0,]))
list$hybrid3n.drought.UP <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$log2FoldChange>0,]))
list$hybrid3n.drought.DOWN <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$log2FoldChange<0,]))
list$msac.drought.UP <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$log2FoldChange>0,]))
list$msac.drought.DOWN <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$log2FoldChange<0,]))
list$msin.drought.UP <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$log2FoldChange>0,]))
list$msin.drought.DOWN <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$log2FoldChange<0,]))
###
list$names<-rownames(list) #!!!!
mxg.drought.up <- list %>% filter(mxg.drought == 1) %>% filter(mxg.drought.UP == 1) #1010
dim(mxg.drought.up)
## [1] 1010 13
mxg.drought.down <- list %>% filter(mxg.drought == 1) %>% filter(mxg.drought.DOWN == 1) #1343
dim(mxg.drought.down)
## [1] 1343 13
hybrid3n.drought.up <- list %>% filter(hybrid3n.drought == 1) %>% filter(hybrid3n.drought.UP == 1) #488
dim(hybrid3n.drought.up)
## [1] 488 13
hybrid3n.drought.down <- list %>% filter(hybrid3n.drought == 1) %>% filter(hybrid3n.drought.DOWN == 1) #691
dim(hybrid3n.drought.down)
## [1] 691 13
msac.drought.up <- list %>% filter(msac.drought == 1) %>% filter(msac.drought.UP == 1)
dim(msac.drought.up) #793
## [1] 793 13
msac.drought.down <- list %>% filter(msac.drought == 1) %>% filter(msac.drought.DOWN == 1)
dim(msac.drought.down) #874
## [1] 874 13
msin.drought.up <- list %>% filter(msin.drought == 1) %>% filter(msin.drought.UP == 1)
dim(msin.drought.up) #131
## [1] 131 13
msin.drought.down <- list %>% filter(msin.drought == 1) %>% filter(msin.drought.DOWN == 1)
dim(msin.drought.down) #642
## [1] 642 13
###FUNCTIONS:
run.topgo.pipeline.BP <- function(mytemp) {
#PIPE starts
list.mytemp <- factor(as.integer(allgenes %in% mytemp$names))
names(list.mytemp) <- allgenes
#BP
fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
#add genes to dataframe
results.fdata.mytemp.BP$genes <- sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
genes<- genesInTerm(fdata.mytemp.BP, x)
genes[[1]][genes[[1]] %in% mytemp$names]
})
results.fdata.mytemp.BP
}
#MF
run.topgo.pipeline.MF <- function(mytemp) {
list.mytemp <- factor(as.integer(allgenes %in% mytemp$names))
names(list.mytemp) <- allgenes
fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
#add genes to dataframe
results.fdata.mytemp.MF$genes <- sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
genes<-genesInTerm(fdata.mytemp.MF, x)
genes[[1]][genes[[1]] %in% mytemp$names]
})
results.fdata.mytemp.MF
}
##END FUNCTIONS###
#calculate and save results:
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
topgo.file <- read.delim("input/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/slim_annot_TOPGO.annot")
slim.BP.mxg.drought.up <- run.topgo.pipeline.BP(mxg.drought.up)
fwrite(slim.BP.mxg.drought.up, file = "output/slim-mxg.drought.up.BP.csv")
slim.MF.mxg.drought.up <- run.topgo.pipeline.MF(mxg.drought.up)
fwrite(slim.MF.mxg.drought.up, file = "output/slim-mxg.drought.up.MF.csv")
slim.BP.mxg.drought.down <- run.topgo.pipeline.BP(mxg.drought.down)
fwrite(slim.BP.mxg.drought.down, file = "output/slim-mxg.drought.down.BP.csv")
slim.MF.mxg.drought.down <- run.topgo.pipeline.MF(mxg.drought.down)
fwrite(slim.MF.mxg.drought.down, file = "output/slim-mxg.drought.down.MF.csv")
#
slim.BP.hybrid3n.drought.up <- run.topgo.pipeline.BP(hybrid3n.drought.up)
fwrite(slim.BP.hybrid3n.drought.up, file = "output/slim-hybrid3n.drought.up.BP.csv")
slim.MF.hybrid3n.drought.up <- run.topgo.pipeline.MF(hybrid3n.drought.up)
fwrite(slim.MF.hybrid3n.drought.up, file = "output/slim-hybrid3n.drought.up.MF.csv")
slim.BP.hybrid3n.drought.down <- run.topgo.pipeline.BP(hybrid3n.drought.down)
fwrite(run.topgo.pipeline.BP(hybrid3n.drought.down), file = "output/slim-hybrid3n.drought.down.BP.csv")
slim.MF.hybrid3n.drought.down <- run.topgo.pipeline.MF(hybrid3n.drought.down)
fwrite(slim.MF.hybrid3n.drought.down, file = "output/slim-hybrid3n.drought.down.MF.csv")
#
slim.BP.msin.drought.up <- run.topgo.pipeline.BP(msin.drought.up)
fwrite(slim.BP.msin.drought.up, file = "output/slim-msin.drought.up.BP.csv")
slim.MF.msin.drought.up <- run.topgo.pipeline.MF(msin.drought.up)
fwrite(slim.MF.msin.drought.up, file = "output/slim-msin.drought.up.MF.csv")
slim.BP.msin.drought.down <- run.topgo.pipeline.BP(msin.drought.down)
fwrite(slim.BP.msin.drought.down, file = "output/slim-msin.drought.down.BP.csv")
slim.MF.msin.drought.down <- run.topgo.pipeline.MF(msin.drought.down)
fwrite(slim.MF.msin.drought.down, file = "output/slim-msin.drought.down.MF.csv")
#
slim.BP.msac.drought.up <- run.topgo.pipeline.BP(msac.drought.up)
fwrite(slim.BP.msac.drought.up, file = "output/slim-msac.drought.up.BP.csv")
slim.MF.msac.drought.up <- run.topgo.pipeline.MF(msac.drought.up)
fwrite(slim.MF.msac.drought.up, file = "output/slim-msac.drought.up.MF.csv")
slim.BP.msac.drought.down <- run.topgo.pipeline.BP(msac.drought.down)
fwrite(slim.BP.msac.drought.down, file = "output/slim-msac.drought.down.BP.csv")
slim.MF.msac.drought.down <- run.topgo.pipeline.MF(msac.drought.down)
fwrite(slim.MF.msac.drought.down, file = "output/slim-msac.drought.down.MF.csv")
###
#FULL ANNOTATION:
topgo.file <- read.delim("input/full_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/full_annot_TOPGO.annot")
#FULL
full.BP.mxg.drought.up <- run.topgo.pipeline.BP(mxg.drought.up)
fwrite(full.BP.mxg.drought.up, file = "output/full-mxg.drought.up.BP.csv")
full.MF.mxg.drought.up <- run.topgo.pipeline.MF(mxg.drought.up)
fwrite(full.MF.mxg.drought.up, file = "output/full-mxg.drought.up.MF.csv")
full.BP.mxg.drought.down <- run.topgo.pipeline.BP(mxg.drought.down)
fwrite(full.BP.mxg.drought.down, file = "output/full-mxg.drought.down.BP.csv")
full.MF.mxg.drought.down <- run.topgo.pipeline.MF(mxg.drought.down)
fwrite(full.MF.mxg.drought.down, file = "output/full-mxg.drought.down.MF.csv")
#
full.BP.hybrid3n.drought.up <- run.topgo.pipeline.BP(hybrid3n.drought.up)
fwrite(full.BP.hybrid3n.drought.up, file = "output/full-hybrid3n.drought.up.BP.csv")
full.MF.hybrid3n.drought.up <- run.topgo.pipeline.MF(hybrid3n.drought.up)
fwrite(full.MF.hybrid3n.drought.up, file = "output/full-hybrid3n.drought.up.MF.csv")
full.BP.hybrid3n.drought.down <- run.topgo.pipeline.BP(hybrid3n.drought.down)
fwrite(run.topgo.pipeline.BP(hybrid3n.drought.down), file = "output/full-hybrid3n.drought.down.BP.csv")
full.MF.hybrid3n.drought.down <- run.topgo.pipeline.MF(hybrid3n.drought.down)
fwrite(full.MF.hybrid3n.drought.down, file = "output/full-hybrid3n.drought.down.MF.csv")
#
full.BP.msin.drought.up <- run.topgo.pipeline.BP(msin.drought.up)
fwrite(full.BP.msin.drought.up, file = "output/full-msin.drought.up.BP.csv")
full.MF.msin.drought.up <- run.topgo.pipeline.MF(msin.drought.up)
fwrite(full.MF.msin.drought.up, file = "output/full-msin.drought.up.MF.csv")
full.BP.msin.drought.down <- run.topgo.pipeline.BP(msin.drought.down)
fwrite(full.BP.msin.drought.down, file = "output/full-msin.drought.down.BP.csv")
full.MF.msin.drought.down <- run.topgo.pipeline.MF(msin.drought.down)
fwrite(full.MF.msin.drought.down, file = "output/full-msin.drought.down.MF.csv")
#
full.BP.msac.drought.up <- run.topgo.pipeline.BP(msac.drought.up)
fwrite(full.BP.msac.drought.up, file = "output/full-msac.drought.up.BP.csv")
full.MF.msac.drought.up <- run.topgo.pipeline.MF(msac.drought.up)
fwrite(full.MF.msac.drought.up, file = "output/full-msac.drought.up.MF.csv")
full.BP.msac.drought.down <- run.topgo.pipeline.BP(msac.drought.down)
fwrite(full.BP.msac.drought.down, file = "output/full-msac.drought.down.BP.csv")
full.MF.msac.drought.down <- run.topgo.pipeline.MF(msac.drought.down)
fwrite(full.MF.msac.drought.down, file = "output/full-msac.drought.down.MF.csv")
#BUBBLEPLOTS
library(corrplot);library(reshape2);library(RColorBrewer);library(pheatmap);library(dplyr);library(ggplot2)
library(topGO)
library(data.table)
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/abel_topgo")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")
#slim
slim.BP.mxg.drought.up$regulation <- 1
slim.MF.mxg.drought.up$regulation <- 1
slim.BP.mxg.drought.down$regulation <- -1
slim.MF.mxg.drought.down$regulation <- -1
slim.BP.hybrid3n.drought.up$regulation <- 1
slim.MF.hybrid3n.drought.up$regulation <- 1
slim.BP.hybrid3n.drought.down$regulation <- -1
slim.MF.hybrid3n.drought.down$regulation <- -1
slim.BP.msin.drought.up$regulation <- 1
slim.MF.msin.drought.up$regulation <- 1
slim.BP.msin.drought.down$regulation <- -1
slim.MF.msin.drought.down$regulation <- -1
slim.BP.msac.drought.up$regulation <- 1
slim.MF.msac.drought.up$regulation <- 1
slim.BP.msac.drought.down$regulation <- -1
slim.MF.msac.drought.down$regulation <- -1
#
slim.BP.mxg.drought.up$spp <- "mxg"
slim.MF.mxg.drought.up$spp <- "mxg"
slim.BP.mxg.drought.down$spp <- "mxg"
slim.MF.mxg.drought.down$spp <- "mxg"
slim.BP.hybrid3n.drought.up$spp <- "hybrid3n"
slim.MF.hybrid3n.drought.up$spp <- "hybrid3n"
slim.BP.hybrid3n.drought.down$spp <- "hybrid3n"
slim.MF.hybrid3n.drought.down$spp <- "hybrid3n"
slim.BP.msin.drought.up$spp <- "msin"
slim.MF.msin.drought.up$spp <- "msin"
slim.BP.msin.drought.down$spp <- "msin"
slim.MF.msin.drought.down$spp <- "msin"
slim.BP.msac.drought.up$spp <- "msac"
slim.MF.msac.drought.up$spp <- "msac"
slim.BP.msac.drought.down$spp <- "msac"
slim.MF.msac.drought.down$spp <- "msac"
#
#CORRECT FORMATING:
#
supertable <- rbind(slim.BP.mxg.drought.up,slim.BP.mxg.drought.down,slim.BP.hybrid3n.drought.up,slim.BP.hybrid3n.drought.down,slim.BP.msin.drought.up,slim.BP.msin.drought.down,slim.BP.msac.drought.up,slim.BP.msac.drought.down,slim.MF.mxg.drought.up,slim.MF.mxg.drought.down,slim.MF.hybrid3n.drought.up,slim.MF.hybrid3n.drought.down,slim.MF.msin.drought.up,slim.MF.msin.drought.down,slim.MF.msac.drought.up,slim.MF.msac.drought.down)
supertable.MF <- rbind(slim.MF.mxg.drought.up,slim.MF.mxg.drought.down,slim.MF.hybrid3n.drought.up,slim.MF.hybrid3n.drought.down,slim.MF.msin.drought.up,slim.MF.msin.drought.down,slim.MF.msac.drought.up,slim.MF.msac.drought.down)
supertable.BP <- rbind(slim.BP.mxg.drought.up,slim.BP.mxg.drought.down,slim.BP.hybrid3n.drought.up,slim.BP.hybrid3n.drought.down,slim.BP.msin.drought.up,slim.BP.msin.drought.down,slim.BP.msac.drought.up,slim.BP.msac.drought.down)
#correct the scientific notation by converting from character to numeric
supertable$weight01_fisher <- as.numeric(supertable$weight01_fisher)
## Warning: NAs introduced by coercion
supertable.MF$weight01_fisher <- as.numeric(supertable.MF$weight01_fisher)
## Warning: NAs introduced by coercion
supertable.BP$weight01_fisher <- as.numeric(supertable.BP$weight01_fisher)
enriched.GOs <- supertable %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID)
enriched.GOs.MF <- supertable.MF %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID)
enriched.GOs.BP <- supertable.BP %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID)
#list of GOs that are enrich in at least one analysis
selected <- supertable %>% filter(GO.ID %in% unlist(enriched.GOs)) #filter our enrich GOs by name
selected.MF <- supertable.MF %>% filter(GO.ID %in% unlist(enriched.GOs.MF)) #filter our enrich GOs by name
selected.BP <- supertable.BP %>% filter(GO.ID %in% unlist(enriched.GOs.BP)) #filter our enrich GOs by name
#PUT A BOTTOM CAP
selected$weight01_fisher[selected$weight01_fisher<0.0001] <- 0.0001
selected.MF$weight01_fisher[selected.MF$weight01_fisher<0.0001] <- 0.0001
selected.BP$weight01_fisher[selected.BP$weight01_fisher<0.0001] <- 0.0001
#PLOT BUBBLES SLIM
library(RColorBrewer)
table.merge.filter <- selected
p <- ggplot(data=table.merge.filter, aes(x=spp,y=Term))
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1)
#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p + theme_minimal()
table.merge.filter <- selected.BP
p1 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term))
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1)
p1 <- p1 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p1 <- p1 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p1 <- p1 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac"))
p1 <- p1 + scale_y_discrete(limits=rev(c("cellular amino acid metabolic process",
"photosynthesis",
"cellular protein modification process",
"protein folding",
"generation of precursor metabolites and ...",
"signal transduction",
"small molecule metabolic process",
"carbohydrate metabolic process",
"cofactor metabolic process",
"lipid metabolic process",
"biosynthetic process",
"translation",
"secondary metabolic process",
"ribosome biogenesis",
"homeostatic process",
"membrane organization",
"response to stress",
"nitrogen cycle metabolic process")))
p1 <- p1 + theme_linedraw()
p1
table.merge.filter <- selected.MF
p2 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term))
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1)
p2 <- p2 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p2 <- p2 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p2 <- p2 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac"))
p2 <- p2 + scale_y_discrete(limits=rev(c("kinase activity",
"ion binding",
"structural constituent of ribosome",
"oxidoreductase activity",
"RNA binding",
"transferase activity, transferring glyco...",
"hydrolase activity, acting on glycosyl b...",
"isomerase activity",
"phosphatase activity")))
p2 <- p2 + theme_linedraw()
p2
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
grid.arrange(p2, p1, layout_matrix=rbind(c(2,1),c(2,3)))
#FULL GO ANNOTATION
library(RColorBrewer)
#setwd("~/analysis/susanne_RNASEQ_data/abel")
setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#full
full.BP.mxg.drought.up$regulation <- 1
full.MF.mxg.drought.up$regulation <- 1
full.BP.mxg.drought.down$regulation <- -1
full.MF.mxg.drought.down$regulation <- -1
full.BP.hybrid3n.drought.up$regulation <- 1
full.MF.hybrid3n.drought.up$regulation <- 1
full.BP.hybrid3n.drought.down$regulation <- -1
full.MF.hybrid3n.drought.down$regulation <- -1
full.BP.msin.drought.up$regulation <- 1
full.MF.msin.drought.up$regulation <- 1
full.BP.msin.drought.down$regulation <- -1
full.MF.msin.drought.down$regulation <- -1
full.BP.msac.drought.up$regulation <- 1
full.MF.msac.drought.up$regulation <- 1
full.BP.msac.drought.down$regulation <- -1
full.MF.msac.drought.down$regulation <- -1
#
full.BP.mxg.drought.up$spp <- "mxg"
full.MF.mxg.drought.up$spp <- "mxg"
full.BP.mxg.drought.down$spp <- "mxg"
full.MF.mxg.drought.down$spp <- "mxg"
full.BP.hybrid3n.drought.up$spp <- "hybrid3n"
full.MF.hybrid3n.drought.up$spp <- "hybrid3n"
full.BP.hybrid3n.drought.down$spp <- "hybrid3n"
full.MF.hybrid3n.drought.down$spp <- "hybrid3n"
full.BP.msin.drought.up$spp <- "msin"
full.MF.msin.drought.up$spp <- "msin"
full.BP.msin.drought.down$spp <- "msin"
full.MF.msin.drought.down$spp <- "msin"
full.BP.msac.drought.up$spp <- "msac"
full.MF.msac.drought.up$spp <- "msac"
full.BP.msac.drought.down$spp <- "msac"
full.MF.msac.drought.down$spp <- "msac"
#
#CORRECT FORMATING:
#
supertable <- rbind(full.BP.mxg.drought.up,full.BP.mxg.drought.down,full.BP.hybrid3n.drought.up,full.BP.hybrid3n.drought.down,full.BP.msin.drought.up,full.BP.msin.drought.down,full.BP.msac.drought.up,full.BP.msac.drought.down,full.MF.mxg.drought.up,full.MF.mxg.drought.down,full.MF.hybrid3n.drought.up,full.MF.hybrid3n.drought.down,full.MF.msin.drought.up,full.MF.msin.drought.down,full.MF.msac.drought.up,full.MF.msac.drought.down)
#supertable.BP <- rbind(full.BP.mxg.drought.up,full.BP.mxg.drought.down,full.BP.hybrid3n.drought.up,full.BP.hybrid3n.drought.down,full.BP.msin.drought.up,full.BP.msin.drought.down,full.BP.msac.drought.up,full.BP.msac.drought.down)
#supertable.MF <- rbind(full.MF.mxg.drought.up,full.MF.mxg.drought.down,full.MF.hybrid3n.drought.up,full.MF.hybrid3n.drought.down,full.MF.msin.drought.up,full.MF.msin.drought.down,full.MF.msac.drought.up,full.MF.msac.drought.down)
#DO EITHER ONE EACH TIME:
#supertable<-supertable.BP
#supertable<-supertable.MF
#correct the scientific notation by converting from character to numeric
supertable$weight01_fisher <- as.numeric(supertable$weight01_fisher)
enriched.GOs <- supertable %>% filter(weight01_fisher<0.005) %>% filter (Significant>=10) #%>% select(GO.ID) #list of GOs that are enrich in at least one analysis
#CHANGED MIN MEMBERS TO 10 genes
selected <- supertable %>% filter(GO.ID %in% unlist(enriched.GOs)) #filter our enrich GOs by name
#PUT A BOTTOM CAP
selected$weight01_fisher[selected$weight01_fisher<0.0001] <- 0.0001
table.merge.filter <- selected
p <- ggplot(data=table.merge.filter, aes(x=spp,y=Term))
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), stroke=0.5, alpha=0.8)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), stroke=0.5, alpha=0.8)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), stroke=1, alpha=0.8)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), stroke=1, alpha=0.8)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.001), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), stroke=2, alpha=0.8)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.001), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), stroke=2, alpha=0.8)
#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(5,10,20,30,40,50,75,100), range = (c(2,9)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p <- p + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac"))
p + theme_light()
p2 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term))
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1)
p2 <- p2 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p2 <- p2 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p2 <- p2 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac"))
p2 <- p2 + scale_y_discrete(limits=rev(c("serine family amino acid metabolic proce...",
"reductive pentose-phosphate cycle",
"cell surface receptor signaling pathway",
"protein kinase activity",
"sucrose metabolic process",
"starch metabolic process",
"polysaccharide catabolic process",
"chlorophyll binding",
"calcium ion binding",
"protein serine/threonine kinase activity",
"iron ion binding",
"thylakoid membrane organization",
"carbohydrate binding",
"glycerol transport",
"oxidation-reduction process",
"carbon utilization",
"cellular water homeostasis",
"water channel activity",
"glycerol channel activity",
"glycolytic process",
"polysaccharide binding",
"cation binding",
"carbohydrate transmembrane transport",
"water transport",
"chloroplast organization",
"photosynthesis",
"phosphorylation",
"protein serine/threonine phosphatase act...",
"mannose metabolic process",
"identical protein binding",
"RNA binding",
"response to heat",
"isopentenyl diphosphate biosynthetic pro...",
"cysteine biosynthetic process",
"abscisic acid-activated signaling pathwa...",
"calmodulin binding",
"defense response",
"ribosome biogenesis",
"dioxygenase activity",
"ATPase activity, coupled to transmembran...",
"structural constituent of ribosome",
"translation",
"pyruvate metabolic process",
"nucleotide binding",
"metal ion transport",
"secondary metabolite biosynthetic proces...",
"nucleotide-sugar metabolic process",
"cellular aldehyde metabolic process",
"protein folding",
"protein dephosphorylation")))
p2 + theme_linedraw()