true

Summary

**Miscanthus is a commercial lignocellulosic biomass crop owing to its high biomass productivityMiscanthus species are known for their high biomass yield and hence are potential candidates as bio-energy crop, particularly in the temperate regions. Thise present study was conducted to elucidate the physiological and molecular response of in miscanthus Miscanthus genotypes when subjected to well-watered and droughted greenhouse conditions. A significant biomass loss was observed under drought conditions for all genotypes. Among the species, a A sterile hybrid, M. x giganteus hybrid, gave the highestshowed a lower reduction in biomass yield under drought conditions compared to the control. Unexpectedly, biomass yield uUnder well-watered conditions, biomass yield was as good or better than control treatment conditions in all species . tested. M. sinensis was more tolerant than M. sacchariflorus in both water stress conditions. In comparative transcriptomics, a total of 67789 transcripts/unigenes were queried among the species. Among those, nearly 3% transcripts 4,389 of the 67,789 genes (6.4%) in the reference genome exhibited were significantly differential differentially expressed expression within and among all four Miscanthus species during drought conditions. Most of the genes were differentially expressed in a single species, but the . 16 of those differentially expressed genes (DEGs) were shared among all Miscanthus species. Geneenrichment analysis of gene ontology (GO) terms enrichment analysis revealed that the same drought-relevant gene biological processes categories were regulated in all the species during stress conditions. Namely, downregulated differentially expressed genes were significantly involved in protein modification and kinase activity, cell receptor signalling and ion binding; while upregulated differentially expressed genes were significantly involved in sucrose and starch metabolism, redox, and water and glycerol homeostasis and channel activity. Candidate genes with roles in these functional categories were identified in Miscanthus. including “phosphatase activity”, “kinase activity” and “oxidoreductase activity”. In addition, transcripts with biological processes gene ontology vocabulary such as amino acid and lipid metabolic processes were significantly depleted in most of the genotypes. This study provides the first transcriptome data on the induction of drought-related gene expression in differentacross a range of threefour Miscanthus species. Thus, the findings in the present study can play a key role in fast-tracking miscanthus breeding efforts and its full domestication.

Preparations

require(DESeq2,quietly = T);require(ggplot2,quietly =T);require(dplyr,quietly = T);library(pheatmap,quietly = T);library(apeglm,quietly = T);library(UpSetR,quietly = T);library(ashr,quietly = T)

Setting working directory and Loading dataset

#set temporary working directory and load the dataset
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2")
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1, check.names = F)
dim(counts) #67789    35
## [1] 67789    35
pheno <- read.delim("input/pheno_without48.csv",header = TRUE,check.names=T, sep=',')
dim(pheno)
## [1] 35  3

Analysis for individual species

Mxg

########    Mxg ########
pheno.Mxg <- pheno %>% filter (spp == "Mxg") # selecting only mxg metadata
print(pheno.Mxg)
##   sample spp    treatment
## 1     33 Mxg waterlogging
## 2     37 Mxg      drought
## 3     41 Mxg      control
## 4     45 Mxg waterlogging
## 5     49 Mxg      drought
## 6     53 Mxg      control
## 7     57 Mxg waterlogging
## 8     61 Mxg      drought
## 9     65 Mxg      control
counts.Mxg <- counts %>% select(M33, M37, M41, M45, M49, M53, M57, M61, M65)# selecting only mxg counts table
rownames(pheno.Mxg) <- colnames(counts.Mxg)
dds.Mxg <- DESeqDataSetFromMatrix(countData = counts.Mxg, colData = pheno.Mxg, design = ~ treatment)
dds.Mxg <- DESeq(dds.Mxg)
resultsNames(dds.Mxg)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control"
counts(dds.Mxg) %>% str
##  int [1:67789, 1:9] 0 0 2166 524 17 71 0 210 0 53 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:67789] "Misin18G283300.v7.1" "Misin10G028800.v7.1" "Misin02G445300.v7.1" "Misin08G137800.v7.1" ...
##   ..$ : chr [1:9] "M33" "M37" "M41" "M45" ...
res1_mxg <- lfcShrink(dds.Mxg, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_mxg) # 67789     5
## [1] 67789     5
summary(res1_mxg)
## 
## out of 51821 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2236, 4.3%
## LFC < 0 (down)     : 2585, 5%
## outliers [1]       : 757, 1.5%
## low counts [2]     : 9870, 19%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_mxg <- lfcShrink(dds.Mxg, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_mxg)
## 
## out of 51821 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 61, 0.12%
## LFC < 0 (down)     : 26, 0.05%
## outliers [1]       : 757, 1.5%
## low counts [2]     : 3953, 7.6%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_mxg) # control vs drought
plotMA(res2_mxg) # control vs well-watered
# save the output dataset in the same directory
write.table(res1_mxg,"output/DEG-Mxg-drought_Abel.txt")
write.table(res2_mxg,"output/DEG-Mxg-waterlog_Abel.txt")

Msac

#####      Msac     #######
pheno.Msac <- pheno %>% filter (spp == "Msac") 
print(pheno.Msac)
##   sample  spp    treatment
## 1     31 Msac waterlogging
## 2     35 Msac      drought
## 3     39 Msac      control
## 4     43 Msac waterlogging
## 5     47 Msac      drought
## 6     51 Msac      control
## 7     55 Msac waterlogging
## 8     59 Msac      drought
## 9     63 Msac      control
counts.Msac <- counts %>% select(M31, M35, M39, M43, M47, M51, M55, M59, M63)
rownames(pheno.Msac ) <- colnames(counts.Msac)
dds.Msac <- DESeqDataSetFromMatrix(countData = counts.Msac, colData = pheno.Msac, design = ~ treatment)
dds.Msac <- DESeq(dds.Msac)
resultsNames(dds.Msac)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control"
res1_Msac <- lfcShrink(dds.Msac, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Msac) # 67789     5
## [1] 67789     5
summary(res1_Msac)
## 
## out of 49654 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1870, 3.8%
## LFC < 0 (down)     : 2172, 4.4%
## outliers [1]       : 278, 0.56%
## low counts [2]     : 9439, 19%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Msac <- lfcShrink(dds.Msac, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Msac)
## 
## out of 49654 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 22, 0.044%
## LFC < 0 (down)     : 29, 0.058%
## outliers [1]       : 278, 0.56%
## low counts [2]     : 1795, 3.6%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Msac) # control vs drought 
plotMA(res2_Msac) # control vs well-watered
write.table(res1_Msac,"output/DEG-Msac-drought_Abel.txt")
write.table(res2_Msac,"output/DEG-Msac-waterlog_Abel.txt")

Hybdrid3n

 ####      Hybrid3n     #######
pheno.Hybrid3n <- pheno %>% filter (spp == "Hybrid3n")
print(pheno.Hybrid3n)
##   sample      spp    treatment
## 1     34 Hybrid3n waterlogging
## 2     38 Hybrid3n      drought
## 3     42 Hybrid3n      control
## 4     46 Hybrid3n waterlogging
## 5     50 Hybrid3n      drought
## 6     54 Hybrid3n      control
## 7     58 Hybrid3n waterlogging
## 8     62 Hybrid3n      drought
## 9     66 Hybrid3n      control
counts.Hybrid3n <- counts %>% select(M34, M38, M42, M46, M50, M54, M58, M62, M66)
colnames(counts.Hybrid3n) <- rownames(pheno.Hybrid3n)
dds.Hybrid3n <- DESeqDataSetFromMatrix(countData = counts.Hybrid3n, colData = pheno.Hybrid3n, design = ~ treatment)
dds.Hybrid3n <- DESeq(dds.Hybrid3n)
resultsNames(dds.Hybrid3n)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control"
res1_Hybrid3n <- lfcShrink(dds.Hybrid3n, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Hybrid3n) # 67789     5
## [1] 67789     5
summary(res1_Hybrid3n)
## 
## out of 49478 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 725, 1.5%
## LFC < 0 (down)     : 977, 2%
## outliers [1]       : 299, 0.6%
## low counts [2]     : 8455, 17%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Hybrid3n <- lfcShrink(dds.Hybrid3n, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Hybrid3n)
## 
## out of 49478 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 55, 0.11%
## LFC < 0 (down)     : 64, 0.13%
## outliers [1]       : 299, 0.6%
## low counts [2]     : 7525, 15%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Hybrid3n) # control vs drought
plotMA(res2_Hybrid3n) # control vs well-watered
#
write.table(res1_Hybrid3n,"output/DEG-Hybrid3n-drought_Abel.txt")
write.table(res2_Hybrid3n,"output/DEG-Hybrid3n-waterlog_Abel.txt")

Msin

####      Msin   #######
pheno.Msin <- pheno %>% filter (spp == "Msin") 
print(pheno.Msin)
##   sample  spp    treatment
## 1     32 Msin waterlogging
## 2     36 Msin      drought
## 3     40 Msin      control
## 4     44 Msin waterlogging
## 5     52 Msin      control
## 6     56 Msin waterlogging
## 7     60 Msin      drought
## 8     64 Msin      control
counts.Msin <- counts %>% select(M32, M36, M40, M44, M52, M56, M60, M64)
dds.Msin <- DESeqDataSetFromMatrix(countData = counts.Msin, colData = pheno.Msin, design = ~ treatment)
dds.Msin <- DESeq(dds.Msin)
resultsNames(dds.Msin)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control"
res1_Msin <- lfcShrink(dds.Msin, coef = "treatment_drought_vs_control", type="apeglm")
dim(res1_Msin) # 67789     5
## [1] 67789     5
summary(res1_Msin)
## 
## out of 50018 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 202, 0.4%
## LFC < 0 (down)     : 936, 1.9%
## outliers [1]       : 96, 0.19%
## low counts [2]     : 10464, 21%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
res2_Msin <- lfcShrink(dds.Msin, coef = "treatment_waterlogging_vs_control", type="apeglm")
summary(res2_Msin)
## 
## out of 50018 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4, 0.008%
## LFC < 0 (down)     : 6, 0.012%
## outliers [1]       : 96, 0.19%
## low counts [2]     : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(res1_Msin) # control vs drought
plotMA(res2_Msin) # control vs well-watered

write.table(res1_Msin,"output/DEG-Msin-drought_Abel.txt")
write.table(res2_Msin,"output/DEG-Msin-waterlog_Abel.txt")

PCA for all libraries

###DESeq for all withoutM48
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2") 
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
counts.WOM48 <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1,check.names=FALSE)
dim(counts.WOM48)
## [1] 67789    35
#pheno$name = as.factor(pheno$name)#enforcing rownames are factors not numerics
rownames(pheno) <- colnames(counts.WOM48)
all(rownames(pheno) == colnames(counts.WOM48))
## [1] TRUE
#ggplot(counts.WOM48)+ geom_histogram(aes(x = M31), stat = "bin", bins = 200) + xlab("Raw expression counts") + ylab ("Number of genes")
dds.WOM48 <- DESeqDataSetFromMatrix(countData = counts.WOM48, colData = pheno, design = ~ treatment + spp)
summary(dds.WOM48)
## [1] "DESeqDataSet object of length 67789 with 0 metadata columns"
keep.WOM48 <- rowSums(counts(dds.WOM48) >= 5) >= 4
table(keep.WOM48)
## keep.WOM48
## FALSE  TRUE 
## 18731 49058
dds.WOM48.filtered <- dds.WOM48[keep.WOM48,]
summary(dds.WOM48.filtered)
## [1] "DESeqDataSet object of length 49058 with 0 metadata columns"
dds.WOM48.filtered <- DESeq(dds.WOM48.filtered) ;resultsNames(dds.WOM48.filtered)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control" "spp_Msac_vs_Hybrid3n"             
## [5] "spp_Msin_vs_Hybrid3n"              "spp_Mxg_vs_Hybrid3n"
boxplot(log10(counts(dds.WOM48.filtered)+1))# before normalization

dds.WOM48.filtered <- estimateSizeFactors(dds.WOM48.filtered)
sizeFactors(dds.WOM48.filtered)
##       M31       M32       M33       M34       M35       M36       M37       M38 
## 0.9246360 0.8663091 1.1383236 0.9893500 0.8970502 0.8623480 1.1609429 1.0285116 
##       M39       M40       M41       M42       M43       M44       M45       M46 
## 0.9072739 1.1005317 0.6702764 0.7223468 1.3292962 0.9450737 1.0454916 0.9776439 
##       M47       M49       M50       M51       M52       M53       M54       M55 
## 0.9890113 1.6834196 1.5705317 1.3065587 1.6259665 1.3565735 1.4778934 0.8602344 
##       M56       M57       M58       M59       M60       M61       M62       M63 
## 1.0113151 1.4543822 1.1355904 0.6893328 0.6704424 0.8798643 1.4957203 0.9405997 
##       M64       M65       M66 
## 0.7141060 0.6623280 1.2380000
normalized_dds_counts <- counts(dds.WOM48.filtered, normalized=TRUE)
#boxplot(log10(counts(dds.WOM48.filtered,normalized=TRUE)+1))# after normalization
vsd_all.WOM48 <- vst(dds.WOM48.filtered, blind=TRUE)
head(vsd_all.WOM48, n=3)
## class: DESeqTransform 
## dim: 3 35 
## metadata(1): version
## assays(1): ''
## rownames(3): Misin10G028800.v7.1 Misin02G445300.v7.1
##   Misin08G137800.v7.1
## rowData names(38): baseMean baseVar ... maxCooks dispFit
## colnames(35): M31 M32 ... M65 M66
## colData names(4): sample spp treatment sizeFactor
vsd_mat_all.WOM48 <- assay(vsd_all.WOM48)# Compute pairwise correlation values
vsd_cor_all.WOM48 <- cor(vsd_mat_all.WOM48)
#View(vsd_cor_all)
pheatmap(vsd_cor_all.WOM48, annotation = select(pheno, treatment))

pheatmap(vsd_cor_all.WOM48, annotation = select(pheno, spp))

resultsNames(dds.WOM48.filtered)
## [1] "Intercept"                         "treatment_drought_vs_control"     
## [3] "treatment_waterlogging_vs_control" "spp_Msac_vs_Hybrid3n"             
## [5] "spp_Msin_vs_Hybrid3n"              "spp_Mxg_vs_Hybrid3n"
res_drought_vs_con_ape = lfcShrink(dds.WOM48.filtered, coef = 2, type = "apeglm")
res_drought_vs_con_norm = lfcShrink(dds.WOM48.filtered, coef = 2, type = "normal")
res_drought_vs_con_Ash = lfcShrink(dds.WOM48.filtered, coef = 2, type = "ashr")
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim = c(1, 1e5); ylim=c(-3,3)
png("output/drought_vs_con_feb 2020_apeglm.png");plotMA(res_drought_vs_con_ape, xlim=xlim, ylim=ylim, main= "drought_vs_con_feb 2020_apeglm"); dev.off()
## quartz_off_screen 
##                 2
png("output/drought_vs_con_feb 2020_normal.png");plotMA(res_drought_vs_con_norm , xlim=xlim, ylim=ylim, main= "drought_vs_con_feb 2020_normal"); dev.off()
## quartz_off_screen 
##                 2
png("output/drought_vs_cont_feb 2020_ashr.png");plotMA(res_drought_vs_con_Ash, xlim=xlim, ylim=ylim, main= "drought_vs_cont_feb 2020_ashr"); dev.off()
## quartz_off_screen 
##                 2
## well-watered vs control
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim = c(1, 1e5); ylim=c(-3,3)
res_water_vs_con_ape = lfcShrink(dds.WOM48.filtered, coef = 3, type = "apeglm")
res_water_vs_con_norm = lfcShrink(dds.WOM48.filtered, coef = 3, type = "normal")
res_water_vs_con_Ash = lfcShrink(dds.WOM48.filtered, coef = 3, type = "ashr")
png("output/water_vs_con_feb 2020_apeglm.png"); plotMA(res_water_vs_con_ape, xlim=xlim, ylim=ylim, main= "water_vs_con_feb 2020_apeglm"); dev.off()
## quartz_off_screen 
##                 2
png("output/water_vs_con_feb 2020_normal.png");plotMA(res_water_vs_con_norm , xlim=xlim, ylim=ylim, main= "water_vs_con_feb 2020_normal"); dev.off()
## quartz_off_screen 
##                 2
png("output/water_vs_cont_feb 2020_ashr.png");plotMA(res_water_vs_con_Ash, xlim=xlim, ylim=ylim, main= "water_vs_cont_feb 2020_ashr"); dev.off()
## quartz_off_screen 
##                 2
plotPCA1 <- function (dds.WOM48.filtered, intgroup = "treatment", ntop = 500, returnData = FALSE) 
  {
    rv <- rowVars(assay(dds.WOM48.filtered))
    select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
    pca <- prcomp(t(assay(dds.WOM48.filtered)[select, ]))
    percentVar <- pca$sdev^2/sum(pca$sdev^2)
    if (!all(intgroup %in% names(colData(dds.WOM48.filtered)))) {
        stop("the argument 'intgroup' should specify columns of colData(dds.filtered)")
    }
    intgroup.df <- as.data.frame(colData(dds.WOM48.filtered)[, intgroup, 
        drop = FALSE])
    group <- if (length(intgroup) > 1) {
        factor(apply(intgroup.df, 1, paste, collapse = " : "))
    }
    else {
        colData(dds.WOM48.filtered)[[intgroup]]
    }
    d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], PC3 = pca$x[, 3], PC4 = pca$x[, 4], group = group, intgroup.df, name = colnames(dds.WOM48.filtered))
    if (returnData) {
        attr(d, "percentVar") <- percentVar[1:2]
        return(d)
    }
    library(ggrepel)
    ggplot(data = d, aes_string(x = "PC1", y = "PC2", color = "treatment", shape = "spp" )) + geom_point(size = 4) +
   # geom_text_repel(aes(label = name), hjust = 0.6,vjust = -0.7, nudge_x = 0.09, size = 0, 
   #                 point.padding = 0.35, box.padding = 0.25, direction = "both") + 
        xlab(paste0("PC1: ", round(percentVar[1] * 
        100), "% variance")) + ylab(paste0("PC2: ", round(percentVar[2] * 
        100), "% variance")) + coord_fixed() + theme_bw()+ scale_shape_manual(values = c(21, 22, 23, 24)) +
 #    xlim(-40,40) + ylim(-40,40) +
      theme(axis.text=element_text(size=14),legend.position = "right",plot.title = element_text(size=14),
        axis.title=element_text(size=14,face="bold")) + scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a"))+
       theme(legend.title = element_text(color = "white", size = 14),
      legend.text = element_text(color = "black")) + theme (legend.key.size = unit(0.5, "cm"),
      legend.key.width = unit(0.2,"cm") )
}
 p_all_1 = plotPCA1(vsd_all.WOM48, intgroup=c("treatment", "spp")) # fig 1
 p_all_1

Upregulated DEGs in well-watered conditions Vs. control

# upset plot for well-watered DEGs 19/02/2020
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2") 
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")

counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
mxg.waterlog <- read.delim("output/DEG-Mxg-waterlog_Abel.txt", header = T, sep = ' ')
msin.waterlog <- read.delim("output/DEG-Msin-waterlog_Abel.txt", header = T, sep = ' ')
msac.waterlog <- read.delim("output/DEG-Msac-waterlog_Abel.txt", header = T, sep = ' ')
hybrid3n.waterlog <- read.delim("output/DEG-Hybrid3n-waterlog_Abel.txt", header = T, sep = ' ')
list <- as.data.frame(allgenes)
rownames(list) <- list$allgenes #MOVED HERE
list$Mxg.well_watered <- as.integer(allgenes %in% rownames(mxg.waterlog[mxg.waterlog$padj<0.01,])) 
list$Msin.well_watered <- as.integer(allgenes %in% rownames(msin.waterlog[msin.waterlog$padj<0.05,])) #!!!!
list$Msac.well_watered <- as.integer(allgenes %in% rownames(msac.waterlog[msac.waterlog$padj<0.01,]))
list$hybrid3n.well_watered <- as.integer(allgenes %in% rownames(hybrid3n.waterlog[hybrid3n.waterlog$padj<0.05,])) #!!!!

#rownames(list) <- list$allgenes
list <- list[,-1]
write.csv(list,file="output/upset_waterlog.csv")

upset <- upset(list, nsets = 4, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 1, 2, 2), order.by = "freq")
upset

## Upregulated DEGs in droughted conditions Vs. control

# upset plot for drought DEGs 19/02/2020
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/Folder 2") 
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
mxg.drought <- read.delim("output/DEG-Mxg-drought_Abel.txt", header = T, sep = ' ')
msin.drought <- read.delim("output/DEG-Msin-drought_Abel.txt", header = T, sep = ' ')
msac.drought <- read.delim("output/DEG-Msac-drought_Abel.txt", header = T, sep = ' ')
hybrid3n.drought <- read.delim("output/DEG-Hybrid3n-drought_Abel.txt", header = T, sep = ' ')
list <- as.data.frame(allgenes)
list$mxg.drought <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$padj<0.01,])) 
list$msin.drought <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$padj<0.05,])) #!!!!
list$msac.drought <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$padj<0.01,]))
list$hybrid3n.drought <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$padj<0.05,])) #!!!!
rownames(list) <- list$allgenes
list <- list[,-1]
write.csv(list,file="output/upset_drought.csv")

myupset<-upset(list, nsets = 4, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")
myupset

Enrichments analysis of GO terms among DEGs in drought conditions

##EA calculations with TopGP

#NEW:
require(DESeq2);require(ggplot2);require(dplyr);library(pheatmap);library(apeglm);library(UpSetR);library(topGO);library(data.table)
options(scipen=999) #disable scientific annotation

#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/abel_topgo") 
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")
list$mxg.drought.UP <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$log2FoldChange>0,])) 
list$mxg.drought.DOWN <- as.integer(allgenes %in% rownames(mxg.drought[mxg.drought$log2FoldChange<0,])) 

list$hybrid3n.drought.UP <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$log2FoldChange>0,])) 
list$hybrid3n.drought.DOWN <- as.integer(allgenes %in% rownames(hybrid3n.drought[hybrid3n.drought$log2FoldChange<0,])) 

list$msac.drought.UP <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$log2FoldChange>0,])) 
list$msac.drought.DOWN <- as.integer(allgenes %in% rownames(msac.drought[msac.drought$log2FoldChange<0,])) 

list$msin.drought.UP <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$log2FoldChange>0,])) 
list$msin.drought.DOWN <- as.integer(allgenes %in% rownames(msin.drought[msin.drought$log2FoldChange<0,])) 

###

list$names<-rownames(list) #!!!!

mxg.drought.up <-  list %>% filter(mxg.drought == 1) %>% filter(mxg.drought.UP == 1) #1010
dim(mxg.drought.up)
## [1] 1010   13
mxg.drought.down <- list %>% filter(mxg.drought == 1) %>% filter(mxg.drought.DOWN == 1)  #1343
dim(mxg.drought.down)
## [1] 1343   13
hybrid3n.drought.up <-  list %>% filter(hybrid3n.drought == 1) %>% filter(hybrid3n.drought.UP == 1) #488
dim(hybrid3n.drought.up)
## [1] 488  13
hybrid3n.drought.down <-  list %>% filter(hybrid3n.drought == 1) %>% filter(hybrid3n.drought.DOWN == 1) #691
dim(hybrid3n.drought.down)
## [1] 691  13
msac.drought.up <-  list %>% filter(msac.drought == 1) %>% filter(msac.drought.UP == 1) 
dim(msac.drought.up) #793
## [1] 793  13
msac.drought.down <- list %>% filter(msac.drought == 1) %>% filter(msac.drought.DOWN == 1)  
dim(msac.drought.down) #874
## [1] 874  13
msin.drought.up <- list %>% filter(msin.drought == 1) %>% filter(msin.drought.UP == 1) 
dim(msin.drought.up) #131
## [1] 131  13
msin.drought.down <- list %>% filter(msin.drought == 1) %>% filter(msin.drought.DOWN == 1)  
dim(msin.drought.down) #642
## [1] 642  13
###FUNCTIONS:


run.topgo.pipeline.BP <- function(mytemp) {
  #PIPE starts
  list.mytemp <- factor(as.integer(allgenes %in% mytemp$names))
  names(list.mytemp) <- allgenes
  #BP
  fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
  #add genes to dataframe
  results.fdata.mytemp.BP$genes <- sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
    genes<- genesInTerm(fdata.mytemp.BP, x)  
    genes[[1]][genes[[1]] %in% mytemp$names]
  })
  results.fdata.mytemp.BP  
}

#MF
run.topgo.pipeline.MF <- function(mytemp) {
  list.mytemp <- factor(as.integer(allgenes %in% mytemp$names))
  names(list.mytemp) <- allgenes
  fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
  #add genes to dataframe
  results.fdata.mytemp.MF$genes <- sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
    genes<-genesInTerm(fdata.mytemp.MF, x) 
    genes[[1]][genes[[1]] %in% mytemp$names]
  })
  results.fdata.mytemp.MF
}
##END FUNCTIONS###

#calculate and save results:
counts <- read.delim("input/gene_count_matrix_without48.csv",header=T,sep=',',row.names = 1)
allgenes <- rownames(counts)
 
topgo.file <- read.delim("input/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/slim_annot_TOPGO.annot")

slim.BP.mxg.drought.up <- run.topgo.pipeline.BP(mxg.drought.up)
fwrite(slim.BP.mxg.drought.up, file = "output/slim-mxg.drought.up.BP.csv")

slim.MF.mxg.drought.up <- run.topgo.pipeline.MF(mxg.drought.up)
fwrite(slim.MF.mxg.drought.up, file = "output/slim-mxg.drought.up.MF.csv")

slim.BP.mxg.drought.down <- run.topgo.pipeline.BP(mxg.drought.down)
fwrite(slim.BP.mxg.drought.down, file = "output/slim-mxg.drought.down.BP.csv")

slim.MF.mxg.drought.down <- run.topgo.pipeline.MF(mxg.drought.down)
fwrite(slim.MF.mxg.drought.down, file = "output/slim-mxg.drought.down.MF.csv")
#
slim.BP.hybrid3n.drought.up <- run.topgo.pipeline.BP(hybrid3n.drought.up)
fwrite(slim.BP.hybrid3n.drought.up, file = "output/slim-hybrid3n.drought.up.BP.csv")

slim.MF.hybrid3n.drought.up <- run.topgo.pipeline.MF(hybrid3n.drought.up)
fwrite(slim.MF.hybrid3n.drought.up, file = "output/slim-hybrid3n.drought.up.MF.csv")

slim.BP.hybrid3n.drought.down <- run.topgo.pipeline.BP(hybrid3n.drought.down)
fwrite(run.topgo.pipeline.BP(hybrid3n.drought.down), file = "output/slim-hybrid3n.drought.down.BP.csv")

slim.MF.hybrid3n.drought.down <- run.topgo.pipeline.MF(hybrid3n.drought.down)
fwrite(slim.MF.hybrid3n.drought.down, file = "output/slim-hybrid3n.drought.down.MF.csv")
#
slim.BP.msin.drought.up <- run.topgo.pipeline.BP(msin.drought.up)
fwrite(slim.BP.msin.drought.up, file = "output/slim-msin.drought.up.BP.csv")

slim.MF.msin.drought.up <- run.topgo.pipeline.MF(msin.drought.up)
fwrite(slim.MF.msin.drought.up, file = "output/slim-msin.drought.up.MF.csv")

slim.BP.msin.drought.down <- run.topgo.pipeline.BP(msin.drought.down)
fwrite(slim.BP.msin.drought.down, file = "output/slim-msin.drought.down.BP.csv")

slim.MF.msin.drought.down <- run.topgo.pipeline.MF(msin.drought.down)
fwrite(slim.MF.msin.drought.down, file = "output/slim-msin.drought.down.MF.csv")
#
slim.BP.msac.drought.up <- run.topgo.pipeline.BP(msac.drought.up)
fwrite(slim.BP.msac.drought.up, file = "output/slim-msac.drought.up.BP.csv")

slim.MF.msac.drought.up <- run.topgo.pipeline.MF(msac.drought.up)
fwrite(slim.MF.msac.drought.up, file = "output/slim-msac.drought.up.MF.csv")

slim.BP.msac.drought.down <- run.topgo.pipeline.BP(msac.drought.down)
fwrite(slim.BP.msac.drought.down, file = "output/slim-msac.drought.down.BP.csv")

slim.MF.msac.drought.down <- run.topgo.pipeline.MF(msac.drought.down)
fwrite(slim.MF.msac.drought.down, file = "output/slim-msac.drought.down.MF.csv")
###

#FULL ANNOTATION:
topgo.file <- read.delim("input/full_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "input/full_annot_TOPGO.annot")

#FULL
full.BP.mxg.drought.up <- run.topgo.pipeline.BP(mxg.drought.up)
fwrite(full.BP.mxg.drought.up, file = "output/full-mxg.drought.up.BP.csv")

full.MF.mxg.drought.up <- run.topgo.pipeline.MF(mxg.drought.up)
fwrite(full.MF.mxg.drought.up, file = "output/full-mxg.drought.up.MF.csv")

full.BP.mxg.drought.down <- run.topgo.pipeline.BP(mxg.drought.down)
fwrite(full.BP.mxg.drought.down, file = "output/full-mxg.drought.down.BP.csv")

full.MF.mxg.drought.down <- run.topgo.pipeline.MF(mxg.drought.down)
fwrite(full.MF.mxg.drought.down, file = "output/full-mxg.drought.down.MF.csv")
#
full.BP.hybrid3n.drought.up <- run.topgo.pipeline.BP(hybrid3n.drought.up)
fwrite(full.BP.hybrid3n.drought.up, file = "output/full-hybrid3n.drought.up.BP.csv")

full.MF.hybrid3n.drought.up <- run.topgo.pipeline.MF(hybrid3n.drought.up)
fwrite(full.MF.hybrid3n.drought.up, file = "output/full-hybrid3n.drought.up.MF.csv")

full.BP.hybrid3n.drought.down <- run.topgo.pipeline.BP(hybrid3n.drought.down)
fwrite(run.topgo.pipeline.BP(hybrid3n.drought.down), file = "output/full-hybrid3n.drought.down.BP.csv")

full.MF.hybrid3n.drought.down <- run.topgo.pipeline.MF(hybrid3n.drought.down)
fwrite(full.MF.hybrid3n.drought.down, file = "output/full-hybrid3n.drought.down.MF.csv")
#
full.BP.msin.drought.up <- run.topgo.pipeline.BP(msin.drought.up)
fwrite(full.BP.msin.drought.up, file = "output/full-msin.drought.up.BP.csv")

full.MF.msin.drought.up <- run.topgo.pipeline.MF(msin.drought.up)
fwrite(full.MF.msin.drought.up, file = "output/full-msin.drought.up.MF.csv")

full.BP.msin.drought.down <- run.topgo.pipeline.BP(msin.drought.down)
fwrite(full.BP.msin.drought.down, file = "output/full-msin.drought.down.BP.csv")

full.MF.msin.drought.down <- run.topgo.pipeline.MF(msin.drought.down)
fwrite(full.MF.msin.drought.down, file = "output/full-msin.drought.down.MF.csv")
#
full.BP.msac.drought.up <- run.topgo.pipeline.BP(msac.drought.up)
fwrite(full.BP.msac.drought.up, file = "output/full-msac.drought.up.BP.csv")

full.MF.msac.drought.up <- run.topgo.pipeline.MF(msac.drought.up)
fwrite(full.MF.msac.drought.up, file = "output/full-msac.drought.up.MF.csv")

full.BP.msac.drought.down <- run.topgo.pipeline.BP(msac.drought.down)
fwrite(full.BP.msac.drought.down, file = "output/full-msac.drought.down.BP.csv")

full.MF.msac.drought.down <- run.topgo.pipeline.MF(msac.drought.down)
fwrite(full.MF.msac.drought.down, file = "output/full-msac.drought.down.MF.csv")

Plotting enrichment analysis drought/control

#BUBBLEPLOTS
library(corrplot);library(reshape2);library(RColorBrewer);library(pheatmap);library(dplyr);library(ggplot2)
library(topGO)
library(data.table)
#setwd("C:/Users/gari/Documents/Abel personal documents/water stress experiments Misc_2013/JoseDvega/Jose November 2019/abel_topgo")
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")
#setwd("~/analysis/susanne_RNASEQ_data/abel")

#slim
slim.BP.mxg.drought.up$regulation <- 1
slim.MF.mxg.drought.up$regulation <- 1
slim.BP.mxg.drought.down$regulation <- -1
slim.MF.mxg.drought.down$regulation <- -1
slim.BP.hybrid3n.drought.up$regulation <- 1 
slim.MF.hybrid3n.drought.up$regulation <- 1
slim.BP.hybrid3n.drought.down$regulation <- -1 
slim.MF.hybrid3n.drought.down$regulation <- -1
slim.BP.msin.drought.up$regulation <- 1
slim.MF.msin.drought.up$regulation <- 1
slim.BP.msin.drought.down$regulation <- -1 
slim.MF.msin.drought.down$regulation <- -1
slim.BP.msac.drought.up$regulation <- 1
slim.MF.msac.drought.up$regulation <- 1
slim.BP.msac.drought.down$regulation <- -1 
slim.MF.msac.drought.down$regulation <- -1
#
slim.BP.mxg.drought.up$spp <- "mxg"
slim.MF.mxg.drought.up$spp <- "mxg"
slim.BP.mxg.drought.down$spp <- "mxg"
slim.MF.mxg.drought.down$spp <- "mxg"
slim.BP.hybrid3n.drought.up$spp <- "hybrid3n"
slim.MF.hybrid3n.drought.up$spp <- "hybrid3n"
slim.BP.hybrid3n.drought.down$spp <- "hybrid3n"
slim.MF.hybrid3n.drought.down$spp <- "hybrid3n"
slim.BP.msin.drought.up$spp <- "msin"
slim.MF.msin.drought.up$spp <- "msin"
slim.BP.msin.drought.down$spp <- "msin"
slim.MF.msin.drought.down$spp <- "msin"
slim.BP.msac.drought.up$spp <- "msac"
slim.MF.msac.drought.up$spp <- "msac"
slim.BP.msac.drought.down$spp <- "msac"
slim.MF.msac.drought.down$spp <- "msac"
#
#CORRECT FORMATING:
#
supertable <- rbind(slim.BP.mxg.drought.up,slim.BP.mxg.drought.down,slim.BP.hybrid3n.drought.up,slim.BP.hybrid3n.drought.down,slim.BP.msin.drought.up,slim.BP.msin.drought.down,slim.BP.msac.drought.up,slim.BP.msac.drought.down,slim.MF.mxg.drought.up,slim.MF.mxg.drought.down,slim.MF.hybrid3n.drought.up,slim.MF.hybrid3n.drought.down,slim.MF.msin.drought.up,slim.MF.msin.drought.down,slim.MF.msac.drought.up,slim.MF.msac.drought.down)

supertable.MF <- rbind(slim.MF.mxg.drought.up,slim.MF.mxg.drought.down,slim.MF.hybrid3n.drought.up,slim.MF.hybrid3n.drought.down,slim.MF.msin.drought.up,slim.MF.msin.drought.down,slim.MF.msac.drought.up,slim.MF.msac.drought.down)

supertable.BP <- rbind(slim.BP.mxg.drought.up,slim.BP.mxg.drought.down,slim.BP.hybrid3n.drought.up,slim.BP.hybrid3n.drought.down,slim.BP.msin.drought.up,slim.BP.msin.drought.down,slim.BP.msac.drought.up,slim.BP.msac.drought.down)

#correct the scientific notation by converting from character to numeric
supertable$weight01_fisher <- as.numeric(supertable$weight01_fisher)
## Warning: NAs introduced by coercion
supertable.MF$weight01_fisher <- as.numeric(supertable.MF$weight01_fisher)
## Warning: NAs introduced by coercion
supertable.BP$weight01_fisher <- as.numeric(supertable.BP$weight01_fisher)


enriched.GOs <- supertable %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID) 
enriched.GOs.MF <- supertable.MF %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID) 
enriched.GOs.BP <- supertable.BP %>% filter(weight01_fisher<0.01) %>% filter (Significant>=5) #%>% select(GO.ID) 

#list of GOs that are enrich in at least one analysis

selected <- supertable %>% filter(GO.ID %in% unlist(enriched.GOs)) #filter our enrich GOs by name
selected.MF <- supertable.MF %>% filter(GO.ID %in% unlist(enriched.GOs.MF)) #filter our enrich GOs by name
selected.BP <- supertable.BP %>% filter(GO.ID %in% unlist(enriched.GOs.BP)) #filter our enrich GOs by name


#PUT A BOTTOM CAP
selected$weight01_fisher[selected$weight01_fisher<0.0001] <- 0.0001
selected.MF$weight01_fisher[selected.MF$weight01_fisher<0.0001] <- 0.0001
selected.BP$weight01_fisher[selected.BP$weight01_fisher<0.0001] <- 0.0001

plotting enrichment analysis drought/control (GOSLIM)

#PLOT BUBBLES SLIM 

library(RColorBrewer)

table.merge.filter <- selected
p <- ggplot(data=table.merge.filter, aes(x=spp,y=Term)) 
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1) 
#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p + theme_minimal()

table.merge.filter <- selected.BP
p1 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term)) 
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p1 <- p1 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p1 <- p1 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1) 
p1 <- p1 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p1 <- p1 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p1 <- p1 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac")) 
p1 <- p1 + scale_y_discrete(limits=rev(c("cellular amino acid metabolic process",
"photosynthesis",
"cellular protein modification process",
"protein folding",
"generation of precursor metabolites and ...",
"signal transduction",
"small molecule metabolic process",
"carbohydrate metabolic process",
"cofactor metabolic process",
"lipid metabolic process",
"biosynthetic process",
"translation",
"secondary metabolic process",
"ribosome biogenesis",
"homeostatic process",
"membrane organization",
"response to stress",
"nitrogen cycle metabolic process")))
p1 <- p1 + theme_linedraw()
p1

table.merge.filter <- selected.MF
p2 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term)) 
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1) 
p2 <- p2 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p2 <- p2 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p2 <- p2 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac")) 
p2 <- p2 + scale_y_discrete(limits=rev(c("kinase activity",
"ion binding",
"structural constituent of ribosome",
"oxidoreductase activity",
"RNA binding",
"transferase activity, transferring glyco...",
"hydrolase activity, acting on glycosyl b...",
"isomerase activity",
"phosphatase activity")))
p2 <- p2 + theme_linedraw()
p2

Side-to-side plots

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
grid.arrange(p2, p1,  layout_matrix=rbind(c(2,1),c(2,3)))

plotting EA full GO annotation

#FULL GO ANNOTATION

library(RColorBrewer)

#setwd("~/analysis/susanne_RNASEQ_data/abel")
 setwd("~/analysis/susanne_RNASEQ_data/miscanthus_drought_rnaseq/")

#full
full.BP.mxg.drought.up$regulation <- 1
full.MF.mxg.drought.up$regulation <- 1
full.BP.mxg.drought.down$regulation <- -1
full.MF.mxg.drought.down$regulation <- -1
full.BP.hybrid3n.drought.up$regulation <- 1 
full.MF.hybrid3n.drought.up$regulation <- 1
full.BP.hybrid3n.drought.down$regulation <- -1 
full.MF.hybrid3n.drought.down$regulation <- -1
full.BP.msin.drought.up$regulation <- 1
full.MF.msin.drought.up$regulation <- 1
full.BP.msin.drought.down$regulation <- -1 
full.MF.msin.drought.down$regulation <- -1
full.BP.msac.drought.up$regulation <- 1
full.MF.msac.drought.up$regulation <- 1
full.BP.msac.drought.down$regulation <- -1 
full.MF.msac.drought.down$regulation <- -1
#
full.BP.mxg.drought.up$spp <- "mxg"
full.MF.mxg.drought.up$spp <- "mxg"
full.BP.mxg.drought.down$spp <- "mxg"
full.MF.mxg.drought.down$spp <- "mxg"
full.BP.hybrid3n.drought.up$spp <- "hybrid3n"
full.MF.hybrid3n.drought.up$spp <- "hybrid3n"
full.BP.hybrid3n.drought.down$spp <- "hybrid3n"
full.MF.hybrid3n.drought.down$spp <- "hybrid3n"
full.BP.msin.drought.up$spp <- "msin"
full.MF.msin.drought.up$spp <- "msin"
full.BP.msin.drought.down$spp <- "msin"
full.MF.msin.drought.down$spp <- "msin"
full.BP.msac.drought.up$spp <- "msac"
full.MF.msac.drought.up$spp <- "msac"
full.BP.msac.drought.down$spp <- "msac"
full.MF.msac.drought.down$spp <- "msac"
#
#CORRECT FORMATING:
#
supertable <- rbind(full.BP.mxg.drought.up,full.BP.mxg.drought.down,full.BP.hybrid3n.drought.up,full.BP.hybrid3n.drought.down,full.BP.msin.drought.up,full.BP.msin.drought.down,full.BP.msac.drought.up,full.BP.msac.drought.down,full.MF.mxg.drought.up,full.MF.mxg.drought.down,full.MF.hybrid3n.drought.up,full.MF.hybrid3n.drought.down,full.MF.msin.drought.up,full.MF.msin.drought.down,full.MF.msac.drought.up,full.MF.msac.drought.down)

#supertable.BP <- rbind(full.BP.mxg.drought.up,full.BP.mxg.drought.down,full.BP.hybrid3n.drought.up,full.BP.hybrid3n.drought.down,full.BP.msin.drought.up,full.BP.msin.drought.down,full.BP.msac.drought.up,full.BP.msac.drought.down)

#supertable.MF <- rbind(full.MF.mxg.drought.up,full.MF.mxg.drought.down,full.MF.hybrid3n.drought.up,full.MF.hybrid3n.drought.down,full.MF.msin.drought.up,full.MF.msin.drought.down,full.MF.msac.drought.up,full.MF.msac.drought.down)

#DO EITHER ONE EACH TIME:
#supertable<-supertable.BP
#supertable<-supertable.MF


#correct the scientific notation by converting from character to numeric
supertable$weight01_fisher <- as.numeric(supertable$weight01_fisher)

enriched.GOs <- supertable %>% filter(weight01_fisher<0.005) %>% filter (Significant>=10)  #%>% select(GO.ID) #list of GOs that are enrich in at least one analysis
#CHANGED MIN MEMBERS TO 10 genes

selected <- supertable %>% filter(GO.ID %in% unlist(enriched.GOs)) #filter our enrich GOs by name

#PUT A BOTTOM CAP
selected$weight01_fisher[selected$weight01_fisher<0.0001] <- 0.0001

table.merge.filter <- selected

p <- ggplot(data=table.merge.filter, aes(x=spp,y=Term)) 
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)),  stroke=0.5, alpha=0.8) 
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)),  stroke=0.5, alpha=0.8) 
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)),  stroke=1, alpha=0.8) 
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)),  stroke=1, alpha=0.8) 
p <- p + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.001), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)),  stroke=2, alpha=0.8) 
p <- p + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.001), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)),  stroke=2, alpha=0.8) 
#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(5,10,20,30,40,50,75,100), range = (c(2,9)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p <- p + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac")) 
p + theme_light()

p2 <- ggplot(data=table.merge.filter, aes(x=spp,y=Term)) 
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher>0.1), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=1.5, alpha=1)
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.1) %>% filter(weight01_fisher>0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=1.5, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter %>% filter(regulation<0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*-1)), shape=25, stroke=2, alpha=1 )
p2 <- p2 + geom_point(data=table.merge.filter  %>% filter(regulation>0) %>% filter(weight01_fisher<=0.01), aes(x=spp, y=Term, size=Significant, colour=(abs(log2(weight01_fisher))*1)), shape=24, stroke=2, alpha=1) 
p2 <- p2 + scale_size(breaks = c(10,15,20,30,40,50,60,70,80,90,100,110,150), range = (c(1,10)))
p2 <- p2 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p2 <- p2 + scale_x_discrete(limits = c("mxg","hybrid3n","msin","msac")) 
p2 <- p2 + scale_y_discrete(limits=rev(c("serine family amino acid metabolic proce...",
"reductive pentose-phosphate cycle",
"cell surface receptor signaling pathway",
"protein kinase activity",
"sucrose metabolic process",
"starch metabolic process",
"polysaccharide catabolic process",
"chlorophyll binding",
"calcium ion binding",
"protein serine/threonine kinase activity",
"iron ion binding",
"thylakoid membrane organization",
"carbohydrate binding",
"glycerol transport",
"oxidation-reduction process",
"carbon utilization",
"cellular water homeostasis",
"water channel activity",
"glycerol channel activity",
"glycolytic process",
"polysaccharide binding",
"cation binding",
"carbohydrate transmembrane transport",
"water transport",
"chloroplast organization",
"photosynthesis",
"phosphorylation",
"protein serine/threonine phosphatase act...",
"mannose metabolic process",
"identical protein binding",
"RNA binding",
"response to heat",
"isopentenyl diphosphate biosynthetic pro...",
"cysteine biosynthetic process",
"abscisic acid-activated signaling pathwa...",
"calmodulin binding",
"defense response",
"ribosome biogenesis",
"dioxygenase activity",
"ATPase activity, coupled to transmembran...",
"structural constituent of ribosome",
"translation",
"pyruvate metabolic process",
"nucleotide binding",
"metal ion transport",
"secondary metabolite biosynthetic proces...",
"nucleotide-sugar metabolic process",
"cellular aldehyde metabolic process",
"protein folding",
"protein dephosphorylation")))
p2 + theme_linedraw()