true

#SUMMARY

Miscanthus is a commercial lignocellulosic biomass crop owing to its high biomass productivity and low chemical input requirements. In previous work, we have shown that interspecific Miscanthus hybrids with high biomass yield had low concentrations of starch and sucrose in the stem, and high ratio of sucrose to starch, which were consistent across years and growing sites. Here, we have performed a transcriptional RNA¬-seq analysis between selected Miscanthus hybrids with contrasting values in these phenotypes to clarify how these phenotypes are genetically controlled. We observed that the carbohydrate and secondary metabolism, and the generation of related precursor metabolites and energy, were significantly enriched among differentially expressed genes. Particularly, genes directly involved in the synthesis of starch (AGP, SS2/3, SBE2) and sucrose (SPS5), and degradation of starch (AMY3, ISA3, SEX4, BAM1) and sucrose (SUS) were down¬-regulated in the stem of the hybrids with low starch concentrations and high biomass yielding, while triose phosphates exporting (TIM, PFK2) was not. Our results support that the gene expression of crucial enzymatic genes in specific genotypes would allow for high metabolic rates and low non-structural carbohydrates accumulation leading to high biomass production. Our work shows the strong interconnectivity between genotype, chemotype and complex phenotypes.

library(DESeq2,quietly = T)
library(dplyr,quietly = T)

setwd("~/analysis/miscanthus-rnaseq-Msin/miscanthus_starch_rnaseq")

#tximport

Kallisto count files, one from each of the 23 libraries, were imported in R using TXimport.

#library(tximport)
#library(rhdf5)
#samples <- read.delim("/Volumes/Workarea/[...]/kallisto/_list.txt", header = FALSE)
#samples
#files <- file.path("/Volumes/Workarea/[...]/kallisto",unlist(samples),"abundance.h5")
#names(files) <- unlist(samples)
#files
#mytxi <- tximport(files, type = "kallisto", txOut = TRUE)
#save(mytxi,file="../txi_import.R")
#####INPUTS
load("input/txi_import.R") #this loads the object 'mytxi' from the tximport step
dim(mytxi$counts) 
## [1] 67789    23
#View(mytxi$counts)
counts <- as.data.frame(mytxi$counts)
#write.csv(counts,file="txi_counts.csv")
#write.csv(mytxi$abundance,file="txi_abundance.csv")

pheno <- read.delim("input/pheno.txt", header = T, sep = '\t')
rownames(pheno) <- pheno$id
pheno
##              id genotype tissue replica visual spp ind
## LIB2338 LIB2338   few112   root    2014    few hyb   A
## LIB2339 LIB2339   few112   stem    2014    few hyb   A
## LIB2340 LIB2340   few112   leaf    2014    few hyb   A
## LIB2341 LIB2341    few90   root    2014    few hyb   B
## LIB2342 LIB2342    few90   stem    2014    few hyb   B
## LIB2343 LIB2343    few90   leaf    2014    few hyb   B
## LIB2344 LIB2344  many120   root    2014   many hyb   A
## LIB2345 LIB2345  many120   stem    2014   many hyb   A
## LIB2346 LIB2346  many120   leaf    2014   many hyb   A
## LIB2347 LIB2347   many18   root    2014   many hyb   B
## LIB2348 LIB2348   many18   stem    2014   many hyb   B
## LIB2349 LIB2349   many18   leaf    2014   many hyb   B
## LIB2350 LIB2350   msi102   stem    2014    msi msi   B
## LIB2351 LIB2351   msi102   leaf    2014    msi msi   B
## LIB2352 LIB2352   msa297   stem    2014    msa msa   A
## LIB2353 LIB2353   msa297   leaf    2014    msa msa   A
## SAM1158 SAM1158   msa297   stem    2013    msa msa   A
## SAM1159 SAM1159   msa297   root    2013    msa msa   A
## SAM1160 SAM1160   msa297   leaf    2013    msa msa   A
## SAM1161 SAM1161   msa297   root    2013    msa msa   A
## SAM1162 SAM1162   msi102   stem    2013    msi msi   B
## SAM1163 SAM1163   msi102   root    2013    msi msi   B
## SAM1164 SAM1164   msi102   leaf    2013    msi msi   B
colnames(mytxi$counts) == rownames(pheno) #check. TRUE!!
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

#Explore the counts

##Counts distribution by sample

#counts exploratory
dds <- DESeqDataSetFromTximport(mytxi, pheno, ~tissue + spp)
dds <- DESeq(dds)
boxplot(log10(assays(dds)[["cooks"]]), range=0, las=2)

plotDispEsts(dds)

##PCA samples

#####PCA SAMPLES
library(ggplot2)
library(DESeq2)
mypca<-plotPCA(varianceStabilizingTransformation(dds,blind = T), intgroup=c("tissue","spp"), returnData=T)

ggplot(mypca, aes(PC1, PC2, shape=spp)) + 
  scale_shape_manual(values=c(23, 21, 22)) +
  geom_point(aes(fill=tissue), size=6) +
  xlab(paste0("PC1 ",attr(mypca,"percentVar")[1])) +
  ylab(paste0("PC2 ",attr(mypca,"percentVar")[2])) + 
  coord_fixed() + theme_linedraw()

ggplot(mypca, aes(PC1, PC2, shape=tissue)) + 
  scale_shape_manual(values=c(23, 21, 22)) +
  geom_point(aes(fill=spp), size=6) + scale_fill_manual(values =c("#1a9850","#d53e4f","#3288bd")) +
  xlab(paste0("PC1 ",attr(mypca,"percentVar")[1])) +
  ylab(paste0("PC2 ",attr(mypca,"percentVar")[2])) + 
  scale_y_continuous(limits=c(-75,75), breaks=c(-100,-50,0,50,100)) +
    scale_x_continuous(limits=c(-100,100), breaks=c(-100,-50,0,50,100)) +
  coord_fixed() + theme_linedraw()

##Heatmap samples

###HEATMAP SAMPLES
library("pheatmap")
ntd <- varianceStabilizingTransformation(dds, blind = T)
select <- order(rowSds(assay(ntd)),decreasing=TRUE)[1:1000]
df <- as.data.frame(colData(dds)[,c("spp","tissue")])

pheatmap(assay(ntd)[select,], cluster_rows=T, treeheight_row = 0, show_rownames=FALSE,
         cluster_cols=T, annotation_col=df,clustering_distance_rows = "correlation", clustering_distance_cols = "correlation")

#Differential analysis (DE) two groups of hybrids

##DESeq2

Differential analysis was performed using DESeq2 (REF) for each tissue (root, stem, leaf) independently. Genes with FDR < 0.05 were considered differentially expressed (DE). We compared the two groups of hybrids (“Low NSC” and “High NSC”) to each other. Each hybrid group was composed of two genotypes.

#####COMPARE HYBRIDS BY PHENOTYPE: FEWER TILLERS VS MANY TILLERS

colnames(mytxi$counts) == rownames(pheno) #check
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dds <- DESeqDataSetFromTximport(mytxi, pheno, ~tissue + visual)
dds$group<- factor(paste0(pheno$tissue,pheno$visual))
model.matrix(~ dds$group)
##    (Intercept) dds$groupleafmany dds$groupleafmsa dds$groupleafmsi
## 1            1                 0                0                0
## 2            1                 0                0                0
## 3            1                 0                0                0
## 4            1                 0                0                0
## 5            1                 0                0                0
## 6            1                 0                0                0
## 7            1                 0                0                0
## 8            1                 0                0                0
## 9            1                 1                0                0
## 10           1                 0                0                0
## 11           1                 0                0                0
## 12           1                 1                0                0
## 13           1                 0                0                0
## 14           1                 0                0                1
## 15           1                 0                0                0
## 16           1                 0                1                0
## 17           1                 0                0                0
## 18           1                 0                0                0
## 19           1                 0                1                0
## 20           1                 0                0                0
## 21           1                 0                0                0
## 22           1                 0                0                0
## 23           1                 0                0                1
##    dds$grouprootfew dds$grouprootmany dds$grouprootmsa dds$grouprootmsi
## 1                 1                 0                0                0
## 2                 0                 0                0                0
## 3                 0                 0                0                0
## 4                 1                 0                0                0
## 5                 0                 0                0                0
## 6                 0                 0                0                0
## 7                 0                 1                0                0
## 8                 0                 0                0                0
## 9                 0                 0                0                0
## 10                0                 1                0                0
## 11                0                 0                0                0
## 12                0                 0                0                0
## 13                0                 0                0                0
## 14                0                 0                0                0
## 15                0                 0                0                0
## 16                0                 0                0                0
## 17                0                 0                0                0
## 18                0                 0                1                0
## 19                0                 0                0                0
## 20                0                 0                1                0
## 21                0                 0                0                0
## 22                0                 0                0                1
## 23                0                 0                0                0
##    dds$groupstemfew dds$groupstemmany dds$groupstemmsa dds$groupstemmsi
## 1                 0                 0                0                0
## 2                 1                 0                0                0
## 3                 0                 0                0                0
## 4                 0                 0                0                0
## 5                 1                 0                0                0
## 6                 0                 0                0                0
## 7                 0                 0                0                0
## 8                 0                 1                0                0
## 9                 0                 0                0                0
## 10                0                 0                0                0
## 11                0                 1                0                0
## 12                0                 0                0                0
## 13                0                 0                0                1
## 14                0                 0                0                0
## 15                0                 0                1                0
## 16                0                 0                0                0
## 17                0                 0                1                0
## 18                0                 0                0                0
## 19                0                 0                0                0
## 20                0                 0                0                0
## 21                0                 0                0                1
## 22                0                 0                0                0
## 23                0                 0                0                0
## attr(,"assign")
##  [1] 0 1 1 1 1 1 1 1 1 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$`dds$group`
## [1] "contr.treatment"
design(dds) <- ~ group
dds <- DESeq(dds)
resultsNames(dds)
##  [1] "Intercept"                 "group_leafmany_vs_leaffew"
##  [3] "group_leafmsa_vs_leaffew"  "group_leafmsi_vs_leaffew" 
##  [5] "group_rootfew_vs_leaffew"  "group_rootmany_vs_leaffew"
##  [7] "group_rootmsa_vs_leaffew"  "group_rootmsi_vs_leaffew" 
##  [9] "group_stemfew_vs_leaffew"  "group_stemmany_vs_leaffew"
## [11] "group_stemmsa_vs_leaffew"  "group_stemmsi_vs_leaffew"
vst <- varianceStabilizingTransformation(dds, blind = T)

#write.csv(vst,file="vst.csv")
#continue...

root_few_many <- lfcShrink(dds, contrast=c("group","rootfew","rootmany"), type="ashr")
summary(root_few_many, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 116, 0.18%
## LFC < 0 (down)     : 137, 0.21%
## outliers [1]       : 0, 0%
## low counts [2]     : 8835, 14%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(root_few_many, ylim=c(-2,2), alpha =0.05)

root_few_many.05 <- as.data.frame(root_few_many)
root_few_many.05$rownames <- rownames(root_few_many)
root_few_many.05 <- root_few_many.05 %>% filter(padj<0.05)
dim(root_few_many)
## [1] 67789     5
dim(root_few_many.05)
## [1] 253   6
stem_few_many <- lfcShrink(dds, contrast=c("group","stemfew","stemmany"), type="ashr")
summary(stem_few_many, alpha =0.05)  
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 598, 0.92%
## LFC < 0 (down)     : 294, 0.45%
## outliers [1]       : 0, 0%
## low counts [2]     : 11360, 17%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(stem_few_many, ylim=c(-2,2), alpha =0.05) 

stem_few_many.05 <- as.data.frame(stem_few_many)
stem_few_many.05$rownames <- rownames(stem_few_many)
stem_few_many.05 <- stem_few_many.05 %>% filter(padj<0.05)
dim(stem_few_many)
## [1] 67789     5
dim(stem_few_many.05)
## [1] 892   6
leaf_few_many <- lfcShrink(dds, contrast=c("group","leaffew","leafmany"), type="ashr")
summary(leaf_few_many, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 410, 0.63%
## LFC < 0 (down)     : 331, 0.51%
## outliers [1]       : 0, 0%
## low counts [2]     : 11360, 17%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(leaf_few_many, ylim=c(-2,2), alpha =0.05) 

leaf_few_many.05 <- as.data.frame(leaf_few_many)
leaf_few_many.05$rownames <- rownames(leaf_few_many)
leaf_few_many.05 <- leaf_few_many.05 %>% filter(padj<0.05)
dim(leaf_few_many)
## [1] 67789     5
dim(leaf_few_many.05)
## [1] 741   6

##Heatmap

library("pheatmap")
ntd <- varianceStabilizingTransformation(dds, blind = T) #same that vst
select <- order(rowSds(assay(ntd)),decreasing=TRUE)[1:1000]
df <- as.data.frame(colData(dds)[,c("tissue","visual")])

pheatmap(assay(ntd)[select,], cluster_rows=T, treeheight_row = 0, show_rownames=FALSE,
         cluster_cols=T, annotation_col=df,clustering_distance_rows = "correlation", clustering_distance_cols = "correlation")

##Venn

library(UpSetR)
library(eulerr)
library(RColorBrewer)

allgenes <- rownames(mytxi$counts)
list <- as.data.frame(allgenes)
list$root_few_many.05 <- as.integer(allgenes %in% root_few_many.05$rownames) 
list$leaf_few_many.05 <- as.integer(allgenes %in% leaf_few_many.05$rownames) 
list$stem_few_many.05 <- as.integer(allgenes %in% stem_few_many.05$rownames) 

upset(list, nsets = 6, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

#euler diagram
list.euler<-list(root=as.vector(root_few_many.05$rownames),
                 leaf=as.vector(leaf_few_many.05$rownames),
                 stem=as.vector(stem_few_many.05$rownames))

plot(euler(list.euler, shape = "ellipse"),legend = T, quantities = T, labels = T, fills = brewer.pal(n = 7,name = "Spectral"))

plot(euler(list.euler, shape = "ellipse"),legend = F, quantities = F, labels = F, fills = brewer.pal(n = 7,name = "Spectral"))

#Differential analysis (DE) hybrids vs each progenitor

We also compared the hybrids against either the M. sacchariflorus or M. sinensis parent. A gene only was considered DE between hybrids and parents when it was DE against both parents.

##DESeq2

#alternative
dds <- DESeqDataSetFromTximport(mytxi, pheno, ~tissue + spp)
dds$group<- factor(paste0(pheno$tissue,pheno$spp))
model.matrix(~ dds$group)
##    (Intercept) dds$groupleafmsa dds$groupleafmsi dds$grouproothyb
## 1            1                0                0                1
## 2            1                0                0                0
## 3            1                0                0                0
## 4            1                0                0                1
## 5            1                0                0                0
## 6            1                0                0                0
## 7            1                0                0                1
## 8            1                0                0                0
## 9            1                0                0                0
## 10           1                0                0                1
## 11           1                0                0                0
## 12           1                0                0                0
## 13           1                0                0                0
## 14           1                0                1                0
## 15           1                0                0                0
## 16           1                1                0                0
## 17           1                0                0                0
## 18           1                0                0                0
## 19           1                1                0                0
## 20           1                0                0                0
## 21           1                0                0                0
## 22           1                0                0                0
## 23           1                0                1                0
##    dds$grouprootmsa dds$grouprootmsi dds$groupstemhyb dds$groupstemmsa
## 1                 0                0                0                0
## 2                 0                0                1                0
## 3                 0                0                0                0
## 4                 0                0                0                0
## 5                 0                0                1                0
## 6                 0                0                0                0
## 7                 0                0                0                0
## 8                 0                0                1                0
## 9                 0                0                0                0
## 10                0                0                0                0
## 11                0                0                1                0
## 12                0                0                0                0
## 13                0                0                0                0
## 14                0                0                0                0
## 15                0                0                0                1
## 16                0                0                0                0
## 17                0                0                0                1
## 18                1                0                0                0
## 19                0                0                0                0
## 20                1                0                0                0
## 21                0                0                0                0
## 22                0                1                0                0
## 23                0                0                0                0
##    dds$groupstemmsi
## 1                 0
## 2                 0
## 3                 0
## 4                 0
## 5                 0
## 6                 0
## 7                 0
## 8                 0
## 9                 0
## 10                0
## 11                0
## 12                0
## 13                1
## 14                0
## 15                0
## 16                0
## 17                0
## 18                0
## 19                0
## 20                0
## 21                1
## 22                0
## 23                0
## attr(,"assign")
## [1] 0 1 1 1 1 1 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$`dds$group`
## [1] "contr.treatment"
design(dds) <- ~ group
dds <- DESeq(dds)
resultsNames(dds)
## [1] "Intercept"                "group_leafmsa_vs_leafhyb"
## [3] "group_leafmsi_vs_leafhyb" "group_roothyb_vs_leafhyb"
## [5] "group_rootmsa_vs_leafhyb" "group_rootmsi_vs_leafhyb"
## [7] "group_stemhyb_vs_leafhyb" "group_stemmsa_vs_leafhyb"
## [9] "group_stemmsi_vs_leafhyb"
#leaf
leaf_hyb_msa <- lfcShrink(dds, contrast=c("group","leafhyb","leafmsa"), type="ashr", alpha =0.05)
leaf_hyb_msi <- lfcShrink(dds, contrast=c("group","leafhyb","leafmsi"), type="ashr", alpha =0.05)
summary(leaf_hyb_msa, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 4455, 6.8%
## LFC < 0 (down)     : 2942, 4.5%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 8834, 14%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(leaf_hyb_msi, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 3883, 6%
## LFC < 0 (down)     : 2419, 3.7%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 8834, 14%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(leaf_hyb_msa, ylim=c(-2,2), alpha =0.05) 

plotMA(leaf_hyb_msi, ylim=c(-2,2), alpha =0.05) 

leaf_hyb_msa.05 <- as.data.frame(leaf_hyb_msa)
leaf_hyb_msa.05$rownames <- rownames(leaf_hyb_msa)
leaf_hyb_msa.05 <- leaf_hyb_msa.05 %>% filter(padj<0.05)
leaf_hyb_msi.05 <- as.data.frame(leaf_hyb_msi)
leaf_hyb_msi.05$rownames <- rownames(leaf_hyb_msi)
leaf_hyb_msi.05 <- leaf_hyb_msi.05 %>% filter(padj<0.05)

leaf_hyb_both <- intersect(leaf_hyb_msa.05$rownames,leaf_hyb_msi.05$rownames)
length(leaf_hyb_both)
## [1] 1464
#root
root_hyb_msa <- lfcShrink(dds, contrast=c("group","roothyb","rootmsa"), type="ashr", alpha =0.05)
root_hyb_msi <- lfcShrink(dds, contrast=c("group","roothyb","rootmsi"), type="ashr", alpha =0.05)
summary(root_hyb_msa, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 6061, 9.3%
## LFC < 0 (down)     : 4392, 6.7%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 10096, 15%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(root_hyb_msi, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 6115, 9.4%
## LFC < 0 (down)     : 4695, 7.2%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 11353, 17%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(root_hyb_msa, ylim=c(-2,2), alpha =0.05) 

plotMA(root_hyb_msi, ylim=c(-2,2), alpha =0.05)

root_hyb_msa.05 <- as.data.frame(root_hyb_msa)
root_hyb_msa.05$rownames <- rownames(root_hyb_msa)
root_hyb_msa.05 <- root_hyb_msa.05 %>% filter(padj<0.05)
root_hyb_msi.05 <- as.data.frame(root_hyb_msi)
root_hyb_msi.05$rownames <- rownames(root_hyb_msi)
root_hyb_msi.05 <- root_hyb_msi.05 %>% filter(padj<0.05)

root_hyb_both <- intersect(root_hyb_msa.05$rownames,root_hyb_msi.05$rownames)
length(root_hyb_both)
## [1] 2870
#stem
stem_hyb_msa <- lfcShrink(dds, contrast=c("group","stemhyb","stemmsa"), type="ashr", alpha =0.05)
stem_hyb_msi <- lfcShrink(dds, contrast=c("group","stemhyb","stemmsi"), type="ashr", alpha =0.05)
summary(stem_hyb_msa, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 2777, 4.3%
## LFC < 0 (down)     : 1343, 2.1%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 11353, 17%
## (mean count < 4)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(stem_hyb_msi, alpha =0.05) 
## 
## out of 65235 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)       : 3122, 4.8%
## LFC < 0 (down)     : 1252, 1.9%
## outliers [1]       : 316, 0.48%
## low counts [2]     : 10096, 15%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
plotMA(stem_hyb_msa, ylim=c(-2,2), alpha =0.05) 

plotMA(stem_hyb_msi, ylim=c(-2,2), alpha =0.05) 

stem_hyb_msa.05 <- as.data.frame(stem_hyb_msa)
stem_hyb_msa.05$rownames <- rownames(stem_hyb_msa)
stem_hyb_msa.05 <- stem_hyb_msa.05 %>% filter(padj<0.05)
stem_hyb_msi.05 <- as.data.frame(stem_hyb_msi)
stem_hyb_msi.05$rownames <- rownames(stem_hyb_msi)
stem_hyb_msi.05 <- stem_hyb_msi.05 %>% filter(padj<0.05)

stem_hyb_both <- intersect(stem_hyb_msa.05$rownames,stem_hyb_msi.05$rownames)
length(stem_hyb_both)
## [1] 729

##Upset

library(UpSetR)
allgenes <- rownames(mytxi$counts)
list <- list2  <-list3  <- as.data.frame(allgenes)
list$leaf.msin <- list2$leaf.msin <- as.integer(allgenes %in% leaf_hyb_msi.05$rownames)
list$stem.msin <- list2$stem.msin <- as.integer(allgenes %in% stem_hyb_msi.05$rownames)
list$root.msin <- list2$root.msin <- as.integer(allgenes %in% root_hyb_msi.05$rownames)
list$leaf.msac <- list3$leaf.msac <-as.integer(allgenes %in% leaf_hyb_msa.05$rownames)
list$stem.msac <- list3$stem.msac <- as.integer(allgenes %in% stem_hyb_msa.05$rownames)
list$root.msac <- list3$root.msac <- as.integer(allgenes %in% root_hyb_msa.05$rownames)


upset(list, nsets = 6, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

upset(list2, nsets = 3, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

upset(list3, nsets = 3, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

##Venn

library(UpSetR)
allgenes <- rownames(mytxi$counts)
list <- as.data.frame(allgenes)
list$leaf.heterosis.05 <- as.integer(allgenes %in% leaf_hyb_both)
list$stem.heterosis.05 <- as.integer(allgenes %in% stem_hyb_both)
list$root.heterosis.05 <- as.integer(allgenes %in% root_hyb_both)

upset(list, nsets = 3, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

#euler diagram
list.euler<-list(root=as.vector(root_hyb_both),
                 leaf=as.vector(leaf_hyb_both),
                 stem=as.vector(stem_hyb_both))

plot(euler(list.euler, shape = "ellipse"),legend = T, quantities = T, labels = T, fills = brewer.pal(n = 7,name = "Spectral"))

plot(euler(list.euler, shape = "ellipse"),legend = F, quantities = F, labels = F, fills = brewer.pal(n = 7,name = "Spectral"))

##Compare progenitors

A gene only was considered DE between hybrids and parents when it was DE against both parents.

library(UpSetR)
allgenes <- rownames(mytxi$counts)
list <- as.data.frame(allgenes)
list$leaf_hyb_both <- as.integer(allgenes %in% leaf_hyb_both)
list$stem_hyb_both <- as.integer(allgenes %in% stem_hyb_both)
list$root_hyb_both <- as.integer(allgenes %in% root_hyb_both)
list$root_few_many.05 <- as.integer(allgenes %in% root_few_many.05$rownames) 
list$leaf_few_many.05 <- as.integer(allgenes %in% leaf_few_many.05$rownames) 
list$stem_few_many.05 <- as.integer(allgenes %in% stem_few_many.05$rownames) 

upset(list, nsets = 6, number.angles = 0, point.size = 3, line.size = 1.2, mainbar.y.label = "Shared DEG", sets.x.label = "Total DEG", text.scale = c(2, 2, 2, 2, 2, 2), order.by = "freq")

#Enrichment analysis

The enrichment analysis was based on an F-fisher test (FDR<0.05) using the library topGO with the “weight01” algorithm. Using the lists of DE genes and functional annotation as inputs, topGO compared the number of DEGs in each category with the expected number of genes for the whole transcriptome. The “weight01” algorithm resolves the relations between related GO ontology terms at different levels. Later, enriched categories were plotted using ggplot2.

##Functions

#PREFLIGHT
library(topGO,quietly = T)
library(data.table,quietly = T)

run.topgo.pipeline.BP <- function(mytemp) {
  #PIPE starts
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  #BP
  fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
#  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
   results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
  #add genes to dataframe
  results.fdata.mytemp.BP$genes <-   sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
    genes<- genesInTerm(fdata.mytemp.BP, x)  
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.BP  
}

#MF
run.topgo.pipeline.MF <- function(mytemp) {
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
#  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
    results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
  #add genes to dataframe
  results.fdata.mytemp.MF$genes <-   sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
    genes<-genesInTerm(fdata.mytemp.MF, x) 
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.MF
}


full.topgo.pipeline.BP <- function(mytemp) {
  #PIPE starts
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  #BP
  fdata.mytemp.BP <- new("topGOdata", ontology="BP", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
#  results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight_fisher = runTest(fdata.mytemp.BP, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.BP, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=150)
   results.fdata.mytemp.BP <- GenTable(fdata.mytemp.BP, weight01_fisher = runTest(fdata.mytemp.BP, algorithm = "weight01", statistic = "fisher"), topNodes=300)
  #add genes to dataframe
  results.fdata.mytemp.BP$genes <-   sapply(results.fdata.mytemp.BP$GO.ID, function(x) {
    genes<- genesInTerm(fdata.mytemp.BP, x)  
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.BP  
}

#MF
full.topgo.pipeline.MF <- function(mytemp) {
  list.mytemp <- factor(as.integer(allgenes %in% mytemp))
  names(list.mytemp) <- allgenes
  fdata.mytemp.MF <- new("topGOdata", ontology="MF", allGenes=list.mytemp, annot = annFUN.gene2GO, gene2GO = geneID2GO)
#  results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight_fisher = runTest(fdata.mytemp.MF, algorithm = "weight", statistic = "fisher"), elim_fisher = runTest(fdata.mytemp.MF, algorithm = "elim", statistic = "fisher"), weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=50)
    results.fdata.mytemp.MF <- GenTable(fdata.mytemp.MF, weight01_fisher = runTest(fdata.mytemp.MF, algorithm = "weight01", statistic = "fisher"), topNodes=100)
  #add genes to dataframe
  results.fdata.mytemp.MF$genes <-   sapply(results.fdata.mytemp.MF$GO.ID, function(x) {
    genes<-genesInTerm(fdata.mytemp.MF, x) 
    genes[[1]][genes[[1]] %in% mytemp]
  })
  results.fdata.mytemp.MF
}

##END FUNCTIONS###

##Compute enrichment analysis (DE hybrids)

#COMPUTE ENRICHMENT ANALYSIS
library(topGO,quietly = T)
library(data.table,quietly = T)

#SLIM ANNOT IN HYBRIDS
allgenes <- rownames(counts)
topgo.file <- read.delim("Msin_b2go/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "Msin_b2go/slim_annot_TOPGO.annot")

slim_BP.leaf_few_many.05 <- run.topgo.pipeline.BP(leaf_few_many.05$rownames)
slim_MF.leaf_few_many.05 <- run.topgo.pipeline.MF(leaf_few_many.05$rownames)

slim_BP.root_few_many.05 <- run.topgo.pipeline.BP(root_few_many.05$rownames)
slim_MF.root_few_many.05 <- run.topgo.pipeline.MF(root_few_many.05$rownames)

slim_BP.stem_few_many.05 <- run.topgo.pipeline.BP(stem_few_many.05$rownames)
slim_MF.stem_few_many.05 <- run.topgo.pipeline.MF(stem_few_many.05$rownames)


#FULL GO ANNOTATION
allgenes <- rownames(counts)
topgo.file <- read.delim("Msin_b2go/full_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "Msin_b2go/full_annot_TOPGO.annot")

full_BP.leaf_few_many.05 <- full.topgo.pipeline.BP(leaf_few_many.05$rownames)
full_MF.leaf_few_many.05 <- full.topgo.pipeline.MF(leaf_few_many.05$rownames)

full_BP.root_few_many.05 <- full.topgo.pipeline.BP(root_few_many.05$rownames)
full_MF.root_few_many.05 <- full.topgo.pipeline.MF(root_few_many.05$rownames)

full_BP.stem_few_many.05 <- full.topgo.pipeline.BP(stem_few_many.05$rownames)
full_MF.stem_few_many.05 <- full.topgo.pipeline.MF(stem_few_many.05$rownames)

##Plot SLIM GO

#BUBBLEPLOTS
library(corrplot)
library(reshape2)
library(RColorBrewer)
library(pheatmap)
library(dplyr)
library(ggplot2)

#add tissue column to each table
slim_BP.leaf_few_many.05$tissue <- "leaf"
slim_BP.root_few_many.05$tissue <- "root"
slim_BP.stem_few_many.05$tissue <- "stem"

slim_MF.leaf_few_many.05$tissue <- "leaf"
slim_MF.root_few_many.05$tissue <- "root"
slim_MF.stem_few_many.05$tissue <- "stem"

#join table:
slim_BP.few_many.05 <- rbind(slim_BP.leaf_few_many.05,slim_BP.root_few_many.05,slim_BP.stem_few_many.05)
#correct the scientific notation by converting from character to numeric:
slim_BP.few_many.05$weight01_fisher <- as.numeric(slim_BP.few_many.05$weight01_fisher)
write.table(as.matrix(slim_BP.few_many.05), file = "EA_slimGO_hybrids-few-vs-many.txt", sep = '\t')

#join table:
slim_MF.few_many.05 <- rbind(slim_MF.leaf_few_many.05,slim_MF.root_few_many.05,slim_MF.stem_few_many.05)
#correct the scientific notation by converting from character to numeric:
slim_MF.few_many.05$weight01_fisher <- as.numeric(slim_MF.few_many.05$weight01_fisher)
write.table(as.matrix(slim_MF.few_many.05), file = "MF_slimGO_hybrids-few-vs-many.txt", sep = '\t')


#list of GOs that are enrich in at least one analysis
enriched.GOs <- slim_BP.few_many.05 %>% filter(weight01_fisher<0.05) %>% filter (Significant>=10) # %>% select(GO.ID) 
#filter our enrich GOs by name
table.merge.filter <- slim_BP.few_many.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
#PUT A BOTTOM CAP
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- table.merge.filter #save this object for later

p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.7) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.9) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.001), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1) 
p <- p + geom_point(data=table.merge.filter, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1)

#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p <- p + scale_color_gradientn(colours = rev(brewer.pal(9,"Blues")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p + theme_light()

#MF
enriched.GOs <- slim_MF.few_many.05 %>% filter(weight01_fisher<0.05) %>% filter (Significant>=10) # %>% select(GO.ID) 
table.merge.filter <- slim_MF.few_many.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- rbind(table.join,table.merge.filter) #save this object for later

p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.merge.filter, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1)
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
p <- p + scale_color_gradientn(colours = rev(brewer.pal(9,"Blues")))
p + theme_light()

#BP+MF (table.join)
p <- ggplot(data=table.join, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80), range = (c(1,8)))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p <- p + scale_x_discrete(limits = c("root","stem","leaf"))
p + theme_linedraw()

#BP+MF (table.join)
myterms <- c(
    #MF                 
                      "methyltransferase activity",
    "phosphatase activity",
                                 "kinase activity",
                     "oxidoreductase activity",
 "hydrolase activity, acting on glycosyl b...",
 
 #BP
                    "cofactor metabolic process",
                       "lipid metabolic process",
         "cellular amino acid metabolic process",
            "sulfur compound metabolic process",
   "generation of precursor metabolites and ...",
 "carbohydrate metabolic process",
 "secondary metabolic process",
                            "response to stress")


p1 <- ggplot(data=table.join, aes(x=tissue,y=Term)) 
p1 <- p1 + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p1 <- p1 + scale_size(breaks = c(10,20,30,40,50,60,70,80), range = (c(2,10)))
p1 <- p1 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p1 <- p1 + scale_x_discrete(limits = c("root","stem","leaf"))
p1 <- p1 + scale_y_discrete(limits=myterms)
p1 <- p1 + theme_linedraw()
p1

##Plot FULL GO

#BUBBLEPLOTS
library(corrplot)
library(reshape2)
library(RColorBrewer)
library(pheatmap)
library(dplyr)
library(ggplot2)

#add tissue column to each table
full_BP.leaf_few_many.05$tissue <- "leaf"
full_BP.root_few_many.05$tissue <- "root"
full_BP.stem_few_many.05$tissue <- "stem"

full_MF.leaf_few_many.05$tissue <- "leaf"
full_MF.root_few_many.05$tissue <- "root"
full_MF.stem_few_many.05$tissue <- "stem"


#join table:
full_BP.few_many.05 <- rbind(full_BP.leaf_few_many.05,full_BP.root_few_many.05,full_BP.stem_few_many.05)
#correct the scientific notation by converting from character to numeric:
full_BP.few_many.05$weight01_fisher <- as.numeric(full_BP.few_many.05$weight01_fisher)
write.table(as.matrix(full_BP.few_many.05), file = "EA_fullGO_hybrids-few-vs-many.txt", sep = '\t')


#join table:
full_MF.few_many.05 <- rbind(full_MF.leaf_few_many.05,full_MF.root_few_many.05,full_MF.stem_few_many.05)
#correct the scientific notation by converting from character to numeric:
full_MF.few_many.05$weight01_fisher <- as.numeric(full_MF.few_many.05$weight01_fisher)
write.table(as.matrix(full_MF.few_many.05), file = "MF_fullGO_hybrids-few-vs-many.txt", sep = '\t')




#list of GOs that are enrich in at least one analysis
enriched.GOs <- full_BP.few_many.05 %>% filter(weight01_fisher<0.05) %>% filter (Significant>=10) # %>% select(GO.ID) 
#filter our enrich GOs by name
table.merge.filter <- full_BP.few_many.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
#PUT A BOTTOM CAP
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- table.merge.filter

p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.8) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.8) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.001), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.8) 
p <- p + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(5,10,20,30,40,50,75,100), range = (c(2,9)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p <- p + scale_x_discrete(limits = c("root","stem","leaf"))

#p <- p + scale_y_discrete(limits=rev(myTERMS))
p + theme_light()

#MF
enriched.GOs <- full_MF.few_many.05 %>% filter(weight01_fisher<0.05) %>% filter (Significant>=10) # %>% select(GO.ID) 
table.merge.filter <- full_MF.few_many.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- rbind(table.join,table.merge.filter)

p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p <- p + scale_x_discrete(limits = c("root","stem","leaf"))

p + theme_light()

#BP+MF (table.join)
p <- ggplot(data=table.join, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p <- p + scale_x_discrete(limits = c("root","stem","leaf"))

p + theme_light()

##Compute enrichment analysis hybrids vs progenitors

#COMPUTE ENRICHMENT ANALYSIS
library(topGO,quietly = T)
library(data.table,quietly = T)

#SLIM ANNOT IN HYBRIDS
allgenes <- rownames(counts)
topgo.file <- read.delim("Msin_b2go/slim_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "Msin_b2go/slim_annot_TOPGO.annot")

slim_BP.leaf_heterosis2.05 <- run.topgo.pipeline.BP(leaf_hyb_both)
slim_MF.leaf_heterosis2.05 <- run.topgo.pipeline.MF(leaf_hyb_both)

slim_BP.root_heterosis2.05 <- run.topgo.pipeline.BP(root_hyb_both)
slim_MF.root_heterosis2.05 <- run.topgo.pipeline.MF(root_hyb_both)

slim_BP.stem_heterosis2.05 <- run.topgo.pipeline.BP(stem_hyb_both)
slim_MF.stem_heterosis2.05 <- run.topgo.pipeline.MF(stem_hyb_both)


#FULL GO ANNOTATION
allgenes <- rownames(counts)
topgo.file <- read.delim("Msin_b2go/full_annot_TOPGO.annot", header = F)
geneID2GO <- readMappings(file = "Msin_b2go/full_annot_TOPGO.annot")

full_BP.leaf_heterosis2.05 <- full.topgo.pipeline.BP(leaf_hyb_both)
full_MF.leaf_heterosis2.05 <- full.topgo.pipeline.MF(leaf_hyb_both)

full_BP.root_heterosis2.05 <- full.topgo.pipeline.BP(root_hyb_both)
full_MF.root_heterosis2.05 <- full.topgo.pipeline.MF(root_hyb_both)

full_BP.stem_heterosis2.05 <- full.topgo.pipeline.BP(stem_hyb_both)
full_MF.stem_heterosis2.05 <- full.topgo.pipeline.MF(stem_hyb_both)

##Plot SLIM GO

#BUBBLEPLOTS
library(corrplot)
library(reshape2)
library(RColorBrewer)
library(pheatmap)
library(dplyr)
library(ggplot2)

#add tissue column to each table
slim_BP.leaf_heterosis2.05$tissue <- "leaf"
slim_BP.root_heterosis2.05$tissue <- "root"
slim_BP.stem_heterosis2.05$tissue <- "stem"

slim_MF.leaf_heterosis2.05$tissue <- "leaf"
slim_MF.root_heterosis2.05$tissue <- "root"
slim_MF.stem_heterosis2.05$tissue <- "stem"

#join table:
slim_BP.heterosis2.05 <- rbind(slim_BP.leaf_heterosis2.05,slim_BP.root_heterosis2.05,slim_BP.stem_heterosis2.05)
#correct the scientific notation by converting from character to numeric:
slim_BP.heterosis2.05$weight01_fisher <- as.numeric(slim_BP.heterosis2.05$weight01_fisher)
write.table(as.matrix(slim_BP.heterosis2.05), file = "EA_slimGO_heterosis_hyb-DE-both-progenitors.txt", sep = '\t')

#join table:
slim_MF.heterosis2.05 <- rbind(slim_MF.leaf_heterosis2.05,slim_MF.root_heterosis2.05,slim_MF.stem_heterosis2.05)
#correct the scientific notation by converting from character to numeric:
slim_MF.heterosis2.05$weight01_fisher <- as.numeric(slim_MF.heterosis2.05$weight01_fisher)
write.table(as.matrix(slim_MF.heterosis2.05), file = "MF_slimGO_heterosis_hyb-DE-both-progenitors.txt", sep = '\t')


#list of GOs that are enrich in at least one analysis
enriched.GOs <- slim_BP.heterosis2.05 %>% filter(weight01_fisher<0.01) %>% filter (Significant>=20) # %>% select(GO.ID) 
#filter our enrich GOs by name
table.merge.filter <- slim_BP.heterosis2.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
#PUT A BOTTOM CAP
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- table.merge.filter #use later

p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.7) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.01) %>% filter(weight01_fisher>0.01), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=0.9) 
#p <- p + geom_point(data=table.merge.filter %>% filter(weight01_fisher<=0.001), aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1) 
p <- p + geom_point(data=table.merge.filter, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1)

#p <- p + scale_size(breaks = c(0.5,1,2,3,4,5), range=c(2,8))
#p <- p + scale_color_gradientn(trans = "log2", colours = brewer.pal(9,"Blues"), breaks = c(0,1,5,10,20,50,100,200))
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),"white",brewer.pal(9,"Reds")), limits = c(-15,15), breaks = c(-15,-10,-5,-2.5,-1,0,1,2.5,5,10,15))
#p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"Blues")),brewer.pal(9,"Reds")))
p <- p + scale_color_gradientn(colours = rev(brewer.pal(9,"Blues")))
#p <- p + scale_y_discrete(limits=rev(myTERMS))
p + theme_light()

#MF
enriched.GOs <- slim_MF.heterosis2.05 %>% filter(weight01_fisher<0.01) %>% filter (Significant>=20) # %>% select(GO.ID) 
table.merge.filter <- slim_MF.heterosis2.05 %>% filter(GO.ID %in% unlist(enriched.GOs)) 
table.merge.filter$weight01_fisher[table.merge.filter$weight01_fisher<0.0001] <- 0.0001

table.join <- rbind(table.join,table.merge.filter) #use later


p <- ggplot(data=table.merge.filter, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.merge.filter, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  stroke=1, alpha=1)
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80,90,100,500), range = (c(1,11)))
p <- p + scale_color_gradientn(colours = rev(brewer.pal(9,"Blues")))
p + theme_light()

#BP+MF (table.join)
p <- ggplot(data=table.join, aes(x=tissue,y=Term)) 
p <- p + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p <- p + scale_size(breaks = c(10,20,30,40,50,60,70,80), range = (c(1,8)))
p <- p + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p <- p + scale_x_discrete(limits = c("root","stem","leaf"))
p + theme_linedraw()

####
myterms <-c(
  "phosphatase activity",#
"translation factor activity, RNA binding",#
"ion binding",#
"DNA binding",#
  "helicase activity",#
#"rRNA binding",#
"structural constituent of ribosome",#
"RNA binding",#
  
  
  "tRNA metabolic process",
"protein-containing complex assembly",
"cellular amino acid metabolic process",

"cellular nitrogen compound metabolic pro...",
"small molecule metabolic process",
"reproduction",
"membrane organization",

"biosynthetic process",
#"ribonucleoprotein complex assembly",
"generation of precursor metabolites and ...",
"ribosome biogenesis",
"photosynthesis",
"translation")

p2 <- ggplot(data=table.join, aes(x=tissue,y=Term)) 
p2 <- p2 + geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant, colour=log2(weight01_fisher)),  alpha=1) +
  geom_point(data=table.join, aes(x=tissue, y=Term, size=Significant), shape = 1, colour = "darkgrey") 
p2 <- p2 + scale_size(breaks = c(20,40,50,60,70,80,100,200,300,400), range = (c(4,14)))
p2 <- p2 + scale_color_gradientn(colours = c(rev(brewer.pal(9,"YlGn")),"white"))
p2 <- p2 + scale_x_discrete(limits = c("root","stem","leaf"))
p2 <- p2 + scale_y_discrete(limits=myterms)
p2 <- p2 + theme_linedraw()
p2

library(gridExtra)
grid.arrange(p1, p2,  layout_matrix=rbind(c(1,2),c(1,2),c(3,2)))