## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
# # if (!require("remotes", quietly = TRUE))
# #     install.packages("remotes")
# #
# # remotes::install_github("statdivlab/radEmu")

## ----setup, message = FALSE---------------------------------------------------
library(magrittr)
library(dplyr)
library(ggplot2)
library(stringr)
library(radEmu)

## ----results = 'hide'---------------------------------------------------------
tse <- requireNamespace("TreeSummarizedExperiment", quietly = TRUE) == TRUE

## ----echo = FALSE-------------------------------------------------------------
print(paste0("TreeSummarizedExperiment is installed: ", tse))

## ----message = FALSE, eval = tse----------------------------------------------
library(TreeSummarizedExperiment)
library(SummarizedExperiment)
library(SingleCellExperiment)
setClassUnion("ExpData", c("matrix", "SummarizedExperiment"))

## ----eval = tse---------------------------------------------------------------
data(wirbel_sample)
data(wirbel_otu)
data(wirbel_taxonomy)
wirbel_tse <- TreeSummarizedExperiment(assays = list(Count = t(wirbel_otu)),
                                       rowData = wirbel_taxonomy,
                                       colData = wirbel_sample)
wirbel_tse

## ----eval = tse---------------------------------------------------------------
dim(colData(wirbel_tse))
head(colData(wirbel_tse))

## ----eval = tse---------------------------------------------------------------
dim(assay(wirbel_tse, "Count"))
# let's check out a subset
assay(wirbel_tse, "Count")[1:5, 1:3]

## ----eval = tse---------------------------------------------------------------
mOTU_names <- rownames(assay(wirbel_tse, "Count"))

## ----eval = tse---------------------------------------------------------------
head(rowData(wirbel_tse))

## ----eval = tse---------------------------------------------------------------
colData(wirbel_tse)$Group <- factor(colData(wirbel_tse)$Group, levels = c("CTR","CRC"))

## ----eval = tse---------------------------------------------------------------
chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas")
genera_rows <- rowData(wirbel_tse)$genus %in% chosen_genera
wirbel_restrict <- wirbel_tse[genera_rows]

## ----eval = tse---------------------------------------------------------------
sample_cols <- colData(wirbel_restrict)$Country == "CHI"
wirbel_china <- wirbel_restrict[, sample_cols]

## ----eval = tse---------------------------------------------------------------
sum(colSums(assay(wirbel_china, "Count")) == 0) # no samples have a count sum of 0 
sum(rowSums(assay(wirbel_china, "Count")) == 0) # one category has a count sum of 0 
category_to_rm <- rowSums(assay(wirbel_china, "Count")) == 0
wirbel_china <- wirbel_china[!category_to_rm, ]
sum(rowSums(assay(wirbel_china, "Count")) == 0) # now no categories have a count sum of 0 

## ----eval = tse---------------------------------------------------------------
ch_fit <- emuFit(formula = ~ Group, 
                 Y = wirbel_china, 
                 assay_name = "Count",
                 run_score_tests = FALSE) 

## ----fig.height = 6, fig.width = 6, eval = tse--------------------------------
plot(ch_fit)$plots

