% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/selectBatchHVG.R
\name{selectBatchHVG}
\alias{selectBatchHVG}
\alias{selectBatchHVG.liger}
\alias{selectBatchHVG.ligerDataset}
\alias{selectBatchHVG.dgCMatrix}
\alias{selectBatchHVG.DelayedArray}
\title{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Batch-aware highly variable gene selection}
\usage{
selectBatchHVG(object, ...)

\method{selectBatchHVG}{liger}(
  object,
  nGenes = 2000,
  verbose = getOption("ligerVerbose", TRUE),
  ...
)

\method{selectBatchHVG}{ligerDataset}(
  object,
  nGenes = 2000,
  features = NULL,
  scaleFactor = NULL,
  verbose = getOption("ligerVerbose", TRUE),
  ...
)

\method{selectBatchHVG}{dgCMatrix}(
  object,
  nGenes = 2000,
  returnStats = FALSE,
  scaleFactor = NULL,
  verbose = getOption("ligerVerbose", TRUE),
  ...
)

\method{selectBatchHVG}{DelayedArray}(
  object,
  nGenes = 2000,
  means = NULL,
  scaleFactor = NULL,
  returnStats = FALSE,
  chunk = getOption("ligerChunkSize", 20000),
  verbose = getOption("ligerVerbose", TRUE),
  ...
)
}
\arguments{
\item{object}{A \code{\linkS4class{liger}} object,
\code{\linkS4class{ligerDataset}} object or a sparse/dense matrix. The liger
objects must have raw counts available. A direct matrix input is preferably
log-1p transformed from CPM normalized counts in cell per column orientation.}

\item{...}{Arguments passed to S3 methods.}

\item{nGenes}{Integer number of target genes to select. Default \code{2000}.}

\item{verbose}{Logical. Whether to show a progress bar. Default
\code{getOption("ligerVerbose")} or \code{TRUE} if users have not set it.}

\item{features}{For ligerDataset method, the feature subset to limit the
selection to, mainly for limiting the selection to happen within the shared
genes of all datasets. Default \code{NULL} selects from all features in the
ligerDataset object.}

\item{scaleFactor}{Numeric vector of scaling factors to normalize the raw
counts to unit sum. This is pre-calculated at liger object creation (stored as
\code{object$nUMI}) and internally specified in S3 method chains, thus it
generally does not need to be specified by users.}

\item{returnStats}{Logical, for dgCMatrix-method, whether to return a data
frame of statistics for all features, or by default \code{FALSE} just return
a character vector of selected features.}

\item{means}{Numeric vector of pre-calculated means per gene, derived from
log1p CPM normalized expression.}

\item{chunk}{Integer. Maximum number of cells in each chunk when working on
HDF5Array. Default \code{20000}.}
}
\value{
\itemize{
\item{liger-method: Returns the input liger object with the selected genes
updated in \code{varFeatures} slot, which can be accessed with
\code{varFeatures(object)}. Additionally, the statistics are updated in
the \code{featureMeta} slot of each ligerDataset object within the
\code{datasets} slot of the \code{object}.}
\item{ligerDataset-method: Returns the input ligerDataset object with the
statistics updated in the \code{featureMeta} slot.}
\item{dgCMatrix-method: By default returns a character vector of selected
variable features. If \code{returnStats = TRUE}, returns a data.frame of the
statistics.}
}
}
\description{
Method to select HVGs based on mean dispersions of genes that are highly
variable in all batches, using the top \code{nGenes} genes per batch by
average normalized dispersion. If the target number of genes still has not
been reached, then HVGs in all but one batch are used to fill up. This is
continued until HVGs in a single batch are considered.

This is an \emph{rliger} implementation of the method originally published in
\href{https://scib.readthedocs.io/en/latest/api/scib.preprocessing.hvg_batch.html}{SCIB}.
We found that it has the potential to improve integration under some
circumstances, and are currently testing it.

This function currently only works for shared features across all datasets.
For selection from only part of the datasets and selection for
dataset-specific unshared features, please use \code{\link{selectGenes}()}.
}
\examples{
pbmc <- selectBatchHVG(pbmc, nGenes = 10)
varFeatures(pbmc)
}
\references{
Luecken, M.D., Büttner, M., Chaichoompu, K. et al. (2022), Benchmarking
atlas-level data integration in single-cell genomics. \emph{Nat Methods}, 19,
41–50. \url{https://doi.org/10.1038/s41592-021-01336-8}.
}
\seealso{
\code{\link[=selectGenes]{selectGenes()}}
}
