\documentclass[codesnippet]{jss}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% declarations for jss.cls %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% almost as usual
\author{Kem Phillips}
\title{R Functions to Symbolically Compute the Central and Non-central Moments of the Multivariate Normal Distribution}

%% for pretty printing and a nice hypersummary also set:
%%Plainauthor{Achim Zeileis, Second Author} %% comma-separated
%%Plaintitle{A Capitalized Title: Something About a Package foo} %% without formatting
\Shorttitle{Expected Values of Functions of a Multivariate Normal Random Variable} %% a short title (if necessary)

%% an abstract and keywords
\Abstract{The central moments of the multivariate normal distribution are functions of
its $n \times n$ variance-covariance matrix $\Sigma$.
These moments can be expressed symbolically as linear combinations of products of powers of the elements of $\Sigma$.
A formula for these moments derived by differentiating the characteristic function is developed.
The formula requires searching integer matrices for matrices
whose $n$ successive row and column sums equal the $n$ exponents of the moment.
This formula is implemented in R, with
\proglang{R} functions to display moments in {\LaTeX}
 and to evaluate moments at specified variance-covariance matrices are included.
 
 This vignette also describes how the \code{symmoments} package has been augmented
to calculate symbolic representations of non-central multivariate moments,
and to calculate the expected value of such moments as well as the expected value of multivariate polynomials.
In addition, it is shown that first non-central multivariate moments
are in a one-to-one relationship to phylogenetic trees.
Functions making this correspondence, as well as a function for the correspondence with matchings are provided.
}

\Keywords{non-central moments, multivariate Normal distribution, symbolic computation, multivariate polynomials, phylogenetic trees, \proglang{R}, \LaTeX}
\Plainkeywords{non-central moments, multivariate Normal distribution, symbolic computation, multivariate polynomials, phylogenetic trees, R, Latex} %% without formatting
%% at least one keyword must be supplied

%% publication information
%% NOTE: Typically, this can be left commented and will be filled out by the technical editor
%% \Volume{13}
%% \Issue{9}
%% \Month{September}
%% \Year{2004}
%% \Submitdate{2004-09-29}
%% \Acceptdate{2004-09-29}

%% The address of (at least) one author should be given
%% in the following format:
\Address{
  Kem Phillips \\
  PO Box 434 \\
  Cavendish \\
  VT 05142 USA \\
  E-mail: \email{kemphillips@comcast.net}\\}

%% It is also possible to add a telephone and fax number
%% before the e-mail in the following format:
%% Telephone: +43/1/31336-5053
%% Fax: +43/1/31336-734

%% for those who use Sweave please include the following line (with % symbols):
%% need no \usepackage{Sweave.sty}

%% end of declarations %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% \VignetteIndexEntry{R Functions to Symbolically Compute the Central and Non-central Moments of the Multivariate Normal Distribution}
\usepackage{amsmath,rotating,thumbpdf}
\SweaveOpts{engine = R, eps = FALSE, keep.source = TRUE}

\begin{document}


\section{Introduction}
\label{intro}

The central moments of an $n$-dimensional random vector $X$ are defined as
\begin{equation}
m_{k_{1},...,k_{n}} = E[(X_{1}-\mu_{1})^{k_{1}}(X_{2}-\mu_{2})^{k_{2}} \cdots (X_{n}-\mu_{n})^{k_{n}}],
\end{equation}
where $E[\cdots]$ denotes expected value.
Suppose that $X$ is distributed according to the multivariate normal distribution
with mean $\mu$ and variance-covariance matrix
\begin{equation}
\Sigma = (\sigma_{ij})
\end{equation}
where the variance terms are $\sigma_{ii}, i=1,\dots,n$,
the covariance terms are $\sigma_{ij}, i \neq j$,
and by symmetry $\sigma_{ij} = \sigma_{ji}$.
For the multivariate normal distribution the central moments are not functions of the mean vector $\mu$,
and depend only on the variance-covariance terms $\sigma_{ij}$.

Simple cases are familiar.  Setting $\mu$ to 0,
\begin{eqnarray}
m_{2} = E[ X_{ 1 }^{ 2 } ] =
  \sigma_{  1 , 1 }
\end{eqnarray}
\begin{eqnarray}
m_{1,2} = E[ X_{1}X_{ 2} ] =  \sigma_{1,2}
\end{eqnarray}
Slightly more complicated cases can be computed directly or by manipulating
simple expressions obtained for moments of the form $E[ X_{1} \cdots X_{n} ]$ \citep{wikipedia}.
For example,
\begin{eqnarray}
m_{2,2} = E[ X_{ 1 }^{ 2 } X_{ 2 }^{ 2 } ] =
2 \sigma_{  1 , 2 }^{ 2 }   +
  \sigma_{  1 , 1 } \sigma_{  2 , 2 }
\end{eqnarray}
\begin{eqnarray}
m_{2,1,1} = E[ X_{ 1 }^{ 2 } X_{ 2 }^{ 1 } X_{ 3 }^{ 1 } ] =
2 \sigma_{  1 , 2 } \sigma_{  1 , 3 }  +
  \sigma_{  1 , 1 } \sigma_{  2 , 3 }
\end{eqnarray}
\begin{eqnarray}
m_{1,1} = E[ X_{ 1 }^{ 3 } X_{ 2 }^{ 1 } ] =
3 \sigma_{  1 , 1 } \sigma_{  1 , 2 }
\end{eqnarray}
Although some higher order moments follow known patterns, most are much harder to determine by simple calculations.

We wish to compute the symbolic expression of any moment $m_{k_{1},...,k_{n}}$
in terms of the $n(n+1)$ symbols $\sigma_{ij}$.
Note that if $\sum_{i=1}^{n} k_{i}$ is an odd integer, the moment is 0, so that only moments where this sum is even need be considered.

The multivariate normal distribution is fundamental to mathematical statistics, and its moments
play a central role in statistical methodology.
Various methods have been developed to numerically compute them \citep[p.~46]{muirhead82} and \citep[p.~49]{anderson71}.
\cite{kan08} developed a formula (his Proposition 1) for the central moments as a repeated sum.
He gives an excellent review of other formulas that have been developed, and
cites \cite{isserli18} as deriving the first expression for the central moments.
Muirhead (page 49) used the matrix derivatives of the multivariate normal distribution's characteristic function
to derive a formula for multivariate cumulants.
\cite{tracy93} also used matrix derivatives
to derive an expression for the distribution's moments (their Theorem 2) based on a recurrence relationship of the derivatives.
This article develops a new explicit formula for the moments starting with the
derivatives of the characteristic function.
The expression for the moments is based on a search algorithm over certain integer matrices.
This formula has been translated into \proglang{R} functions
that produce symbolic representations of
moments in terms of the variance-covariance terms $\sigma_{ij}$.
In addition, a formula for non-central moments is developed based on central moments.
This formula is programmed, and is extended to the expected values of multivariate polynomials.
Finally, it is shown that first central moments are equivalent to phylogenetic trees, 
and \proglang{R} functions making this correspondence are described. 

The functions described here are based on \cite{phillips10}, and
are available in the package \pkg{symmoments} implemented in the \proglang{R} system for
statistical computing \citep{Rcitation}. Both \proglang{R} itself and the
\pkg{symmoments} package (as well as all other packages used in this paper) are available under
the terms of the General Public License (GPL) from the Comprehensive \proglang{R} Archive Network (CRAN,
\url{http://CRAN.R-project.org/}).

\section{Development of the formula for central moments}


The moments of any distribution can be represented by the derivatives of the distribution's characteristic function.
The characteristic function of the multivariate normal distribution is \citep[p. 5, 49]{muirhead82}
\begin{equation}
E[e^{it^{\top}X}] = e^{it^{\top}\mu - \frac{1}{2}t^{\top}\Sigma t}
\end{equation}
where $t=(t_{1},t_{2},...,t_{n})$.
Within a constant, the moment is the $k_{1},...,k_{n}$-order derivative of the characteristic function evaluated at $t=0$:
\begin{equation}
m_{k_{1},...,k_{n}} = i^{-\sum_{i=1}^{n}k_{i}} \; \frac{d^{\sum_{i=1}^{n}k_{i}}}{d^{k_{1}}t_{1} d^{k_{2}}t_{2} \cdots d^{k_{n}}t_{n}} \;\; E[e^{it^{\top}X}] \mid _{t=0}
\end{equation}
where $i$ is the imaginary unit.
Expanding the exponential into an infinite sum, this is
\begin{equation}
m_{k_{1},...,k_{n}} = i^{-\sum_{i=1}^{n}k_{i}} \; \frac{d^{\sum_{i=1}^{n}k_{i}}}{d^{k_{1}}t_{1} d^{k_{2}}t_{2} \cdots d^{k_{n}}t_{n}} \; \sum_{\ell=0}^{\infty} (it^{\top}\mu - \frac{1}{2}(t^{\top}\Sigma t))^{\ell}/\ell! \mid _{t=0}
\end{equation}
Since we are to compute the central moment, we will set $\mu = 0$, so that the term $it^{\top}\mu$ will not appear:
\begin{equation}
\label{chareq}
m_{k_{1},...,k_{n}} = i^{-\sum_{i=1}^{n}k_{i}} \; \frac{d^{\sum_{i=1}^{n}k_{i}}}{d^{k_{1}}t_{1} d^{k_{2}}t_{2} \cdots d^{k_{n}}t_{n}} \; \sum_{\ell=0}^{\infty} (-\frac{1}{2}(t^{\top}\Sigma t))^{\ell}/\ell! \mid _{t=0}
\end{equation}

Since $\sum_{i=1}^{n}k_{i}$ is even, the term $i^{-\sum_{i=1}^{n}k_{i}} = (-1)^{-\sum_{i=1}^{n}k_{i}/2}$.
We note that the term $i^{-\sum_{i=1}^{n}k_{i}}$ will ultimately cancel with the negative in the infinite sum,
and will be omitted for convenience in notation.

The expression in $t$ is
\begin{equation}
t^{\top}\Sigma t = \sum_{ij} \sigma_{ij}t_{i}t_{j}
\end{equation}
We need to find the coefficient of $t_{1}^{\ell_{1}}t_{2}^{\ell_{2}}\cdots t_{n}^{\ell_{n}}$ in
\begin{equation}
(t^{\top}\Sigma t)^{\ell} = (\sum_{ij} \sigma_{ij}t_{i}t_{j})^{\ell} =  \sum_{ij} \sigma_{ij}t_{i}t_{j}  \; \cdots \;  \sum_{ij} \sigma_{ij}t_{i}t_{j}
\end{equation}
All products in the elements of the sum will occur.
Any product will be obtained by choosing $\sigma_{ij}t_{i}t_{j}$ a certain number of times, say $\ell_{ij}$.
Since one term is chosen from each of $\ell$ terms, $\sum_{ij} \ell_{ij} = \ell$.
Further, for any such matrix $(\ell_{ij})$, there will be a term, since it can be constructed by choosing
$\sigma_{ij}t_{i}t_{j}$ from the first $\ell_{ij}$ terms, and so forth for each $(ij)$ until $\ell$ is exhausted.
For any $(\ell_{ij})$ there are
\begin{equation}
\left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\end{equation}
ways to choose the terms, where this is the multinomial coefficient.
So,
\begin{equation}
(\sum_{ij} \sigma_{ij}t_{i}t_{j})^{\ell} =
\sum_{\{(\ell_{ij})\mid \sum_{ij}\ell_{ij} = \ell\}} \;\; \left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\;\; \prod_{ij} (\sigma_{ij}t_{i}t_{j})^{\ell_{ij}}
\end{equation}
\begin{equation}
\label{sigmasum}
 =
\sum_{\{(\ell_{ij})\mid \sum_{ij}\ell_{ij} = \ell\}} \;\;
\left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\;\; \prod_{ij}\sigma_{ij}^{\ell_{ij}} \;\; \prod_{ij}(t_{i}t_{j})^{\ell_{ij}}
\end{equation}

For the moment, we distinguish between $\sigma_{ij}$ and $\sigma_{ji}$ as symbols.
As a result, each $\prod_{ij}\sigma_{ij}^{\ell_{ij}}$ is unique as determined by unique $(\ell_{ij})$.
However, $t_{i}t_{j}= t_{j}t_{i}$, so
since each $\sigma_{ij}$ is combined with two $t$'s, the total exponent in $t$ is $2\ell$.
That is, a term $t_{1}^{\ell_{1}}t_{2}^{\ell_{2}}\cdots t_{n}^{\ell_{n}}$ must have
$\sum_{i=1}^{n} \ell_{i} = 2\ell$.
We need to determine the terms for which, for any $(\ell_{1},\dots, \ell_{n})$,
\begin{equation}
\prod_{ij}(t_{i}t_{j})^{\ell_{ij}} = t_{1}^{\ell_{1}} \cdots t_{n}^{\ell_{n}}
\end{equation}
We will get $t_{k}$ in the product in the following mutually exclusive cases:
\begin{equation}
\begin{array}{lc}
\rm{Condition}  & \rm{Exponent \; of} \; t_{k}  \\
\hline
i=k, j\neq k  & 1\\
i\neq k, j= k & 1\\
i = j = k & 2\\
\end{array}
\end{equation}
So the exponent of $t_{k}$ will be
\begin{equation}
\sum_{i=k, j\neq k} \ell_{ij} + \sum_{i\neq k, j=k} \ell_{ij} + 2\ell_{kk}
\end{equation}
This sum is obtained by adding the sum of $\ell_{ij}$ across row $k$ to the sum across column $k$,
since the diagonal element $k$ occurs in both sums.
That is, we get
\begin{equation}
\sum_{i} \ell_{ik} + \sum_{j} \ell_{kj} = \sum_{i} (\ell_{ik} + \ell_{ki}) = \ell_{k}
\end{equation}
We can now partition the set of $(\ell_{ij})$ in Equation~\ref{sigmasum} according to these sums, that is, $\{\ell_{k},k=1\dots n\}$.
As stated before, the sum of the exponents, $\ell_{k}$, must be $2\ell$.
\begin{multline}
(\sum_{ij} \sigma_{ij}t_{i}t_{j})^{\ell} = \\
\sum_{\{(\ell_{1},\dots \ell_{n})\mid \sum_{k}\ell_{k} = 2\ell\}}
\sum_{\{(\ell_{11},...,\ell_{nn})\mid\sum_{i}(\ell_{ik}+\ell_{ki})=\ell_{k},k=1\dots n\}}
\left(
\left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
 \prod_{ij}\sigma_{ij}^{\ell_{ij}}\right) \prod_{i=1}^{n}t_{i}^{\ell_{i}}
\end{multline}


Since differentiation is distributive with respect to addition and multiplication by constants,
the derivative of the product of $t$s can be determined from the derivatives of the individual terms:
\begin{equation}
\frac{d^{\sum_{i=1}^{n}k_{i}}}{d^{k_{1}}t_{1} d^{k_{2}}t_{2} \cdots d^{k_{n}}t_{n}} \; \prod_{i=1}^{n} t_{i}^{\ell_{i}} =
\prod_{i=1}^{n} \; \frac{d^{k_{i}}}{d t_{i}^{k_{i}}} \;  t_{i}^{\ell_{i}}
\end{equation}

\begin{equation}
 = \prod_{i=1}^{n} \; I\{k_{i}\leq \ell_{i}\}\frac{\ell_{i}!}{(\ell_{i}-k_{i})!}t_{i}^{\ell_{i}-k_{i}}
\end{equation}

Thus,
\begin{multline}
\frac{d^{\sum_{i=1}^{n}k_{i}}}{d^{k_{1}}d^{k_{2}}\cdots d^{k_{n}}}(\sum_{ij} \sigma_{ij}t_{i}t_{j})^{\ell} = \\
\sum_{\{(\ell_{1},...,\ell_{n})\mid\sum_{i}\ell_{i} = 2\ell\}}
\sum_{\{(\ell_{11},\ell_{12},...,\ell_{nn})\mid\sum_{i}(\ell_{ih}+\ell_{hi})=\ell_{h},h=1,\dots,n\}}
\left(
\left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
   \prod_{ij}\sigma_{ij}^{\ell_{ij}}\right) \\
    \prod_{i=1}^{n} I\{k_{i}\leq \ell_{i}\}\frac{\ell_{i}!}{(\ell_{i}-k_{i})!}t_{i}^{\ell_{i}-k_{i}}
\end{multline}


Incorporating the constants from Equation~\ref{chareq}, noting again that the negative signs will cancel, the full sum is
\begin{multline}
\sum_{\ell=0}^{\infty} \;\; (\frac{1}{2})^{\ell}/\ell!   \sum_{\{(\ell_{1},...,\ell_{n})\mid\sum_{i}l_{i} = 2\ell\}}  \left(
\sum_{\{(\ell_{11},\ell_{12},...,\ell_{nn})\mid\sum_{j}(\ell_{hj}+\ell_{jh})=\ell_{i},h=1,\dots,n\}}
\left( \begin{array}{c}
\ell  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\prod_{ij}\sigma_{ij}^{\ell_{ij}}\right)  \\
\prod_{i=1}^{n} I\{k_{i}\leq \ell_{i}\}\frac{\ell_{i}!}{(\ell_{i}-k_{i})!}t_{i}^{\ell_{i}-k_{i}}
\end{multline}

Setting $t=0$, only terms with $\ell_{i}=k_{i}$ for all $i$ will remain.
Otherwise, the only $\ell$ in the infinite sum which occurs is for $\ell = \sum_{i=1}^{n} k_{i}/2$.
So this reduces to
\begin{equation}
(\frac{1}{2})^{\sum_{i=1}^{n} k_{i}/2}/(\sum_{i=1}^{n} k_{i}/2)! \;\; \left(
\sum_{\{(\ell_{11},...,\ell_{nn})\mid\sum_{j}(\ell_{hj}+\ell_{jh})=k_{i},h=1,\dots,n\}}
\left( \begin{array}{c}
\sum_{i=1}^{n} k_{i}/2  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\prod_{ij}\sigma_{ij}^{\ell_{ij}}\right)\,\, \prod_{i=1}^{n} k_{i}!
\end{equation}
Rearranging the terms, we have
\begin{equation}
m_{k_{1},...,k_{n}} = C \sum_{\{(\ell_{11},\ell_{12},...,\ell_{nn})\mid\sum_{j=1}^{n}(\ell_{jh}+\ell_{hj})=k_{h}, h=1,...,n\}}
\left( \begin{array}{c}
\sum_{i=1}^{n} k_{i}/2  \\
\ell_{11} \dots \ell_{nn}
\end{array}
\right)
\prod_{ij}\sigma_{ij}^{\ell_{ij}}
\end{equation}
where
\begin{equation}
C = \frac{1}{2}^{\sum_{i=1}^{n} k_{i}/2}(\prod_{i=1}^{n} k_{i}!)/(\sum_{i=1}^{n} k_{i}/2)!
\end{equation}

This formula shows that evaluating $m_{k_{1},...,k_{n}}$ symbolically requires enumerating
all $n \times n$-dimensional matrices of non-negative integers, $(\ell_{ij})$, which satisfy the condition
\begin{equation}
\label{criterion}
\sum_{j=1}^{n}(\ell_{ji}+\ell_{ij})=k_{i}, \;\;\; i=1,...,n
\end{equation}


Conceding now that the symbols $\sigma_{ij}$ and $\sigma_{ji}$ signify the same entity,
we can search for $(\ell_{ij})$ by looking only at terms $\prod_{ij}\sigma_{ij}^{\ell_{ij}}$ for which $i \leq j$.
In fact, any other matrix for the term can be obtained by decrementing $\ell_{ij}$ and incrementing $\ell_{ji}$ by the same integer
for one or more subscripts for which $i < j$.
For any $\sigma_{ij}^{\ell_{ij}}$ in the term, this can be done in $\ell_{ij}+1$ ways.
So there are a total of $\prod_{i < j} (\ell_{ij}+1)$ transpositions.
The multinomial coefficients derived above must be applied separately to each of these $(\ell_{ij})$ matrices.
Thus, the full coefficient for a matrix will include as a multiplier
the sum of these coefficients over all of the $\prod_{i < j} (\ell_{ij}+1)$ transposed matrices.
Let $\Upsilon$ be the set of upper-triangular integer matrices, and, for any $(\ell_{ij}) \in \Upsilon$,
let $\Lambda((\ell_{ij}))$ be the set of all integer matrices $(h_{ij})$ obtained by so transposing $(\ell_{ij})$.
Then the sum above can be decomposed in terms of $\Upsilon$ and $\Lambda((\ell_{ij}))$ for each $(\ell_{ij}) \in \Upsilon$:
\begin{multline}
m_{k_{1},...,k_{n}} = \\
C \sum_{\{(\ell_{11},\ell_{12},...,\ell_{nn})\in \Upsilon \mid\sum_{j=1}^{n}(\ell_{jh}+\ell_{hj})=k_{h}, h=1,...,n\}}
                      \sum_{\{(h_{ij}) \in \Lambda((\ell_{ij}))\}}
\left( \begin{array}{c}
\sum_{i=1}^{n} k_{i}/2  \\
h_{11} \dots h_{nn}
\end{array}
\right)
\prod_{ij}\sigma_{ij}^{h_{ij}}
\end{multline}
But by symmetry, the products in $(\sigma_{ij})$ are the same for each member of $\Lambda((\ell_{ij}))$,
specifically $\prod_{ij}\sigma_{ij}^{\ell_{ij}}$.
So the final formula is
\begin{multline}
\label{basicformula}
m_{k_{1},...,k_{n}} =
\left[ \frac{1}{2}^{\sum_{i=1}^{n} k_{i}/2}(\prod_{i=1}^{n} k_{i}!)/(\sum_{i=1}^{n} k_{i}/2)! \right] \\
\sum_{\{(\ell_{11},\ell_{12},...,\ell_{nn})\in \Upsilon \mid\sum_{j=1}^{n}(\ell_{jh}+\ell_{hj})=k_{h}, h=1,...,n\}}
\left[
\sum_{\{(h_{ij}) \in \Lambda((\ell_{ij}))\}}
\left( \begin{array}{c}
\sum_{i=1}^{n} k_{i}/2  \\
h_{11} \dots h_{nn}
\end{array}
\right)
\right]
\prod_{ij}\sigma_{ij}^{\ell_{ij}}
\end{multline}


\section{Non-central moments}
\label{nonzeromeans}

We now drop the condition that$\mu = 0$.
That is, we want to calculate the symbolic representation of $E[X_{1}^{k_{1}}X_{2}^{k_{2}} \cdots X_{n}^{k_{n}}\mid \mu,\Sigma]$, where $X \sim N(\mu,\Sigma)$.
First, transform to $Y = X -\mu \sim N(0,\Sigma)$, so that $E[Y\mid \mu,\Sigma] = 0$.
Then, since $X = Y + \mu$,
\begin{equation}
E[X_{1}^{k_{1}}X_{2}^{k_{2}} \cdots X_{n}^{k_{n}}\mid \mu,\Sigma] = E[(Y_{1}+\mu_{1})^{k_{1}}(Y_{2}+\mu_{2})^{k_{2}} \cdots (Y_{n}+\mu_{n})^{k_{n}}\mid \mu,\Sigma]
\end{equation}
For each term in the product we can choose $Y_{i}$ $l_{i}$ times and $\mu_{i}$ $k_{i}-l_{i}$ times,
and this can be done in
\begin{equation}
\left( \begin{array}{c}
k_{i}  \\
l_{i}

\end{array}
\right)
\end{equation}
ways.  So, using the notation $k = (k_{1},k_{2}, \cdots k_{n})$, this is
\begin{equation}
(Y_{1}+\mu_{1})^{k_{1}}(Y_{2}+\mu_{2})^{k_{2}} \cdots (Y_{n}+\mu_{n})^{k_{n}} =
\sum_{0 \leq l \leq k} \;\;
\;\; \prod_{i=1}^{n}
\left( \begin{array}{c}
k_{i}  \\
l_{i}
\end{array}
\right)
\prod_{i=1}^{n} Y_{i}^{l_{i}} \;\; \prod_{i=1}^{n} \mu_{i}^{k_{i}-l_{i}}
\end{equation}
Therefore, since $E[Y_{1}^{l_{1}}Y_{2}^{l_{2}} \cdots Y_{n}^{l_{n}}\mid \mu,\Sigma] = E[Y_{1}^{l_{1}}Y_{2}^{l_{2}} \cdots Y_{n}^{l_{n}}\mid \Sigma] = m_{l_{1},...,l_{n}}$,
\begin{equation}
E[X_{1}^{k_{1}}X_{2}^{k_{2}} \cdots X_{n}^{k_{n}}\mid \mu,\Sigma] =
\sum_{0 \leq l \leq k} \;\;
\;\; \left(\prod_{i=1}^{n}
 \left( \begin{array}{c}
k_{i}  \\
l_{i}
\end{array}
\right)
 \;\;  \prod_{i=1}^{n} \mu_{i}^{k_{i}-l_{i}}\right) \;\; m_{l_{1},...,l_{n}}
\end{equation}
There are $\lceil \prod_{i=1}^{n} (k_{i}+1) \rceil$ such moments. 
However, since the $m_{l_{1},...,l_{n}}$ are central moments, they are $0$ for odd moments,
and we need to consider only even moments, although $k$ itself does not have to be even.
So, this can be written
\begin{equation}
\label{basicmean}
E[X_{1}^{k_{1}}X_{2}^{k_{2}} \cdots X_{n}^{k_{n}}\mid \mu,\Sigma] =
\sum_{0 \leq l \leq k, \; l \; {\rm even}} \;\;
\;\; \left(\prod_{i=1}^{n}
 \left( \begin{array}{c}
k_{i}  \\
l_{i}
\end{array}
\right)
 \;\;  \prod_{i=1}^{n} \mu_{i}^{k_{i}-l_{i}}\right) \;\; m_{l_{1},...,l_{n}}
\end{equation}

This formula involves the symbols 
$\prod_{i=1}^{n} \mu_{i}^{l_{i}}, {\rm for} \; 0 \leq l \leq k$.
There are $\prod_{i=1}^{n} (k_{i}+1)$ such "$\mu$-products".
As a general formula, all these terms are required, but in numerical calculations if any $\mu_{i} = 0$,
all $\mu$-products containing that $\mu_{i}$ will be $0$.

Each $m_{l_{1},...,l_{n}}$ term in the sum has a unique $\sigma$-product
(such as $\sigma_{1,4}\sigma_{2,4}^{2}\sigma_{3,3}$),
so that the $\mu$-products, which are also unique, cannot be combined across $\sigma$-expressions.
That is, the sum above is the best we can do.

Here is an example of a non-central moment:
\begin{multline}
E[X_{1}^{1}X_{2}^{2}X_{3}^{3}\mid \mu,\Sigma]=\\
  \; \mu_{1}\mu_{2}^{2}\mu_{3}^{3}
\\
+ 2 \; \mu_{2}\mu_{3}^{3}
( \sigma_{1,2} )
 \\
+   \; \mu_{1}\mu_{3}^{3}
( \sigma_{2,2} )
 \\
+ 3 \; \mu_{2}^{2}\mu_{3}^{2}
( \sigma_{1,3} )
 \\
+ 6 \; \mu_{1}\mu_{2}\mu_{3}^{2}
( \sigma_{2,3} )
 \\
+ 3 \; \mu_{3}^{2}
( 2\sigma_{1,2}\sigma_{2,3}+
\sigma_{1,3}\sigma_{2,2} )
 \\
+ 3 \; \mu_{1}\mu_{2}^{2}\mu_{3}
( \sigma_{3,3} )
 \\
+ 6 \; \mu_{2}\mu_{3}
( \sigma_{1,2}\sigma_{3,3}+
2\sigma_{1,3}\sigma_{2,3} )
 \\
+ 3 \; \mu_{1}\mu_{3}
( \sigma_{2,2}\sigma_{3,3}+
2\sigma_{2,3}^{2} )
 \\
+   \; \mu_{2}^{2}
( 3\sigma_{1,3}\sigma_{3,3} )
 \\
+ 2 \; \mu_{1}\mu_{2}
( 3\sigma_{2,3}\sigma_{3,3} )
 \\
+   \;
( 6\sigma_{1,2}\sigma_{2,3}\sigma_{3,3}+
3\sigma_{1,3}\sigma_{2,2}\sigma_{3,3}+
6\sigma_{1,3}\sigma_{2,3}^{2} )
 \\
\end{multline}



\section{Expected Values of Multivariate Polynomials}
\label{multipol}


By a multivariate polynomial, we mean a linear combination of products of powers of the components of $X$.
That is, if $K$ is a set of exponents $k = (k_{1}, \cdots, k_{r})$, and $\left\{\gamma_{k} \mid k \in K\right\}$ is a corresponding set of constants,
a multivariate polynomial $p$ is defined as
\begin{equation}
p(X_{1}, \cdots X_{r}) = \sum_{k\in K} \; \gamma_{k} \; X_{1}^{k_{1}} \cdots X_{r}^{k_{r}}
\end{equation}
For example,
\begin{equation}
p(X_{1},X_{2},X_{3}) = 5 + 3 X_{1}X_{2}^{2} +  X_{1}^{2}X_{2}^{3} + X_{3}^{4}
\end{equation}

Multivariate polynomials are discussed by \cite{hankin08} and \cite{kahle14}.
Functions for the evaluation and algebra of multivariate polynomials is included in the \code{multipol} and \code{mpoly} packages.

A multivariate polynomial can be integrated against the multivariate normal distribution to give its expected value.
Since these polynomials are linear combinations of products of powers of the components of $X$,
their expectations are obtained easily from the corresponding noncentral moments.
So if $X \sim N(\mu,\Sigma)$, the expectation of the general multivariate polynomial above is
\begin{equation}
E[p(X_{1}, \cdots X_{r})\mid \mu,\Sigma]  = \sum_{k\in K} \; \gamma_{k} \; E\left[ X_{1}^{k_{1}} \cdots X_{r}^{k_{r}}\mid \mu,\Sigma \right] 
\end{equation}
where the expectations are given in equation \ref{basicmean}.
Functions to compute these expectations are described below.


\section{Multivariate moments, matchings, and phylogenetic trees}

Phylogenetic trees are used to ascertain evolutionary relationships among species \citep{felsenstein2004}.
A common model is  the rooted, bifurcating tree.
\cite{diaconis1998} describe such a tree as ``a binary rooted tree with $n$ labeled leaves.''
Felsenstein (Chapter 3) shows that the number of rooted, bifurcating trees for $n$ species is
\begin{equation}
\frac{(2n-3)!}{2^{n-2}(n-2)!}
\end{equation}
He also discusses the numbers of distinct shapes of trees and displays these shapes for $n=2$ to $n=6$ species.
Diaconis and Holmes described how such phylogenetic trees are closely related to matchings.

Surprisingly, at least for computationally feasible values of $n$,
for the first moment 
\begin{equation}
E[X_{1} \cdots X_{2(n-1)}]
\end{equation}
the number of upper-triangular integer matrices
that determine the moment is the same as the number of phylogenetic trees for $n$ species.
In fact, for any number of species there is a one-to-one correspondence between two seemingly disparate objects,
phylogenetic trees and the symbolic first central moments of the multivariate normal distribution.
The correspondence is established through matchings.


Diaconis and Holmes define a perfect matching on $2n$ points as a ``pairing of the points into $n$ groups of two.''
We show that there is a one-to-one correspondence, or bijection,
between the components of a representation of a multivariate normal first moment
with $2n$ components and matchings on $2n$ points.



For any $n$, let $\Lambda (2n)$ be the collection of $2n \times 2n$-dimensional matrices associated with the
symbolic first moment of the $2n$-dimensional multivariate normal distribution.
Let $\Gamma (2n)$ be the set of perfect matchings on a set $\Delta$ of $2n$ points.

First suppose that $L = (l_{ij}) \in\Lambda (2n)$. Then criterion \ref{criterion} becomes
\begin{equation}
\label{criterion1}
\sum_{j=1}^{2n}(l_{ji}+l_{ij}) = 1, \;\;\; i=1,...,2n
\end{equation}
That is, there is exactly one $l_{ij}=1$
in the union of the cell in row $k$ and column $k$ for each $1 \leq k \leq n$.
The sum of these sums over $i$ is twice the total count of cells with $1$s, and is obviously $2n$.
Therefore, the number of cells containing $1$ is $n$.


Let $\gamma = \{(i,j)\mid l_{i,j}=1, i=1,...,2n,j=1,...,2n\}$, that is, the set of cells containing $1$.
By criterion \ref{criterion1}, for any pair $(i,j) \in \Gamma$, $i \neq j$ since the diagonal terms in the sum occur twice,
so that if $i = j$ we would have $k_{j} \geq 2$.
Furthermore, if any two pairs $(i_{1},j_{1})$ and $(i_{2},j_{2})$ were to share a common index, say $i_{1} = j_{2} = h$,
then the union of row $h$ and column $h$ would have at least two cells containing $1$, leading to $k_{h} \geq 2$.
Therefore, the indices in these cells are unique, and since there are $n$ cells containing $1$,
there are $2n$ distinct indices among these cells, namely $1,...,2n$.
It is clear that $\gamma$ is a perfect matching on this set of indices.
So for any $L \in \Lambda (2n)$ there corresponds a perfect matching $\gamma$ on $1,...,2n$.


Now suppose that $\gamma \in \Gamma (2n)$.
For specificity, assume that the points in $\gamma$ are numbered $1,...,2n$.
Order the pairs in $\gamma$ lexicographically by the second components so that for any pair $(i,j)$, $i < j$.
Let $L = (l_{i,j})$ be a $2n \times 2n$-dimensional matrix of $0$s with the rows and columns labeled $1,...,2n$.
Set any cell in $L$ corresponding to a pair in $\gamma$ to $1$.
Since any point occurs in one and only one pair, for any index $i$
there is one and only one cell in row $i$ and column $i$ with a $1$ in it.
Every point is in the matching, so for each $i$, there must be a $1$ in
one cell of row $i$ and column $i$.
So for any $\gamma \in \Gamma (2n)$ there is an $L \in \Lambda (2n)$.

Invoking the algorithm projecting $\Lambda (2n)$ into $\Gamma (2n)$, the $L$ computed above will be mapped back into the matching just defined.
That is, these two procedures are inverses.


Paradis describes various formats for encoding trees
and \proglang{R} functions to transform one to another.
Functions are described below to transform between an \pkg{ape} matching object or Newick format to a moment object.
Using these functions and the functions \code{read.tree}, \code{write.tree}, \code{as.matching}, and \code{as.phylo.matching}
from the \pkg{ape} package,
trees can be transformed among the \code{moment}, \code{matching}, \code{phylo}, and Newick formats.
Table \ref{matchingfunctions} shows the functions used for each transformation.
Empty cells can be reached by obvious paths.
\small
\begin{table}[h]
\begin{center}
\begin{tabular}{l|cccc}
{\textbf{From}} & \multicolumn{4}{c}{\textbf{To}}  \\ \hline
              &  \underline{Newick}  &  \underline{Phylo}                   &  \underline{Matching}          &  \underline{Moment} \\
Newick        &    -----------      &  \code{read.tree}          &        & \code{toMoment}   \\
Phylo         & \code{write.tree}  &     -----------         & \code{as.Matching}   &         \\
Matching      &    & \code{as.phylo.matching}   &     -----------   &   \code{toMoment}      \\
Moment        & \code{toNewick}  &    & \code{toMatching}   &  -----------  \\ \hline
\end{tabular}
\end{center}
\caption{Functions for converting between tree formats}
\label{matchingfunctions}
\end{table}
\normalsize


\section{Discussion}


Formula~\ref{basicformula} was implemented in \proglang{R} with a recursive function
that determines the set of upper-triangular integer matrices that satisfy Criterion~\ref{criterion}.
A second function calculates their associated coefficients.
Additional functions were written to create {\LaTeX} \citep{latexcitation} code to display the moments symbolically,
and to calculate the moments for specified variance-covariance matrices,
and these have been augmented with functions for non-central moments.

The potential for complexity in these computations is seen from the results in Table~\ref{examples}.
In this table, $n$  is the dimension of the multivariate vector and
$\#(\sigma_{ij})$  is the number of distinct elements in the variance-covariance matrix, $N = \frac{n(n+1}{2}$.
{\it Size} is measured by two values, $M$ and $r$.
$M$ is the total of the exponents of the moment, $M = \sum_{i=1}^{n} k_{i}$.
The value $r$ is the number of terms for a moment $E[X_{1}^{1},...,X_{n}^{1}]$ with all exponents equal to $1$,
which is $(2M-1)!/(2^{M-1}(M-1)!)$  \citep{wikipedia}.
{\it Example} is a moment of the given {\it Size}.
{\it Potential Terms} is a maximum for the number of $(\ell_{ij})$ matrices to be checked for this example,
determined as the product of $1+\max(k_{i},k_{j})$ over $i\neq j$
times the product of $1+[k_{i}/2]$ over $i$, where $[\, ]$ denotes truncation.
The last column, {\it \# Terms}, is the actual number of terms in the moment as determined by the functions.
\begin{sidewaystable}


\begin{center}
\vspace{.1in}
\begin{tabular}{|lcccccc|}
\hline
$n$  &   $\#(\sigma_{ij})$   & \multicolumn{2}{c}{Size}    & Example & Potential Terms     &  \# Terms  \\
\hline
  & $N$  &  $M$  &   $r$    & $E[X_{1}^{k_{1}} \cdots X_{n}^{k_{n}}]$ &  &  \\
\hline
2 &  3  & 2   & 1              & $E[X_{1}^{1}X_{2}^{1}]$                    &       2          &       1 \\
2 &  3  & 6   & 15             & $E[X_{1}^{3}X_{2}^{3}]$                    &        16         &       2 \\
2 &  3  & 20  & 654,729,075      & $E[X_{1}^{10}X_{2}^{10}]$                  &        396         &       6 \\
4 &  10 & 8   & 105            & $E[X_{1}^{2}X_{2}^{2}X_{3}^{2}X_{4}^{2}]$  &        11,664         &       17 \\
4 &  10 & 12  & 10,395          & $E[X_{1}^{1}X_{2}^{3}X_{3}^{4}X_{4}^{4}]$  &     225,000            &       27 \\
4 &  10 & 20  &  654,729,075 & $E[X_{1}^{5}X_{2}^{5}X_{3}^{5}X_{4}^{5}]$  &     3,779,136            &       306 \\
6 &  21 & 12  & 10,395 & $E[X_{1}^{2}X_{2}^{2}X_{3}^{2}X_{4}^{2}X_{5}^{2}X_{6}^{2}]$  & 918,330,048    &     388 \\
6 &  21 & 18  & 34,459,425 & $E[X_{1}^{1}X_{3}^{2}X_{3}^{3}X_{4}^{4}X_{5}^{4}X_{6}^{4}]$  &  $1.265625\times 10^{12}$   &  2,082 \\
8 &  36 & 16  & 2,027,025 & $E[X_{1}^{2}X_{2}^{2}X_{3}^{2}X_{4}^{2}X_{5}^{2}X_{6}^{2}X_{7}^{2}X_{8}^{2}]$ &   $5.856459\times 10^{15}$             & 18,155  \\
8 &  36 & 24  & 316,234,143,225 & $E[X_{1}^{3}X_{2}^{3}X_{3}^{3}X_{4}^{3}X_{5}^{3}X_{6}^{3}X_{7}^{3}X_{8}^{3}]$ &   $1.844674\times 10^{19 }$ & $\geq 287,800^{*}$  \\
9 &  45 & 18  & 34,459,425 & $E[X_{1}^{2}X_{2}^{2}X_{3}^{2}X_{4}^{2}X_{5}^{2}X_{6}^{2}X_{7}^{2}X_{8}^{2}X_{9}^{2}]$ &  $7.68484 \times 10^{19 }$     & 6,763,895  \\
\hline
\end{tabular}
\caption{\label{examples}Examples of sizes of moment problem
(*: The function aborted after 2 days due a space allocation problem.)}
\end{center}
\end{sidewaystable}



It is clear that computation of high-order moments will be very intensive.
The equivalence of first moments of dimension $2(n-1)$ and phylogenetic trees
for $n$ species has been shown above.
Note that it would be possible that the $L$-representation of symbolic multivariate normal moments
is sufficient but not necessary to determine the moment.  
However, the $L$ representation of a multivariate normal moment can be determined by inspection
from the $\sigma$ terms, so that the symbolic moment and its $L$ representation are, in fact, equivalent. 


\citet[page 61]{felsenstein2004} notes that finding the best tree using maximum parsimony is NP-hard,
as shown by \cite{foulds82}.
Felsenstein also notes that the Fitch algorithm for finding the number of changes
in a given bifurcating tree is efficient \citep[page 11]{felsenstein2004}.
This implies that finding all trees is itself NP-hard,
and that therefore computing first multivariate moments is NP-hard.
Note that for any even $n$ and any integer $r$, the moment
$E[X_{1}^{r} \; X_{2}^{r} \;  \cdots  \; X_{n}^{r}]$
has $L$ components whose terms are $L$ matrices from
the representation of $E[X_{1}^{1} \; X_{2}^{1} \;  \cdots  \; X_{n}^{1}]$ multiplied by $r$. 
So the representation of this first moment can be obtained from inspection of 
the higher moment, implying that computing these higher moments is also NP-hard.


Finally, Criterion~\ref{criterion} might arise in other contexts, such as networks \citep{networkcitation}.
For example, suppose that there are $n$ airports and airport $i$ can accommodate $k_{i}$ arrivals or departures on a day,
where a plane may take off and land at the same airport.
This network is illustrated in Figure~\ref{airportnetwork}.
The problem is to determine the set of flights between airports that totally expend the capacities, $k_{i}$, of all airports.
For this problem, $\ell_{ij}$ represents the number of flights from airport $i$ to airport $j$,
and $\ell_{ii}$ is the number of flights which start and end at airport $i$.
The set of flights is the set satisfying Criterion~\ref{criterion}.
\setcounter{figure}{0}
\begin{figure}[th]
\label{airportslabel}
\begin{center}
\setlength{\unitlength}{.1in}

\begin{picture}(40,5)

\put(16,2){\circle{5}}
\put(15.4,1.8){$k_{i}$}

\put(26,2){\circle{5}}
\put(25.4,1.8){$k_{j}$}

\put(18,3.5){\vector(1,0){5.8}}
\put(20.6,4.5){$\ell_{ij}$}
\put(23.8,0.5){\vector(-1,0){5.8}}
\put(20.6,-1.2){$\ell_{ji}$}

\put(13.9,3.5){\vector(1,0){.2}}
\put(13.735,2){\oval(5.15,3)[l]}
\put(9.2,2){$\ell_{ii}$}

\put(28.1,3.5){\vector(-1,0){.2}}
\put(28.2,2){\oval(5.15,3)[r]}
\put(31.7,2){$\ell_{jj}$}

\end{picture}

\end{center}
\caption{\label{airportnetwork}Airport Capacity Network}
\end{figure}


\section{R functions}


\subsection{R functions for computing central moments}

The following \proglang{R} functions calculate expressions and values for central and non-central moments.

In these \proglang{R} functions,
upper-triangular matrices $(\ell_{ij})$ for $n$ dimensions are represented as vectors of length $n(n+1)$,
with row 1 followed by row 2, etc.
For example, for $n=2$, $(\ell_{ij})$ is represented as $(\ell_{11},\ell_{12},\ell_{22})$.
Each such matrix represents the exponents for a single product of $\sigma_{ij}$s.
For example, $(1,2,0)$ represents $\sigma_{11}^{1}\sigma_{12}^{2}\sigma_{22}^{0} = \sigma_{11}\sigma_{12}^{2}$.
The representations are accumulated and stored internally, raising the possibility of
space allocation problems as encountered in the second to last example in Table~\ref{examples}.
This problem could be alleviated by saving the representation matrix to a file instead.

The function \code{multmoments} searches the integer matrices for those satisfying Criterion~\ref{criterion}.
This is a recursive function which implements a branch-and-bound algorithm.
The function \code{multmoments} is called by \code{callmultmoments}.
This function initializes variables,
determines the coefficients of the terms from the upper-triangular representations,
and returns a list consisting of the original moment vector, the set of representations,
and the corresponding set of coefficients.
This list is set to class \code{moment}.
The \code{moment} class has three methods: \code{print}, \code{toLatex}, \code{evaluate}, and \code{simulate}.


The \code{print} method prints a moment object, usually the output of \code{callmultmoments},
showing a mathematical representation of the moment,
followed by the rows of the representation with the corresponding coefficient attached.


The \code{toLatex} method uses a moment object, usually the output of \code{callmultmoments},
to determine the {\LaTeX} code for the moment sorted lexicographically.
Note that it inserts double backslashes where {\LaTeX} requires a backslash;
these can be reset to single backslashes by writing the output to a file using the \proglang{R} function \code{writeLines} (\pkg{base} package),
as illustrated below.

The \code{evaluate} method determines the value of a \code{moment} object for
a specified variance-covariance matrix $\Sigma$, which must be represented as an upper-triangular matrix
in vector form.

The \code{simulate} method uses Monte Carlo integration \cite{rizzo08} to numerically approximate a \code{moment} object
for a specified mean and variance-covariance matrix $\Sigma$ (represented as a square matrix
in vector form), with a specified number of random samples.
Note that \code{simulate} uses only the moment definition, not the representation,
so can be used with any moment in vector notation by converting the vector to a moment object as shown below.
The \code{simulate} method uses the \code{rmvnorm} function from the \pkg{mvtnorm} package (\cite{mvtnorm08}.)

\subsection{R functions for computing non-central moments}

The \code{toLatex_noncentral} function produces a Latex expression for a noncentral moment.
This function uses central moments in the environment specified in the function call.  
The default environment is \code{symmoments}.

The \code{evaluate_noncentral} function evaluates a noncentral moment with specified mean and covariance matrix.
This function uses central moments in the environment specified in the function call.  
The default environment is \code{symmoments}.  


The \code{evaluate_expected.polynomial} function evaluates a multivariate polynomial,
assuming a specified non-central multivariate distribution.
This is done by calling \code{evaluate_noncentral} for each term in the polynomial.
The polynomial is expressed as a \code{multipol} object, an \code{mpoly} object, or defined by a list giving
the exponents in each term and its coefficient.
The central moment objects required for this computation must be computed before invoking this function.
The central moments must reside in the environment specified in the function call.  
The default environment is \code{symmoments}.  

The \code{convert.multipol} function converts between \code{multipol} objects and lists representing the same multivariate polynomial.

The \code{convert.mpoly} function converts between \code{mpoly} objects and lists representing the same multivariate polynomial.
Note that \code{convert.multipol} and \code{convert.mpoly} can be composed to transform between these two types of objects.
However, the names of the variables will not be preserved.

The \code{tounsorted} function produces an unsorted central moment object from a sorted object of class "moment".
Unsorted moments are those with exponents not in numeric order, e.g., $m_{3,1,2}$.

The \code{make.all.moments} function creates all central moment objects of a specified or smaller size. 
That is, if $c(k_{1},...,k_{n})$
is input, all moments $m_{l_{1},...,l_{n}}$ with $(l_{1},...,l_{n})\leq (k_{1},...,k_{n})$ are produced.
The moments are placed in the \code{symmoments} environment.  If this environment does not exist, the user is prompted to create it.
Moments that exist in the \code{symmoments} environment are not recreated.  
Unsorted moments, those with exponents are not in numeric order,
are created using the \code{tounsorted} function to transform from the sorted moment.
If the sorted moment does not exist, it is created.
Moments of lower dimension are not created; for example, if $c(2,4)$ is  input, $m_{20}$ is created, but $m_{2}$ is not.


As a convention, moments are named $mij..l$, e.g., $m136$.  If any exponent is greater than $9$,
lower case letters and then upper case letters are used.  For example,
m3bA is the name of the moment $m_{3,11,36}$.
The function \code{callmultmoments} theoretically accommodates larger exponents, but the largest exponent allowed by this scheme is $61$.
If an object with a name of this form exists but is not an object of class "moment",
it is replaced (overwritten) by the moment object.

The \code{integrate.polynomial} function integrates a multivariate polynomial against a specified non-central multivariate distribution 
using ordinary integration by invoking the \code{adaptIntegrate} function from the \pkg{cubature} package.
This function was written mainly for checking.


\subsection{R functions for correspondence with phylogenetic trees}

Three \proglang{R} functions were written to implement the correspondences among
the moment, matching, and Newick formats.

The \code{toMoment} function converts a tree to moment format.
Its input is a tree in Newick format \citep{newick} or a \code{matching} object from the \pkg{ape} package \citep{paradis2004}.
\code{toMoment} outputs an object of class \code{L-matrix}.
This class consists of $5$ components as defined in the function description,
and includes the $L$ matrix.
Note that the \pkg{symmoments} package
defines the \code{moment} class, which encompasses all L-matrices for
symbolic multivariate normal moments.

The \code{toNewick} function converts a tree in moment format to Newick format.
The input can be an \code{L-matrix} object, a square $L$ matrix,
or an $L$ matrix in reduced upper-triangular (vector) form.
The \code{toNewick} function sets its list output to class \code{L-Newick},
which has $5$ components, including the tree in Newick format.

The \code{toMatching} function converts a tree in moment format to an \pkg{ape} \code{matching} object.
The input can be an \code{L-matrix} object, a square $L$ matrix,
or an $L$ matrix in reduced  upper-triangular (vector) form.
\code{toMatching} sets its list output to class \code{matching}
to conform to the definition in the \pkg{ape} package.


\subsection{Examples of computing central moments}
%\scriptsize

<<lib,echo=FALSE>>=
library(symmoments)
@

These examples of the functions for central moments use the following moment:

\begin{multline}
\label{m1234example}
<<m1234, echo=FALSE, results=tex>>=
writeLines(toLatex(callmultmoments(c(1,2,3,4))))
@
\end{multline}
\normalsize
The use of the \code{toLatex} and \code{evaluate} methods and \code{writeLines}
is also illustrated.
The file created by \code{writeLines} can be included in a {\LaTeX} document using the \verb+\input+ command,
or can be included in Sweave as done here.

\clearpage
\subsection*{Compute the representation of a central moment (\code{callmultmoments})}
The following code calculates a central moment and shows the three components, \code{moment}, \code{representation}, and \code{coefficients}.
\scriptsize
<<callmultmoments>>=
m1234 <- callmultmoments(c(1,2,3,4))
unclass(m1234)
@
\normalsize
\clearpage
\subsection*{Print a representation of a central moment (\code{print} method)}

The following shows the result of using the \code{print} method with the moment in Equation~\ref{m1234example}.

<<print-method>>=
m1234
@

\clearpage
\subsection*{Compute the {\LaTeX} representation of a central moment (\code{toLatex} method)}

The following shows the computation of the representation of the central moment in Equation~\ref{m1234example}.
\tiny
<<toLatex-method>>=
toLatex(m1234)
@
\normalsize
The \LaTeX representation can be written to a file using the \code{writeLines} function as follows:
<<toLatex-method2, eval=FALSE>>=
writeLines(toLatex(m1234), "yourfilename")
@

\subsection*{Compute a value of a central moment (\code{evaluate} method)}

 
The code below evaluates the moment at the following variance-covariance matrix:
<<covariance,echo=FALSE>>=
matrix(c(4,2,1,1,2,3,1,1,1,1,2,1,1,1,1,2),nrow=4)
@

<<evaluate-method>>=
evaluate(m1234, c(4, 2, 1, 1, 3, 1, 1, 2, 1, 2))
@

\clearpage
\subsection*{Estimate a central moment using simulation (\code{simulate} method)}

The value of $E[ X_{ 1 }^{ 1 } X_{ 2 }^{ 2 } X_{ 3 }^{ 3 } X_{ 4 }^{ 4 } ]$ when
$X$ has a normal distribution with mean $\mu = (1,2,0,3)$ and the same variance-covariance matrix
as above could be estimated using \code{simulate} with $1000$ random samples
More samples may be required to obtain an accurate estimate.

<<simulate-method, eval=TRUE>>=
#simulate(m1234,1000,NULL, c(1,2,0,3),c(4,2,1,1,2,3,1,1,1,1,2,1,1,1,1,2))
@

\subsection{Examples of computing non-central moments}

 Note that several of these examples would require moments of various sizes exist, so have not been run.
 These moments can be created using the \code{make.all.moments} function,
 or directly using the \code{callmultmoments} function.
\subsection*{Calculate symbolic representation of a non-central moment (not run)}
<<noncentral1, echo=TRUE, term=TRUE, results=verbatim>>=
# as.matrix(toLatex_noncentral(c(1,3))) 
@
\subsection*{Create all 2-dimensional moment objects with exponents up to 3 (not run)}

Note that this will overwrite objects in the \code{symmoments} environment with names of the form  mxx, where 0 <= x <= 3
<<noncentral2, echo=TRUE, term=TRUE, results=verbatim>>=
# make.all.moments(c(3,3))
@
\subsection*{Evaluate a non-central moment at a specified mean and covariance matrix (not run)}

Note that this would require moments to exist of order up to c(1,3).
<<noncentral2a, echo=TRUE, term=TRUE, results=verbatim>>=
# evaluate_noncentral(c(1,3),c(1,2),c(1,0,1))
@

\subsection*{Create an \code{mpoly} object (not run)}
<<noncentral3a, echo=FALSE>>=
# the mpoly library is required for this example
@
<<noncentral3, echo=TRUE, term=TRUE, results=verbatim, print=TRUE>>=
# t0 <- mpoly::mpoly(list(c(coef=3,x1=2),c(coef=2,x1=1,x2=3),
#                    c(coef=-4,z=2),c(coef=1,x1=1,x2=2,z=1)))                       
# print(t0)
@
\subsection*{Convert an \code{mpoly} object to a moment object (not run)}
<<<noncentral4, echo=TRUE, term=TRUE, results=verbatim>>=
# t1 <<- convert.mpoly(t0)      
# t1
@
\subsection*{Convert a \code{moment} object to a \code{multipol} object (not run)}
<<noncentral5, echo=TRUE, term=TRUE, results=verbatim>>=
# t2 <<- convert.multipol(t1)      
# t2
@
\subsection*{Convert from \code{multipol} back to \code{mpoly} through \code{moment} (not run)}
<<noncentral6, echo=TRUE, term=TRUE, results=verbatim, print=TRUE>>=
# print(mpoly::mpoly(convert.mpoly(convert.multipol(t2))))     
@ 

\subsection*{Evaluate the expected value of a multivariate polynomial (not run)}

See the Latex example in the text. This requires moments to exist of order up to $c(1,2,3)$.
<<noncentral7, echo=TRUE, term=TRUE, results=verbatim, print=TRUE>>=
# evaluate_expected.polynomial(t0,c(1,2,3),c(1,0,0,1,0,1))
@

\normalsize
\subsection{Examples of correspondence between first multivariate moments and phylogenetic trees}
The following examples convert a tree in Newick format to
trees in phylo, matching, and moment format.

%\scriptsize
\subsection*{Create a Newick representation of a tree}
<<trees1, echo=TRUE, term=TRUE, results=verbatim>>=
exam.Newick      <- "(((a,b),c),d);"
@
\subsection*{Convert to \code{phylo} format}
<<trees2, echo=TRUE, term=TRUE, results=verbatim>>=
library(ape)
exam.phylo       <- read.tree(text=exam.Newick)
exam.phylo
@

\subsection*{Convert to matching format}
<<trees3, echo=TRUE, term=TRUE, results=verbatim>>=
exam.matching    <- as.matching(exam.phylo)
exam.matching 
@

\subsection*{Convert to L-matrix format}
<<trees3, echo=TRUE, term=TRUE, results=verbatim>>=
exam.L.matrix    <- toMoment(exam.matching)
exam.L.matrix
@

\subsection*{Convert back to various formats (not run)}
<<trees3, echo=TRUE, term=TRUE, results=verbatim>>=
# backto.matching  <- toMatching(exam.L.matrix )
# backto.L.matrix  <- toMoment(backto.matching$matching[,c(1,2)])
# backto.Newick  <- toNewick(exam.L.matrix$L,type="square")
# backto.L.matrix.tips <- toMoment(exam.Newick,tip.label=c("d","a","c","b"))
# backto.phylo     <- as.phylo.matching(backto.matching)
@

\normalsize


%%bibliographystyle{jss}
%%\bibliography{c:\research\multivariate_moments\moments}
\bibliography{moments}

\end{document}