%This is an AmS-TeX file
%producing 9 pages of output
\input amstex
\documentstyle{amsppt}
\NoRunningHeads
\TagsOnRight
\NoBlackBoxes
\define\lk{[\! [}
\define\[{[\! [}
\define\rk{]\! ]}
\define\]{]\! ]}
\topmatter
\title
combinatorial problems related to geometrically distributed random variables
\endtitle
\author Helmut Prodinger \endauthor
\affil
Department of Algebra and Discrete Mathematics \\
Technical University of Vienna, Austria
\endaffil
\address
\flushpar TU Vienna
\newline Wiedner Hauptstrasse 8--10
\newline A-1040 Vienna
\newline Austria
\endaddress
\email proding\@rsmb.tuwien.ac.at
\endemail
\endtopmatter
\redefine\P{\Bbb P}
\document
\head 1. Introduction \endhead
Motivated from Computer Science problems we consider the following situation
(compare \cite2 and \cite3). In these papers the reader will find a
more complete
description as well as additional references.
Let $X$ denote a geometrically distributed random variable, i.e.
$\P\{X=k\}=pq^{k-1}$ for $k\in\Bbb N$ and $q=1-p$. Assume that we have
$n$
independent random variables $X_1,\dots,X_n$ according to this distribution.
The first parameter of interest is the {\sl number of
left-to-right maxima\/},
where we say that $X_i$ is a left-to-right maximum (in the strict sense)
if it is strictly larger than the elements to left. A left-to-right maximum
in the loose sense is defined analogously but ``larger'' is replaced by
``larger or equal''.
The second parameter of interest is the {\sl (horizontal) path length\/},
i.e. the sum of the left-to-right maxima in the loose sense of all the
sequences $X_i,\dots,X_n$, where $i$ is running from 1 to $n$.
{\bf Example.} Consider the sequence $4,5,2,3,5$. It has 2
left-to-right maxima in the strict sense (4--5) and
3 left-to-right maxima in the loose sense (4--5--5).
For the path length we must consider the subsequences
$$\align
&4,5,2,3,5\\
&5,2,3,5\\
&2,3,5\\
&3,5\\
&5
\endalign
$$
with respectively $3, 2, 3, 2, 1$ left-to-right maxima. Therefore the
path length is $3+2+3+2+1=11$.
\head{2. Left-to-right maxima in the strict sense}\endhead
We can find a probability generating function by considering an
appropriate ``language''.
The ``letters'' will be denoted by $\bold 1,\bold 2,\dots\ $.
We decompose all sequences $X_1X_2\dots$ in a canonical way as follows:
We combine each left-to-right maximum
$\bold k$ with the following (smaller or equal) elements.
Such a part is decribed by
$$
\Cal A_k:=\bold k \{\bold 1, \dots , \bold{k}\}^*.
$$
Such a group may be present or not. This observation gives the desired
``language'', where $\varepsilon$ denotes the ``empty word'':
$$
\Cal L:=\big(\Cal A_1+\varepsilon\big)\cdot\big(\Cal A_2+\varepsilon\big)\cdot
\big(\Cal A_3+\varepsilon\big)\dots
$$
Now we want to mark each letter by a ``$z$'' and each left-to-right maximum
by a ``$y$''. The probability $pq^{k-1}$ for a letter $\bold k$ should
of course not being forgotten.
$\{\bold 1, \dots , \bold{k}\}$ maps into $z(1-q^k)$ and its star
$\{\bold 1, \dots , \bold{k}\}^*$ into $1\big/\big(1-z(1-q^k)\big)$.
So we obtain the generating function $F(z,y)$ as an infinite product:
$$
F(z,y)=\prod_{k\ge1}\left(1+\frac{yzpq^{k-1}}{1-z(1-q^k)}\right)
$$
To be explicit, the coefficient of $z^ny^k$ in $F(z,y)$
is the probability that
$n$ random variables have $k$ left-to-right maxima.
Observe that, as it is to be expected,
$F(z,1)=\dfrac{1}{1-z}$, as it is then a telescoping product.
Let $f(z)=\frac{\partial F(z,y)}{\partial y}\big|_{y=1}$. It is the
generating function for the expected values $E_n$, i.e. the $E_n=
[z^n]f(z)$. Performing this differentiation we are led to
$$
f(z)=\frac{pz}{1-z}\sum_{k\ge 0}\frac{q^k}{1-z(1-q^k)},
$$
which is also, by partial fraction decomposition,
$$
f(z)=p\sum_{k\ge 0}\left[\frac{1}{1-z}-\frac1{1-z(1-q^k)}\right].
$$
From this the coefficients $E_n$ are easy to see, because there are only
geometric series:
$$
E_n=
[z^n]f(z)=p\sum_{k\ge 0}\left[1-\left(1-q^k\right)^n\right]
=p\sum_{k=1}^n\binom nk(-1)^{k-1}\frac{1}{1-q^k}
$$
The asymptotic evaluation of such an alternating sum is conveniently
performed by {\sl Rice's method\/}, which we cite as a lemma.
\proclaim{Lemma}
Let $\Cal C$ be a curve surrounding the points $1,2,\dots,n$
in the complex plane and let $f(z)$ be analytic inside $\Cal C$. Then
$$
\sum_{k=1}^n \binom nk \, {(-1)}^k f(k)=
-\frac 1{2\pi i} \int_{\Cal C} [n;z] f(z) dz,
$$
where
$$
[n;z]=\frac{(-1)^{n-1} n!}{z(z-1)\dots(z-n)}=
\frac{\Gamma(n+1)\Gamma(-z)}{\Gamma(n+1-z)}=B(n+1,-z).\qed
$$
\endproclaim
Extending the contour of integration it turns out that
under suitable growth conditions on $f(z)$
the asymptotic expansion
of the alternating sum is given by
$$
\sum \text {Res} \big( [n;z] f(z)\big)+\text{smaller order terms}
$$
where the sum is taken over all poles $z_0$
different from $1,\dots,n$.
The range $1,\dots,n$ for the summation is not sacred;
if we sum, for example, over $k=2,\dots,n$, the contour must
encircle $2,\dots,n$, etc.
\bigskip
\proclaim{Theorem 1} The average number $E_n$ of left-to-right maxima
(strict model)
in the context of $n$ independently distributed geometric random variables
has the asymptotic expansion
$$
E_n=
p\left[\log_Qn+\frac{\gamma}{L}+\frac 12-\delta\left(\log_Qn\right)\right]
+O\big(\frac 1n\big)
$$
where $Q=q^{-1}$,
$L=\log Q$, $\gamma$ is Euler's constant and $\delta$ is a periodic function
of period 1, mean zero and small amplitude. Its Fourier series is
given by
$$
\delta(x)=\frac{1}{L}\sum_{k\ne 0}\Gamma\left(-\chi_k\right)
e^{2k\pi ix}.
$$
\endproclaim
The variance can also be computed, by considering the second derivative
of $F(z,y)$ with respect to $y$.
\proclaim{Theorem 2}The variance $V_n$ of the number of
left-to-right maxima
(strict model)
in the context of $n$ independently distributed geometric random variables
has the asymptotic expansion for $n\to\infty$
$$
V_n=pq\log_Qn+p^2\left(-\frac5{12}+\frac{\pi^2}{6L^2}-\frac\gamma L
-\left[\delta^2\right]_0\right)
+p\left(\frac\gamma L+\frac12\right)+ \delta_1(\log_Qn)+O\big(\frac 1n
\big).
$$
Here, $\left[\delta^2\right]_0$ is the mean of the square of
$\delta^2(x)$, a very
small quantity that can be neglected for numerical purposes. Furthermore,
$\delta_1(x)$ is a periodic function with mean 0; its Fourier
coefficients
could be described if needed.
\endproclaim
\head{3. Left-to-right maxima in the loose sense}\endhead
Again, we are defining an appropriate ``language'' $\Cal L$ from which
a bivariate generating function $F(z,y)$ can be derived.
Set
$\quad
\Cal A_k:=\bold k \{\bold 1,\dots,\bold{k-1}\}^*,\quad
$
then
$\quad
\Cal L:=\Cal A_1^*\cdot\Cal A_2^*\cdot\Cal A_3^*\dots\quad
$
and
$$
F(z,y)=\prod_{k\ge 1}\frac{1}{1-\dfrac{yzpq^{k-1}}{1-z(1-q^{k-1})} }=
\prod_{k\ge0}\frac{1-z(1-q^k)}{1-z+zq^k(1-py)}.
$$
Therefore
$$
f(z)=\frac{\partial F(z,y)}{\partial y}\bigg|_{y=1}
=\frac{pz}{1-z}\sum_{k\ge 0}\frac{q^k}{1-z(1-q^{k+1})}
=\frac pq\sum_{k\ge 1}\left[\frac{1}{1-z}-\frac{1}{1-z(1-q^k)}\right]
$$
and
$$
E_n=[z^n]f(z)=\frac pq\sum_{k\ge 1}
\left[1-\left(1-q^k\right)^n\right].
$$
\proclaim{Theorem 3} The average number $E_n$ of left-to-right maxima
(loose model)
in the context of $n$ independently distributed geometric random variables
has the asymptotic expansion
$$
E_n=
\frac pq
\left[\log_Qn+\frac{\gamma}{L}-\frac 12-\delta\left(\log_Qn\right)\right]
+O\big(\frac 1n\big).
$$
\endproclaim
\proclaim{Theorem 4}The variance $V_n$ of the number of
left-to-right maxima
(loose model)
in the context of $n$ independently distributed geometric random variables
has the asymptotic expansion for $n\to\infty$
$$
V_n=\frac{p}{q^2}\log_Qn+
\frac{p^2}{q^2}\left(-\frac5{12}+\frac{\pi^2}{6L^2}+\frac\gamma L-\frac 2L
-\left[\delta^2\right]_0\right)
+\frac pq\left(\frac\gamma L-\frac12\right)+ \delta_2(\log_Qn)+O\big(\frac 1n
\big).
$$
Here, $\left[\delta^2\right]_0$ is the mean of the square of $\delta^2(x)$, a very
small quantity that can be neglected for numerical purposes. Furthermore,
$\delta_2(x)$ is a periodic function with mean 0; its Fourier
coefficients
could be described if needed.
\endproclaim
\head 4. Path length \endhead
If we denote the path length of a ``word'' $\omega$ by $a(\omega)$, then
we have the following recursion formula
$$
a(\omega)=a(\rho m \sigma)=a(\rho)+a( \sigma)+1 +|\rho|
$$
with $\rho\in\{\bold 1,\dots,\bold m\}^*$ and $\sigma\in\{\bold 1,\dots,
\bold m-1\}^*$.
From this we get a functional equation for the generating functions.
(The upper index `$=m$' e.g. refers to all sequences where the maximal
element is $m$.)
$$
P^{=m}(z,y)=pq^{m-1}zyP^{\le m}(zy,y)P^{0,
\endaligned
$$
where $\displaystyle{
h(x)=\sum_{k\ge1}\frac{e^{kx}}{\big(e^{kx}-1\big)^2}
}$,\
$\displaystyle{
\alpha_1=L\sum_{k\ge1}\frac{1}{k(L^2+4k^2\pi^2)\sinh (2k\pi^2/L)}}$
and
$\delta_4(x)$ is again continuous, periodic of period 1 and mean zero.
\endproclaim
Both, $h(\cdot)$ and $\alpha_1$ are very small quantities, so that a
less accurate but more readible formula is
$$
V_n\sim(Q-1)^2n^2\bigg\{
\frac{Q+1}{2(Q-1)L}
+\frac{1}{L^2}-\frac{\pi^2}{6L^2}
\bigg\}.
$$
\head 5. A combinatorial interpretation of Euler's partition identities
\endhead
The identities in question are (compare \cite{1})
$$
\prod_{n\ge0}\big(1+tq^n\big)=\sum_{n\ge0}\frac{t^nq^{\binom n2}}{Q_n(q)}
$$
and
$$
\prod_{n\ge0}\frac 1{1-tq^n}=\sum_{n\ge0}\frac{t^n}{Q_n(q)},
$$
where
$$
Q_n(q)=\big(1-q^{1}\big)\big(1-q^{2}\big)\dots\big(1-q^{n}\big).
$$
Now consider
$$
\Bbb P\big\{X_1<\dots