
Commit 054ab11

Added lecture 3

1 parent 4165723 commit 054ab11

File tree

3 files changed: +322 -0 lines changed

expectation.pdf (12 KB): Binary file not shown.

lecture3.pdf (296 KB): Binary file not shown.

lecture3.tex (+322)

@@ -0,0 +1,322 @@
\documentclass[aspectratio=169]{beamer}
\mode<presentation>
\usetheme{Hannover}
\useoutertheme{sidebar}
\usecolortheme{dolphin}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}

% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}

\newcommand{\cpp}[1]{\texttt{#1}}


\title{Mathematical Biostatistics Boot Camp: Lecture 3, Expectations}

\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}

\frame{\titlepage}

\frame{
\frametitle{Table of contents}
\tableofcontents
}

\section{Outline}
\frame{
\frametitle{Outline}
\begin{enumerate}
\item Define expected values
\item Properties of expected values
\item Unbiasedness of the sample mean
\item Define variances
\item Define the standard deviation
\item Calculate Bernoulli variance
\item Interpret variances via Chebyshev's inequality
\end{enumerate}
}

\section{Expected values}
\subsection{Discrete random variables}
\begin{frame}
\frametitle{Expected values}
\begin{itemize}
\item The {\bf expected value} or {\bf mean} of a random variable is the center of its
  distribution
\item For a discrete random variable $X$ with PMF $p(x)$, it is defined as follows:
$$
E[X] = \sum_x xp(x),
$$
where the sum is taken over the possible values of $x$
\item $E[X]$ represents the center of mass of a collection of
  locations and weights, $\{x, p(x)\}$
\end{itemize}
\end{frame}


\begin{frame}
\frametitle{Example}
\includegraphics[width=3.5in]{expectation.pdf}
\end{frame}


\begin{frame}
\frametitle{Example}
\begin{itemize}
\item Suppose a coin is flipped and $X$ is declared $0$ or $1$ corresponding
  to a head or a tail, respectively
\item What is the expected value of $X$?
$$
E[X] = .5 \times 0 + .5 \times 1 = .5
$$
\item Note, if thought about geometrically, this answer is obvious; if two equal
  weights are spaced at $0$ and $1$, the center of mass will be $.5$
\end{itemize}
\end{frame}

\begin{frame}\frametitle{Example}
\begin{itemize}
\item Suppose that a die is tossed and $X$ is the number face up
\item What is the expected value of $X$?
$$
E[X] = 1 \times \frac{1}{6} + 2 \times \frac{1}{6} +
3 \times \frac{1}{6} + 4 \times \frac{1}{6} +
5 \times \frac{1}{6} + 6 \times \frac{1}{6} = 3.5
$$
\item Again, the geometric argument makes this answer obvious without calculation
\end{itemize}
\end{frame}

\subsection{Continuous random variables}
\begin{frame}\frametitle{Continuous random variables}
\begin{itemize}
\item For a continuous random variable $X$ with density $f$, the expected
  value is defined as follows:
$$
E[X] = \int_{-\infty}^\infty t f(t)\,dt
$$
\item This definition borrows from the definition of center of mass for
  a continuous body
\end{itemize}
\end{frame}

\begin{frame}\frametitle{Example}
\begin{itemize}
\item Consider a density where $f(x) = 1$ for $x$
  between zero and one
\item (Is this a valid density?)
\item Suppose that $X$ follows this density; what is its expected value?
$$
E[X] = \int_{0}^{1} x \, dx = \left. \frac{x^2}{2} ~\right|_{0}^{1} = 1/2
$$
\end{itemize}
\end{frame}
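
% Quick check of the parenthetical question: f(x) = 1 \geq 0 on [0, 1] and
% \int_0^1 1 \, dx = 1, so f is a valid density.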

\section{Rules about expected values}
\begin{frame}\frametitle{Rules about expected values}
\begin{itemize}
\item The expected value is a linear operator
\item If $a$ and $b$ are not random and $X$ and $Y$
  are two random variables, then
\begin{itemize}
\item $E[aX + b] = a E[X] + b$
\item $E[X + Y] = E[X] + E[Y]$
\end{itemize}
\item {\em In general}, if $g$ is a function that is not linear,
$$
E[g(X)] \neq g(E[X])
$$
\item For example, in general, $E[X^2] \neq E[X]^2$
\end{itemize}
\end{frame}
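
% A concrete instance of the nonlinearity warning, using the fair coin from
% earlier: since X^2 = X for a 0/1 variable, E[X^2] = .5 while E[X]^2 = .25.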

\begin{frame}\frametitle{Example}
\begin{itemize}
\item You flip a coin, $X$, and simulate a uniform random number, $Y$; what
  is the expected value of their sum?
$$
E[X + Y] = E[X] + E[Y] = .5 + .5 = 1
$$
\item Another example: you roll a die twice. What is the expected value of
  the average?
\item Let $X_1$ and $X_2$ be the results of the two rolls
$$
E[(X_1 + X_2) / 2] = \frac{1}{2}(E[X_1] + E[X_2])
= \frac{1}{2}(3.5 + 3.5) = 3.5
$$
\end{itemize}
\end{frame}


\begin{frame}\frametitle{Example}
\begin{enumerate}
\item Let $X_i$ for $i=1,\ldots,n$ be a collection of random
  variables, each from a distribution with mean $\mu$
\item Calculate the expected value of the sample average of the $X_i$
\end{enumerate}
\begin{eqnarray*}
E\left[ \frac{1}{n}\sum_{i=1}^n X_i\right]
& = & \frac{1}{n} E\left[\sum_{i=1}^n X_i\right] \\
& = & \frac{1}{n} \sum_{i=1}^n E\left[X_i\right] \\
& = & \frac{1}{n} \sum_{i=1}^n \mu = \mu.
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Remark}
\begin{itemize}
\item Therefore, the expected value of the {\bf sample mean} is the {\bf
  population mean} that it's trying to estimate
\item When the expected value of an estimator is what it's trying to estimate,
  we say that the estimator is {\bf unbiased}
\end{itemize}
\end{frame}
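
% Side note: the derivation above uses only linearity of expectation, so the
% X_i need not be independent for the sample mean to be unbiased.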


\section{Variances}
\begin{frame}\frametitle{The variance}
\begin{itemize}
\item The variance of a random variable is a measure of {\em spread}
\item If $X$ is a random variable with mean $\mu$, the variance of
  $X$ is defined as
$$
\Var(X) = E[(X - \mu)^2],
$$
the expected (squared) distance from the mean
\item Densities with a higher variance are more spread out than
  densities with a lower variance
\end{itemize}
\end{frame}

\begin{frame}
\begin{itemize}
\item A convenient computational form is
$$
\Var(X) = E[X^2] - E[X]^2
$$
\item If $a$ is constant, then $\Var(aX) = a^2 \Var(X)$
\item The square root of the variance is called the {\bf standard deviation}
\item The standard deviation has the same units as $X$
\end{itemize}
\end{frame}
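
% Sketch of where the computational form comes from, writing \mu = E[X]:
% \Var(X) = E[(X - \mu)^2] = E[X^2 - 2\mu X + \mu^2]
%         = E[X^2] - 2\mu E[X] + \mu^2 = E[X^2] - \mu^2.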


\begin{frame}\frametitle{Example}
\begin{itemize}
\item What's the variance of the result of a toss of a die?
\begin{itemize}
\item $E[X] = 3.5$
\item $E[X^2] = 1^2 \times \frac{1}{6} + 2^2 \times \frac{1}{6} +
  3^2 \times \frac{1}{6} + 4^2 \times \frac{1}{6} +
  5^2 \times \frac{1}{6} + 6^2 \times \frac{1}{6} \approx 15.17$
\end{itemize}
\item $\Var(X) = E[X^2] - E[X]^2 \approx 2.92$
\end{itemize}
\end{frame}
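
% Exact fractions behind the decimals: E[X^2] = 91/6 and E[X]^2 = 49/4, so
% \Var(X) = 91/6 - 49/4 = 35/12 \approx 2.92.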

\begin{frame}\frametitle{Example}
\begin{itemize}
\item What's the variance of the result of the toss of a coin
  with probability of heads (1) of $p$?
\begin{itemize}
\item $E[X] = 0 \times (1 - p) + 1 \times p = p$
\item $E[X^2] = E[X] = p$, since $X^2 = X$ when $X$ is $0$ or $1$
\end{itemize}
\item $\Var(X) = E[X^2] - E[X]^2 = p - p^2 = p(1 - p)$
\end{itemize}
\end{frame}


\begin{frame}\frametitle{Example}
\begin{itemize}
\item Suppose that a random variable is such that $0 \leq X \leq 1$ and $E[X] = p$
\item Note $X^2 \leq X$, so that $E[X^2] \leq E[X] = p$
\item $\Var(X) = E[X^2] - E[X]^2 \leq E[X] - E[X]^2 = p(1-p)$
\item Therefore the Bernoulli variance is the largest possible for random variables bounded between $0$ and $1$
\end{itemize}
\end{frame}
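
% Side note: p(1 - p) is itself maximized at p = 1/2, where it equals 1/4, so
% no random variable supported on [0, 1] can have variance larger than 1/4.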

\section{Chebyshev's inequality}
\begin{frame}
\frametitle{Interpreting variances}
\begin{itemize}
\item Chebyshev's inequality is useful for interpreting variances
\item This inequality states that
$$
P(|X - \mu| \geq k\sigma) \leq \frac{1}{k^2}
$$
\item In other words, the probability that a random variable lies $k$ or more
  standard deviations from its mean is at most $1/k^2$
\begin{eqnarray*}
2\sigma & \rightarrow & 25\% \\
3\sigma & \rightarrow & 11\% \\
4\sigma & \rightarrow & 6\%
\end{eqnarray*}
\item Note this is only a bound; the actual probability might be
  quite a bit smaller
\end{itemize}
\end{frame}

\begin{frame} \frametitle{Proof of Chebyshev's inequality}
\begin{eqnarray*}
P(|X - \mu| \geq k\sigma) & = & \int_{\{x: |x-\mu| \geq k\sigma\}} f(x) dx \\
& \leq & \int_{\{x:|x -\mu| \geq k\sigma\}}\frac{(x - \mu)^2}{k^2\sigma^2} f(x) dx \\
& \leq & \int_{-\infty}^{\infty} \frac{(x - \mu)^2}{k^2\sigma^2} f(x) dx \\
& = & \frac{1}{k^2}
\end{eqnarray*}
\end{frame}
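
% Why the steps hold: the first inequality uses (x - \mu)^2 / (k^2 \sigma^2) \geq 1
% on the region of integration; the final equality uses
% \int_{-\infty}^{\infty} (x - \mu)^2 f(x) dx = \sigma^2.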

\begin{frame} \frametitle{Example}
\begin{itemize}
\item IQs are often said to be distributed with a mean of $100$ and an sd of $15$
\item What is the probability of a randomly drawn person having an IQ higher than
  $160$ or below $40$?
\item Thus we want to know the probability of a person being more
  than $4$ standard deviations from the mean
\item Thus Chebyshev's inequality suggests that this will be no larger than $1/16 \approx 6\%$
\item IQ distributions are often cited as being bell shaped, in which case this
  bound is very conservative
\item The probability of a random draw from a bell curve being $4$
  standard deviations from the mean is on the order of $10^{-5}$ (one
  thousandth of one percent)
\end{itemize}
\end{frame}
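
% For comparison, assuming an exactly normal distribution:
% P(|X - \mu| \geq 4\sigma) = 2\Phi(-4) \approx 6.3 \times 10^{-5},
% consistent with the order of magnitude quoted above.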

\begin{frame} \frametitle{Example}
\begin{itemize}
\item A popular buzz phrase in industrial quality control is
  Motorola's ``Six Sigma,'' whereby businesses are encouraged to
  control extreme events or rare defective parts
\item Chebyshev's inequality states that the probability of a ``Six
  Sigma'' event is less than $1/6^2 \approx 3\%$
\item If a bell curve is assumed, the probability of a ``six sigma''
  event is on the order of $10^{-9}$ (one ten-millionth of a percent)
\end{itemize}
\end{frame}
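
% For comparison: Chebyshev gives 1/36 \approx 2.8%, while assuming an exactly
% normal distribution gives P(|X - \mu| \geq 6\sigma) = 2\Phi(-6) \approx 2 \times 10^{-9}.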
\end{document}
