FRE6871_Lecture_4.Rnw

% FRE6871_Lecture4
% Define knitr options
% !Rnw weave=knitr
% Set global chunk options
<<knitr_setup,include=FALSE,cache=FALSE>>=
# Load knitr to get access to its chunk options and syntax themes
library(knitr)
# Default options applied to every code chunk in this document
opts_chunk$set(
  prompt=TRUE, eval=FALSE, tidy=FALSE, strip.white=FALSE,
  comment=NA, highlight=FALSE, message=FALSE, warning=FALSE,
  size="tiny", fig.width=4, fig.height=4)
# Global R options: console width and number of printed digits
# NOTE(review): dev is documented as a knitr chunk option, not a base R option - confirm that it has an effect here
options(width=80, dev="pdf", digits=3)
# Fetch the "acid" syntax highlighting theme and make it active
thm <- knit_theme$get("acid")
knit_theme$set(thm)
@


% Define document options
\documentclass[9pt]{beamer}
\DeclareMathSizes{8pt}{6pt}{6pt}{5pt}
\mode<presentation>
\usetheme{AnnArbor}
% \usecolortheme{whale}
% Uncover everything in a step-wise fashion
% \beamerdefaultoverlayspecification{<+->}
% mathtools package for math symbols
\usepackage{tikz}
\usetikzlibrary{positioning}
\usepackage{array}
\usepackage{multirow}
\usepackage{mathtools}
\usepackage[latin1]{inputenc}
\usepackage{bbold}
% bbm package for unitary vector or matrix symbol
\usepackage{bbm}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{hyperref}
\usepackage{fancybox}
\usepackage{url}
\usepackage[backend=bibtex,style=alphabetic]{biblatex} % bibstyle=numeric
\usepackage{listings}
\usepackage{xcolor}
\definecolor{anti_flashwhite}{rgb}{0.95, 0.95, 0.96}
% \bibliographystyle{amsalpha} % doesn't work
% \addbibresource{FRE_lectures.bib}
% \addbibresource[location=remote]{http://www.citeulike.org/user/jerzyp}
\renewcommand\bibfont{\footnotesize}
\renewcommand{\pgfuseimage}[1]{\scalebox{0.75}{\includegraphics{#1}}} % scale bib icons
\setbeamertemplate{bibliography item}[text] % set bib icons
% \setbeamertemplate{bibliography item}{} % remove bib icons

% \usepackage{enumerate}
% \let\emph\textbf
% \let\alert\textbf
% Define colors for hyperlinks
\definecolor{links}{HTML}{2A1B81}
\hypersetup{colorlinks=true,linkcolor=,urlcolor=links}
% Make url text scriptsize
\renewcommand\UrlFont{\scriptsize}
% Make institute text italic and small
\setbeamerfont{institute}{size=\small,shape=\itshape}
\setbeamerfont{date}{size=\small}
\setbeamerfont{block title}{size=\normalsize} % shape=\itshape
\setbeamerfont{block body}{size=\footnotesize}


% Title page setup
\title[FRE6871 Lecture\#4]{FRE6871 \texttt{R} in Finance}
\subtitle{Lecture\#4, Fall 2024}

\institute[NYU Tandon]{NYU Tandon School of Engineering}
\titlegraphic{\includegraphics[scale=0.2]{image/tandon_long_color.png}}
\author[Jerzy Pawlowski]{Jerzy Pawlowski \emph{\href{mailto:jp3900@nyu.edu}{jp3900@nyu.edu}}}
% \email{jp3900@poly.edu}
\date{September 30, 2024}
% \date{\today}
% \pgfdeclareimage[height=0.5cm]{university-logo}{engineering_long_white}
% \logo{\pgfuseimage{engineering_long_white}}


%%%%%%%%%%%%%%%
\begin{document}


%%%%%%%%%%%%%%%
\maketitle


%%%%%%%%%%%%%%%
\section{Linear Algebra}


%%%%%%%%%%%%%%%
\subsection{Vector and Matrix Calculus}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
    \begin{columns}[T]
    \column{0.5\textwidth}
      Let $\mathbf{v}$ and $\mathbf{w}$ be vectors, with $\mathbf{v} = \left\{ v_i \right\}_{i=1}^{i=n}$, and let $\mathbbm{1}$ be the unit vector, with $\mathbbm{1} = \left\{ 1 \right\}_{i=1}^{i=n}$.
      \vskip1ex
      Then the inner product of $\mathbf{v}$ and $\mathbf{w}$ can be written as $\mathbf{v}^T \mathbf{w} = \mathbf{w}^T \mathbf{v} = {\sum_{i=1}^n {v_i w_i}}$.
      \vskip1ex
      We can then express the sum of the elements of $\mathbf{v}$ as the inner product: $\mathbf{v}^T \mathbbm{1} = \mathbbm{1}^T \mathbf{v} = {\sum_{i=1}^n v_i}$.
      \vskip1ex
      And the sum of squares of $\mathbf{v}$ as the inner product: $\mathbf{v}^T \mathbf{v} = {\sum_{i=1}^n v^2_i}$.
      \vskip1ex
      Let $\mathbb{A}$ be a matrix, with $\mathbb{A} = \left\{ A_{ij} \right\}_{{i,j}=1}^{{i,j}=n}$.
      \vskip1ex
      Then the inner product of matrix $\mathbb{A}$ with vectors $\mathbf{v}$ and $\mathbf{w}$ can be written as:
      \begin{displaymath}
        \mathbf{v}^T \mathbb{A} \, \mathbf{w} = \mathbf{w}^T \mathbb{A}^T \mathbf{v} = {\sum_{{i,j}=1}^n {A_{ij} v_i w_j}}
      \end{displaymath}
    \column{0.5\textwidth}
      The derivative of a scalar variable with respect to a vector variable is a vector, for example:
      \begin{align*}
        \frac{d (\mathbf{v}^T \mathbbm{1})}{d \mathbf{v}} = d_v[\mathbf{v}^T \mathbbm{1}] = d_v[\mathbbm{1}^T \mathbf{v}] = \mathbbm{1}^T \\
        d_v[\mathbf{v}^T \mathbf{w}] = d_v[\mathbf{w}^T \mathbf{v}] = \mathbf{w}^T \\
        d_v[\mathbf{v}^T \mathbb{A} \, \mathbf{w}] = \mathbf{w}^T \mathbb{A}^T \\
        d_v[\mathbf{v}^T \mathbb{A} \, \mathbf{v}] = \mathbf{v}^T \mathbb{A} + \mathbf{v}^T \mathbb{A}^T
      \end{align*}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Eigenvectors and Eigenvalues of Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The vector $w$ is an \emph{eigenvector} of the matrix $\mathbb{A}$, if it satisfies the \emph{eigenvalue} equation:
      \begin{displaymath}
        \mathbb{A} \, w = \lambda \, w
      \end{displaymath}
      Where $\lambda$ is the \emph{eigenvalue} corresponding to the \emph{eigenvector} $w$.
      \vskip1ex
      The number of \emph{eigenvalues} of a matrix is equal to its dimension.
      \vskip1ex
      Real symmetric matrices have real \emph{eigenvalues}, and their \emph{eigenvectors} are orthogonal to each other.
      \vskip1ex
      The \emph{eigenvectors} can be normalized to $1$.
      \vskip1ex
      The \emph{eigenvectors} form an \emph{orthonormal basis} in which the matrix $\mathbb{A}$ is diagonal.
      \vskip1ex
      The function \texttt{eigen()} calculates the \emph{eigenvectors} and \emph{eigenvalues} of numeric matrices.
      \vskip1ex
      An excellent interactive visualization of \emph{eigenvectors} and \emph{eigenvalues} is available here:\\
      \hskip1em\url{http://setosa.io/ev/eigenvectors-and-eigenvalues/}
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/eigen_values.png}
      <<echo=TRUE,eval=FALSE>>=
# Build a random 5x5 matrix and symmetrize it by adding its transpose
matv <- matrix(runif(25), ncol=5)
matv <- matv + t(matv)
# Perform the eigen decomposition and extract the eigenvector matrix
eigend <- eigen(matv)
eigenvec <- eigend$vectors
dim(eigenvec)
# Bar plot of the eigenvalues, labeled ev1, ev2, ...
barplot(height=eigend$values,
  names.arg=paste0("ev", seq_along(eigend$values)),
  las=3, xlab="", ylab="",
  main="Eigenvalues of a real symmetric matrix")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Eigen Decomposition of Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Real symmetric matrices have real \emph{eigenvalues}, and their \emph{eigenvectors} are orthogonal to each other.
      \vskip1ex
      The \emph{eigenvectors} form an \emph{orthonormal basis} in which the matrix $\mathbb{A}$ is diagonal:
      \begin{displaymath}
        \Sigma = \mathbb{O}^T \mathbb{A} \, \mathbb{O}
      \end{displaymath}
      Where $\Sigma$ is a \emph{diagonal} matrix containing the \emph{eigenvalues} of matrix $\mathbb{A}$, and $\mathbb{O}$ is an \emph{orthogonal} matrix of its \emph{eigenvectors}, with $\mathbb{O}^T \mathbb{O} = \mathbbm{1}$.
      \vskip1ex
      Any real symmetric matrix $\mathbb{A}$ can be decomposed into a product of its \emph{eigenvalues} and its \emph{eigenvectors} (the \emph{eigen decomposition}):
      \begin{displaymath}
        \mathbb{A} = \mathbb{O} \, \Sigma \, \mathbb{O}^T
      \end{displaymath}
      The \emph{eigen decomposition} expresses a matrix as the product of a rotation, followed by a scaling, followed by the inverse rotation.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Orthonormality check: t(O) %*% O should be the identity matrix
round(t(eigenvec) %*% eigenvec, 4)
# Rotating matv into the eigenvector basis makes it diagonal
round(t(eigenvec) %*% (matv %*% eigenvec), 4)
# Compare the diagonal elements above with the eigenvalues
eigend$values
# Recompose the matrix from its eigen decomposition.
# Multiplying t(eigenvec) by the vector of eigenvalues scales its rows,
# which is the same as multiplying by the diagonal matrix of eigenvalues.
matrixe <- eigenvec %*% (eigend$values * t(eigenvec))
# Equivalent calculation using an explicit diagonal matrix:
# diagmat <- diag(eigend$values)
# matrixe <- eigenvec %*% (diagmat %*% t(eigenvec))
all.equal(matv, matrixe)
      @
      \emph{Orthogonal} matrices represent rotations in \emph{hyperspace}, and their inverse is equal to their transpose: $\mathbb{O}^{-1} = \mathbb{O}^T$.
      \vskip1ex
      The \emph{diagonal} matrix $\Sigma$ represents a scaling (stretching) transformation proportional to the \emph{eigenvalues}.
      \vskip1ex
      The \texttt{\%*\%} operator performs \emph{inner} (\emph{scalar}) multiplication of vectors and matrices.
      \vskip1ex
      \emph{Inner} multiplication multiplies the rows of one matrix with the columns of another matrix, so that each pair produces a single number.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Positive Definite} Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Matrices with positive \emph{eigenvalues} are called \emph{positive definite} matrices.
      \vskip1ex
      Matrices with non-negative \emph{eigenvalues} are called \emph{positive semi-definite} matrices (some of their \emph{eigenvalues} may be zero).
      \vskip1ex
      Examples of \emph{positive definite} matrices are the covariance matrices of linearly independent variables.
      \vskip1ex
      But the covariance matrices of linearly dependent variables have some \emph{eigenvalues} equal to zero, in which case they are \emph{singular}, and only \emph{positive semi-definite}.
      \vskip1ex
      All covariance matrices are \emph{positive semi-definite} and all \emph{positive semi-definite} matrices are the covariance matrix of some multivariate distribution.
      \vskip1ex
      Matrices which have some \emph{eigenvalues} equal to zero are called \emph{singular} (degenerate) matrices.
      \vskip1ex
      For any real matrix $\mathbb{A}$, the matrix $\mathbb{A}^T \mathbb{A}$ is \emph{positive semi-definite}.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/eigen_posdef.png}
      <<echo=TRUE,eval=FALSE>>=
# Build a random 5x5 matrix and multiply it by its transpose,
# which produces a positive semi-definite matrix
matv <- matrix(runif(25), ncol=5)
matv <- t(matv) %*% matv
# Perform the eigen decomposition and print the eigenvalues
eigend <- eigen(matv)
eigend$values
# Bar plot of the eigenvalues, labeled ev1, ev2, ...
barplot(height=eigend$values,
  names.arg=paste0("ev", seq_along(eigend$values)),
  xlab="", ylab="", las=3,
  main="Eigenvalues of positive semi-definite matrix")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Singular Value Decomposition (\protect\emph{SVD}) of Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Singular Value Decomposition} (\emph{SVD}) is a generalization of the \emph{eigen decomposition} of square matrices.
      \vskip1ex
      The \emph{SVD} of a rectangular matrix $\mathbb{A}$ is defined as the factorization:
      \begin{displaymath}
        \mathbb{A} = \mathbb{U}  \, \Sigma  \, \mathbb{V}^T
      \end{displaymath}
      Where $\mathbb{U}$ and $\mathbb{V}$ are the left and right \emph{singular matrices}, and $\Sigma$ is a diagonal matrix of \emph{singular values}.
      \vskip1ex
      If $\mathbb{A}$ has \texttt{m} rows and \texttt{n} columns and if (\texttt{m > n}), then $\mathbb{U}$ is an (\texttt{m x n}) \emph{rectangular} matrix, $\Sigma$ is an (\texttt{n x n}) \emph{diagonal} matrix, and $\mathbb{V}$ is an (\texttt{n x n}) \emph{orthogonal} matrix, and if (\texttt{m < n}) then the dimensions are: (\texttt{m x m}), (\texttt{m x m}), and (\texttt{n x m}).
      \vskip1ex
      The left $\mathbb{U}$ and right $\mathbb{V}$ singular matrices consist of columns of \emph{orthonormal} vectors, so that $\mathbb{U}^T \mathbb{U} = \mathbb{V}^T \mathbb{V} = \mathbbm{1}$.
      \vskip1ex
      In the special case when $\mathbb{A}$ is a symmetric \emph{positive semi-definite} matrix, then $\mathbb{U} = \mathbb{V}$, and the \emph{SVD} reduces to the \emph{eigen decomposition}.
    \column{0.5\textwidth}
      The function \texttt{svd()} performs \emph{Singular Value Decomposition} (\emph{SVD}) of a rectangular matrix, and returns a list of three elements: the \emph{singular values}, and the matrices of left-\emph{singular} vectors and the right-\emph{singular} vectors.
      <<echo=TRUE,eval=FALSE>>=
# Create a random rectangular matrix with 10 rows and 5 columns
matv <- matrix(rnorm(50), ncol=5)
# Perform the singular value decomposition
svdec <- svd(matv)
# Recompose matv from the SVD matrices
# (multiplying t(v) by the vector d scales its rows by the singular values)
all.equal(matv, svdec$u %*% (svdec$d * t(svdec$v)))
# The columns of U and of V are orthonormal
round(t(svdec$u) %*% svdec$u, digits=4)
round(t(svdec$v) %*% svdec$v, digits=4)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The Left and Right Singular Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The left $\mathbb{U}$ and right $\mathbb{V}$ singular matrices define rotation transformations into a coordinate system where the matrix $\mathbb{A}$ becomes diagonal:
      \begin{displaymath}
        \Sigma = \mathbb{U}^T \mathbb{A} \mathbb{V}
      \end{displaymath}
      The columns of $\mathbb{U}$ and $\mathbb{V}$ are called the \emph{singular} vectors, and they are only defined up to a reflection (change in sign), i.e. if \texttt{vec} is a singular vector, then so is \texttt{-vec}.
      \vskip1ex
      The left singular matrix $\mathbb{U}$ forms the \emph{eigenvectors} of the matrix $\mathbb{A} \mathbb{A}^T$.
      \vskip1ex
      The right singular matrix $\mathbb{V}$ forms the \emph{eigenvectors} of the matrix $\mathbb{A}^T \mathbb{A}$.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Numbers of rows and columns of the rectangular matrix
nrows <- 6
ncols <- 4
# Left matrix: first ncols eigenvectors of a random symmetric matrix
leftmat <- matrix(runif(nrows^2), ncol=nrows)
eigend <- eigen(crossprod(leftmat))
leftmat <- eigend$vectors[, seq_len(ncols)]
# Right matrix: eigenvectors of a random symmetric matrix
rightmat <- matrix(runif(ncols^2), ncol=ncols)
eigend <- eigen(crossprod(rightmat))
rightmat <- eigend$vectors
# Random singular values sorted in decreasing order
singval <- sort(runif(ncols, min=1, max=5), decreasing=TRUE)
# Compose the rectangular matrix from its components
matv <- leftmat %*% (singval * t(rightmat))
# Perform the singular value decomposition
svdec <- svd(matv)
# Recompose matv from its SVD
all.equal(matv, svdec$u %*% (svdec$d * t(svdec$v)))
# Compare the SVD with the components of matv (up to a sign)
all.equal(abs(svdec$u), abs(leftmat))
all.equal(abs(svdec$v), abs(rightmat))
all.equal(svdec$d, singval)
# Eigen decomposition of matv %*% t(matv) gives the left matrix
retsq <- matv %*% t(matv)
eigend <- eigen(retsq)
all.equal(eigend$values[seq_len(ncols)], singval^2)
all.equal(abs(eigend$vectors[, seq_len(ncols)]), abs(leftmat))
# Eigen decomposition of t(matv) %*% matv gives the right matrix
retsq <- t(matv) %*% matv
eigend <- eigen(retsq)
all.equal(eigend$values, singval^2)
all.equal(abs(eigend$vectors), abs(rightmat))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Inverse of Symmetric Square Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The inverse of a square matrix $\mathbb{A}$ is defined as a square matrix $\mathbb{A}^{-1}$ that satisfies the equation:
      \begin{displaymath}
        \mathbb{A}^{-1} \mathbb{A} = \mathbb{A} \mathbb{A}^{-1} = \mathbbm{1}
      \end{displaymath}
      Where $\mathbbm{1}$ is the identity matrix.
      \vskip1ex
      The inverse $\mathbb{A}^{-1}$ of a \emph{symmetric} square matrix $\mathbb{A}$ can also be expressed as the product of the inverse of its \emph{eigenvalues} ($\Sigma$) and its \emph{eigenvectors} ($\mathbb{O}$):
      \begin{displaymath}
        \mathbb{A}^{-1} = \mathbb{O} \, \Sigma^{-1} \, \mathbb{O}^T
      \end{displaymath}
      But \emph{singular} (degenerate) matrices (which have some \emph{eigenvalues} equal to zero) don't have an inverse.
      \vskip1ex
      The inverse of \emph{non-symmetric} matrices can be calculated using \emph{Singular Value Decomposition} (\emph{SVD}).
      \vskip1ex
      The function \texttt{solve()} solves systems of linear equations, and also inverts square matrices.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Build a random positive semi-definite matrix
matv <- matrix(runif(25), ncol=5)
matv <- t(matv) %*% matv
# Invert matv using solve() (called without the right-hand side b)
invmat <- solve(a=matv)
# Multiply the inverse with the matrix from the left and from the right
round(invmat %*% matv, digits=4)
round(matv %*% invmat, digits=4)
# Perform the eigen decomposition of matv
eigend <- eigen(matv)
eigenvec <- eigend$vectors
# Inverse from the eigen decomposition: dividing t(eigenvec) by the
# vector of eigenvalues scales its rows by the inverse eigenvalues
inveigen <- eigenvec %*% (t(eigenvec) / eigend$values)
all.equal(invmat, inveigen)
# Equivalent calculation using an explicit diagonal matrix:
# diagmat <- diag(1/eigend$values)
# inveigen <- eigenvec %*% (diagmat %*% t(eigenvec))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Generalized Inverse of Rectangular Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The generalized inverse of an (\texttt{m x n}) rectangular matrix $\mathbb{A}$ is defined as an (\texttt{n x m}) matrix $\mathbb{A}^{-1}$ that satisfies the equation:
      \begin{displaymath}
        \mathbb{A} \mathbb{A}^{-1} \mathbb{A} = \mathbb{A}
      \end{displaymath}
      The generalized inverse matrix $\mathbb{A}^{-1}$ can be expressed as a product of the inverse of its \emph{singular values} ($\Sigma$) and its left and right \emph{singular} matrices ($\mathbb{U}$ and $\mathbb{V}$):
      \begin{displaymath}
        \mathbb{A}^{-1} = \mathbb{V} \, \Sigma^{-1} \, \mathbb{U}^T
      \end{displaymath}
      The generalized inverse $\mathbb{A}^{-1}$ can also be expressed as the \emph{Moore-Penrose pseudo-inverse}:
      \begin{displaymath}
        \mathbb{A}^{-1} = (\mathbb{A}^T \mathbb{A})^{-1} \mathbb{A}^T
      \end{displaymath}
      In the case when the inverse matrix $\mathbb{A}^{-1}$ exists, then the \emph{pseudo-inverse} matrix simplifies to the inverse: $(\mathbb{A}^T \mathbb{A})^{-1} \mathbb{A}^T = \mathbb{A}^{-1} (\mathbb{A}^T)^{-1} \mathbb{A}^T = \mathbb{A}^{-1}$
      \vskip1ex
      The function \texttt{MASS::ginv()} calculates the generalized inverse of a matrix.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Random rectangular matrix with more rows than columns
nrows <- 6
ncols <- 4
matv <- matrix(runif(nrows*ncols), ncol=ncols)
# Calculate the generalized inverse of matv
invmat <- MASS::ginv(matv)
round(invmat %*% matv, digits=4)
all.equal(matv, matv %*% invmat %*% matv)
# Random rectangular matrix with fewer rows than columns
nrows <- 4
ncols <- 6
matv <- matrix(runif(nrows*ncols), ncol=ncols)
# Calculate the generalized inverse of matv
invmat <- MASS::ginv(matv)
all.equal(matv, matv %*% invmat %*% matv)
round(matv %*% invmat, digits=4)
round(invmat %*% matv, digits=4)
# Perform the singular value decomposition
svdec <- svd(matv)
# Generalized inverse from SVD: V times inverse singular values times t(U)
invsvd <- svdec$v %*% (t(svdec$u) / svdec$d)
all.equal(invsvd, invmat)
# Moore-Penrose formula: generalized inverse of t(A) %*% A, times t(A)
invmp <- MASS::ginv(t(matv) %*% matv) %*% t(matv)
all.equal(invmp, invmat)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Regularized Inverse of Singular Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Singular} matrices have some \emph{singular values} equal to zero, so they don't have an inverse matrix which satisfies the equation: $\mathbb{A} \mathbb{A}^{-1} \mathbb{A} = \mathbb{A}$
      \vskip1ex
      But if the \emph{singular values} that are equal to zero are removed, then a \emph{regularized inverse} for \emph{singular} matrices can be specified by:
      \begin{displaymath}
        \mathbb{A}^{-1} = \mathbb{V}_n \, \Sigma_n^{-1} \, \mathbb{U}_n^T
      \end{displaymath}
      Where $\mathbb{U}_n$, $\mathbb{V}_n$ and $\Sigma_n$ are the \emph{SVD} matrices with the rows and columns corresponding to zero \emph{singular values} removed.
      <<echo=TRUE,eval=FALSE>>=
# Create a random singular matrix from a rectangular matrix
# with more columns than rows: ncols > nrows
nrows <- 4
ncols <- 6
matv <- matrix(runif(nrows*ncols), ncol=ncols)
matv <- t(matv) %*% matv
# Perform the singular value decomposition
svdec <- svd(matv)
# Naive SVD inverse is wrong: it divides by (nearly) zero singular values
invsvd <- svdec$v %*% (t(svdec$u) / svdec$d)
# So the inverse property doesn't hold
all.equal(matv, matv %*% invsvd %*% matv)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Relative tolerance below which singular values are treated as zero
precv <- sqrt(.Machine$double.eps)
# Inspect the singular values for zeros
round(svdec$d, digits=12)
# Flag the singular values that are non-zero relative to the first one
notzero <- (svdec$d > (precv * svdec$d[1]))
# Regularized inverse from SVD, using only the non-zero singular values
invsvd <- svdec$v[, notzero] %*%
  (t(svdec$u[, notzero]) / svdec$d[notzero])
# Verify the inverse property of matv
all.equal(matv, matv %*% invsvd %*% matv)
# Compare with the regularized inverse from MASS::ginv()
invmat <- MASS::ginv(matv)
all.equal(invsvd, invmat)
# Compare with the Moore-Penrose pseudo-inverse
invmp <- MASS::ginv(t(matv) %*% matv) %*% t(matv)
all.equal(invmp, invmat)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Diagonalizing the Inverse of Singular Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The left-\emph{singular} matrix $\mathbb{U}$ combined with the right-\emph{singular} matrix $\mathbb{V}$ define a rotation transformation into a coordinate system where the matrix $\mathbb{A}$ becomes diagonal:
      \begin{displaymath}
        \Sigma = \mathbb{U}^T \mathbb{A} \mathbb{V}
      \end{displaymath}
      The generalized inverse of \emph{singular} matrices doesn't satisfy the equation: $\mathbb{A}^{-1} \mathbb{A} = \mathbb{A} \mathbb{A}^{-1} = \mathbbm{1}$, but if it's rotated into the same coordinate system where $\mathbb{A}$ is diagonal, then we have:
      \begin{displaymath}
        \mathbb{U}^T (\mathbb{A}^{-1} \mathbb{A}) \, \mathbb{V} = \mathbbm{1}_n
      \end{displaymath}
      So that $\mathbb{A}^{-1} \mathbb{A}$ is diagonal in the same coordinate system where $\mathbb{A}$ is diagonal.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Product of matv with its inverse invmat
unitmat <- matv %*% invmat
round(unitmat, digits=4)
# Same product calculated directly
round(matv %*% invmat, digits=4)
# Rotate the product into the coordinate system of the singular vectors
round(t(svdec$u) %*% unitmat %*% svdec$v, digits=4)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Solving Linear Equations Using \texttt{solve()}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A system of linear equations can be defined as:
      \begin{displaymath}
        \mathbb{A} \, x = b
      \end{displaymath}
      Where $\mathbb{A}$ is a matrix, $b$ is a vector, and $x$ is the unknown vector.
      \vskip1ex
      The solution of the system of linear equations is equal to:
      \begin{displaymath}
        x = \mathbb{A}^{-1} b
      \end{displaymath}
      Where $\mathbb{A}^{-1}$ is the \emph{inverse} of the matrix $\mathbb{A}$.
      \vskip1ex
      The function \texttt{solve()} solves systems of linear equations, and also inverts square matrices.
      \vskip1ex
      The \texttt{\%*\%} operator performs \emph{inner} (\emph{scalar}) multiplication of vectors and matrices.
      \vskip1ex
      \emph{Inner} multiplication multiplies the rows of one matrix with the columns of another matrix, so that each pair produces a single number.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define a 2x2 square matrix and the right-hand side vector
matv <- matrix(c(1, 2, -1, 2), ncol=2)
vecv <- c(2, 1)
# Invert matv by calling solve() without a right-hand side
invmat <- solve(matv)
invmat %*% matv
# Solve the linear system by multiplying by the inverse
solutionv <- invmat %*% vecv
matv %*% solutionv
# Solve the linear system directly using solve()
solutionv <- solve(matv, vecv)
matv %*% solutionv
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Fast Matrix Inverse Using \texttt{C++}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Armadillo} \texttt{C++} functions can be several times faster than \texttt{R} functions - even those that are compiled from \texttt{C++} code.
      \vskip1ex
      That's because the \emph{Armadillo} \texttt{C++} library calls routines optimized for fast numerical calculations.
      \vskip1ex
      The package \emph{RcppArmadillo} allows calling from \texttt{R} the high-level \emph{Armadillo} \texttt{C++} linear algebra library.
      \vskip1ex
      The \texttt{C++} \emph{Armadillo} function \texttt{arma::inv()} calculates the matrix inverse several times faster than the function \texttt{solve()}.
      \vskip1ex
      The function \texttt{solve()} calculates the matrix inverse several times faster than the function \texttt{MASS::ginv()}.
      \begin{lstlisting}[language=C++,basicstyle=\tiny\ttfamily\bfseries,backgroundcolor=\color{anti_flashwhite},showstringspaces=FALSE]
// Rcpp header with information for C++ compiler
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h> // include RcppArmadillo header file
using namespace arma; // use Armadillo C++ namespace

// Calculate the inverse of the square matrix matv using arma::inv()
// matv is passed by const reference, so the function can't modify it
// [[Rcpp::export]]
arma::mat calc_invmat(const arma::mat& matv) {

  return arma::inv(matv);

}  // end calc_invmat
    \end{lstlisting}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create a random 10x10 matrix
matv <- matrix(rnorm(100), ncol=10)
# Invert the matrix in R using solve()
invmatr <- solve(matv)
round(invmatr %*% matv, digits=4)
# Compile the C++ file and load calc_invmat() into R using Rcpp
Rcpp::sourceCpp(file="/Users/jerzy/Develop/lecture_slides/scripts/calc_invmat.cpp")
# Invert the matrix using the C++ function
invmat <- calc_invmat(matv)
all.equal(invmat, invmatr)
all.equal(invmat, MASS::ginv(matv))
# Benchmark the three matrix inversion functions
library(microbenchmark)
summary(microbenchmark(
  ginv=MASS::ginv(matv),
  solve=solve(matv),
  cpp=calc_invmat(matv),
  times=10))[, c(1, 4, 5)]
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Cholesky Decomposition}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Cholesky} decomposition of a \emph{positive definite} matrix $\mathbb{A}$ is defined as:
      \begin{displaymath}
        \mathbb{A} = \mathbb{L}^T \mathbb{L}
      \end{displaymath}
      Where $\mathbb{L}$ is an upper triangular matrix with positive diagonal elements.
      \vskip1ex
      The matrix $\mathbb{L}$ can be considered the square root of $\mathbb{A}$.
      \vskip1ex
      The vast majority of random \emph{positive semi-definite} matrices are also \emph{positive definite}.
      \vskip1ex
      The function \texttt{chol()} calculates the \emph{Cholesky} decomposition of a \emph{positive definite} matrix.
      \vskip1ex
      The functions \texttt{chol2inv()} and \texttt{chol()} calculate the inverse of a \emph{positive definite} matrix two times faster than \texttt{solve()}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create a large random positive semi-definite matrix
matv <- matrix(runif(1e4), ncol=100)
matv <- t(matv) %*% matv
# Calculate the eigen decomposition
eigend <- eigen(matv)
eigenval <- eigend$values
eigenvec <- eigend$vectors
# Relative tolerance below which eigenvalues are treated as zero
precv <- sqrt(.Machine$double.eps)
# If needed convert to a positive definite matrix, by lifting the zero
# eigenvalues above the threshold (which is relative to eigenval[1])
notzero <- (eigenval > (precv*eigenval[1]))
if (!all(notzero)) {
  eigenval[!notzero] <- 2*precv*eigenval[1]
  matv <- eigenvec %*% (eigenval * t(eigenvec))
}  # end if
# Calculate the Cholesky matrix
cholmat <- chol(matv)
cholmat[1:5, 1:5]
all.equal(matv, t(cholmat) %*% cholmat)
# Calculate the inverse from the Cholesky matrix
invchol <- chol2inv(cholmat)
all.equal(solve(matv), invchol)
# Compare the speed of Cholesky inversion with solve()
library(microbenchmark)
summary(microbenchmark(
  solve=solve(matv),
  cholmat=chol2inv(chol(matv)),
  times=10))[, c(1, 4, 5)]  # end microbenchmark summary
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Simulating Correlated Returns Using Cholesky Matrix}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Cholesky} decomposition of a covariance matrix can be used to simulate correlated \emph{Normal} returns following the given covariance matrix: $\mathbb{C} = \mathbb{L}^T \mathbb{L}$
      \vskip1ex
      Let $\mathbb{R}$ be a matrix with columns of \emph{uncorrelated} returns following the \emph{Standard Normal} distribution.
      \vskip1ex
      The \emph{correlated} returns $\mathbb{R}_c$ can be calculated from the \emph{uncorrelated} returns $\mathbb{R}$ by multiplying them by the \emph{Cholesky} matrix $\mathbb{L}$:
      \begin{displaymath}
        \mathbb{R}_c = \mathbb{R} \, \mathbb{L}
      \end{displaymath}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Number of assets and number of simulated returns
nassets <- 5
nrows <- 10000
# Create a random covariance matrix
covmat <- matrix(runif(nassets^2), ncol=nassets)
covmat <- t(covmat) %*% covmat
# Calculate the Cholesky matrix
cholmat <- chol(covmat)
cholmat
# Simulate random uncorrelated returns
retp <- matrix(rnorm(nassets*nrows), ncol=nassets)
# Correlate the returns by multiplying them by the Cholesky matrix
retscorr <- retp %*% cholmat
# Compare the sample covariance of the correlated returns with covmat
covmat2 <- cov(retscorr)
all.equal(covmat, covmat2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Eigenvalues of Singular Covariance Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      If $\mathbb{R}$ is a matrix of returns (with zero mean) for a portfolio of \texttt{k} stocks (columns), over \texttt{n} time periods (rows), then the sample covariance matrix is equal to:
      \begin{displaymath}
        \mathbb{C} = \mathbb{R}^T \mathbb{R} / (n-1)
      \end{displaymath}
      If the number of rows is less than the number of stocks, then the returns are \emph{collinear}, and the sample covariance matrix is \emph{singular}, with some \emph{eigenvalues} equal to zero.
      \vskip1ex
      The function \texttt{crossprod(x, y)} calculates the \emph{inner} (\emph{scalar}) product \texttt{t(x) \%*\% y} (and \texttt{crossprod(x)} calculates \texttt{t(x) \%*\% x}), but it is slightly faster than the \texttt{\%*\%} operator.
      <<echo=TRUE,eval=FALSE>>=
# Simulate random stock returns: nassets columns and nrows rows
nassets <- 10
nrows <- 100
# Seed the random number generator to make the simulation reproducible
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
retp <- matrix(rnorm(nassets*nrows), ncol=nassets)
# Center (de-mean) the columns of the returns matrix
retp <- t(t(retp) - colMeans(retp))
# Or equivalently
retp <- apply(retp, MARGIN=2, function(colv) (colv - mean(colv)))
# Calculate the sample covariance matrix
covmat <- crossprod(retp)/(nrows-1)
# Calculate the eigenvalues and eigenvectors
eigend <- eigen(covmat)
eigend$values
# Plot the eigenvalues
barplot(height=eigend$values,
  names.arg=paste0("ev", seq_along(eigend$values)),
  las=3, xlab="", ylab="",
  main="Eigenvalues of Covariance Matrix")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/eigen_covmat.png}
      <<echo=TRUE,eval=FALSE>>=
# Smallest eigenvalue of the covariance matrix,
# as a function of the number of returns
ndata <- ((nassets/2):(2*nassets))
eigenval <- vapply(ndata, function(nobs) {
  # Center the first nobs rows of returns
  retsub <- retp[seq_len(nobs), ]
  retsub <- apply(retsub, MARGIN=2, function(colv) (colv - mean(colv)))
  covsub <- crossprod(retsub)/(nobs-1)
  min(eigen(covsub)$values)
}, FUN.VALUE=numeric(1))  # end vapply
plot(x=ndata, y=eigenval, type="l", xlab="", ylab="", lwd=3, col="blue",
  main="Smallest eigenvalue of covariance matrix
  as function of number of returns")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Regularized Inverse of Singular Covariance Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{regularization} technique allows calculating the inverse of \emph{singular} covariance matrices while reducing the effects of statistical noise.
      \vskip1ex
      If the number of time periods of returns is less than the number of assets (columns), then the covariance matrix of returns is \emph{singular}, and some of its \emph{eigenvalues} are zero, so it doesn't have an inverse.
      \vskip1ex
      The \emph{regularized} inverse $\mathbb{C}_n^{-1}$ is calculated by removing the higher order eigenvalues that are almost zero, and keeping only the first $n$ \emph{eigenvalues}:
      \begin{displaymath}
        \mathbb{C}_n^{-1} = \mathbb{O}_n \, \Sigma_n^{-1} \, \mathbb{O}_n^T
      \end{displaymath}
      Where $\Sigma_n$ and $\mathbb{O}_n$ are matrices with the higher order eigenvalues and eigenvectors removed.
      \vskip1ex
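      The function \texttt{MASS::ginv()} calculates the \emph{generalized} (Moore-Penrose) inverse of a matrix, which for a covariance matrix is equal to the \emph{regularized} inverse with the zero \emph{eigenvalues} removed.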
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create rectangular matrix with more columns than rows,
# so that its columns are collinear
matv <- matrix(rnorm(10*8), nc=10)
# Calculate the covariance matrix
covmat <- cov(matv)
# Try to calculate the inverse of covmat - solve() produces an error
# because covmat is singular.  The function try() reports the error,
# but it allows the rest of the script to continue.
invmat <- try(solve(covmat))
# Calculate the regularized (generalized) inverse of covmat
invmat <- MASS::ginv(covmat)
# Verify the generalized inverse property of covmat
all.equal(covmat, covmat %*% invmat %*% covmat)
# Perform eigen decomposition
eigend <- eigen(covmat)
eigenvec <- eigend$vectors
eigenval <- eigend$values
# Set tolerance for determining zero eigenvalues
precv <- sqrt(.Machine$double.eps)
# Select the eigenvalues which are not zero,
# relative to the largest eigenvalue
notzero <- (eigenval > (precv * eigenval[1]))
# Calculate the regularized inverse matrix
invreg <- eigenvec[, notzero] %*%
  (t(eigenvec[, notzero]) / eigenval[notzero])
# Verify that invmat is same as invreg
all.equal(invmat, invreg)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The Bias-Variance Tradeoff of the Regularized Inverse}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Removing the very small higher order eigenvalues can also be used to reduce the propagation of statistical noise and improve the signal-to-noise ratio.
      \vskip1ex
      Removing a larger number of eigenvalues further reduces the noise, but it increases the bias of the covariance matrix.
      \vskip1ex
      This is an example of the \emph{bias-variance tradeoff}.
      \vskip1ex
      Even though the \emph{regularized} inverse $\mathbb{C}_n^{-1}$ does not satisfy the matrix inverse property, its out-of-sample forecasts may be more accurate than those using the actual inverse matrix.
      \vskip1ex
      The parameter \texttt{dimax} specifies the number of eigenvalues used for calculating the \emph{regularized} inverse of the covariance matrix of returns.
      \vskip1ex
      The optimal value of the parameter \texttt{dimax} can be determined using \emph{backtesting} (\emph{cross-validation}).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Calculate the regularized inverse matrix using only
# the dimax largest eigenvalues and their eigenvectors
dimax <- 3
invmat <- eigenvec[, seq_len(dimax)] %*%
  (t(eigenvec[, seq_len(dimax)]) / eigend$values[seq_len(dimax)])
# Compare invmat with invreg - they are not the same when
# invreg was calculated from more than dimax eigenvalues
all.equal(invmat, invreg)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Shrinkage Estimator of Covariance Matrices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The estimates of the covariance matrix suffer from statistical noise, and that noise is magnified when the covariance matrix is inverted.
      \vskip1ex
      In the \emph{shrinkage} technique the covariance matrix $\mathbb{C}_s$ is estimated as a weighted sum of the sample covariance estimator $\mathbb{C}$ plus a target matrix $\mathbb{T}$:
      \begin{displaymath}
        \mathbb{C}_s = (1-\alpha) \, \mathbb{C} + \alpha \, \mathbb{T}
      \end{displaymath}
      The target matrix $\mathbb{T}$ represents an estimate of the covariance matrix subject to some constraint, such as that all the correlations are equal to each other.
      \vskip1ex
      The shrinkage intensity $\alpha$ determines the amount of shrinkage that is applied, with $\alpha = 1$ representing a complete shrinkage towards the target matrix.
      \vskip1ex
      The \emph{shrinkage} estimator reduces the estimate variance at the expense of increasing its bias (known as the \emph{bias-variance tradeoff}).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Simulate a matrix of random data and calculate its covariance matrix
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
matv <- matrix(rnorm(5e2), ncol=5)
covmat <- cov(matv)
cormat <- cor(matv)
stdev <- sqrt(diag(covmat))
# Target matrix: all the correlations are set equal to
# the mean of the off-diagonal sample correlations
cormean <- mean(cormat[upper.tri(cormat)])
targetmat <- matrix(cormean, nrow=NROW(covmat), ncol=NCOL(covmat))
diag(targetmat) <- 1
# Scale the rows and columns by the standard deviations
targetmat <- t(t(targetmat * stdev) * stdev)
# Shrinkage covariance matrix: weighted sum of sample and target
alphac <- 0.5
covshrink <- (1-alphac)*covmat + alphac*targetmat
# Invert the shrinkage covariance matrix
invmat <- solve(covshrink)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Recursive Matrix Inverse}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The inverse of a square matrix $\mathbb{A}$ can be calculated approximately using the recursive \emph{Schulz formula}:
      \begin{displaymath}
        \mathbb{A}_{i+1}^{-1} = 2 \mathbb{A}_i^{-1} - \mathbb{A}_i^{-1} \mathbb{A} \mathbb{A}_i^{-1}
      \end{displaymath}
      The \emph{Schulz formula} requires a good initial value for the inverse matrix $\mathbb{A}_1^{-1}$ or else the recursion diverges.
      \vskip1ex
      If the initial inverse matrix $\mathbb{A}_1^{-1}$ is very close to the actual inverse $\mathbb{A}^{-1}$, then the \emph{Schulz formula} produces a very good approximation with just a few iterations.
      \vskip1ex
      The \emph{Schulz formula} is useful for updating the inverse when the matrix $\mathbb{A}$ changes only slightly.  For example, for updating the inverse of the covariance matrix as it changes slowly over time.
      \vskip1ex
      The super-assignment operator \texttt{"<<-"} modifies variables in the \emph{enclosing} environment in which the function was \emph{defined} (\emph{lexical} scoping).
      <<echo=TRUE,eval=FALSE>>=
# Simulate a random square matrix
matv <- matrix(rnorm(100), ncol=10)
# Exact inverse of matv
invmat <- solve(a=matv)
# The inverse times the matrix is equal to the unit matrix
round(invmat %*% matv, 4)
# Initial value of the inverse: exact inverse plus random noise
invmatr <- invmat + matrix(rnorm(100, sd=0.1), ncol=10)
# Apply a single iteration of the Schulz formula
invmatr <- (2*invmatr - invmatr %*% matv %*% invmatr)
# Sum of the elements above the diagonal of the product -
# it is zero for the exact inverse
sum((invmatr %*% matv)[upper.tri(matv)])
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/eigen_recursive.png}
      <<echo=TRUE,eval=FALSE>>=
# Apply the Schulz formula recursively, starting from a noisy inverse
invmatr <- invmat + matrix(rnorm(100, sd=0.1), ncol=10)
iterv <- vapply(1:5, function(iter) {
# Update invmatr in the enclosing environment using super-assignment
  invmatr <<- (2*invmatr - invmatr %*% matv %*% invmatr)
# Sum of the elements above the diagonal of the product
  sum((invmatr %*% matv)[upper.tri(matv)])
}, FUN.VALUE=numeric(1))  # end vapply
# Plot the values after each iteration
plot(x=1:5, y=iterv, t="l", xlab="iterations", ylab="error",
     main="Iterations of Recursive Matrix Inverse")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Bonds and Interest Rates}


%%%%%%%%%%%%%%%
\subsection{Downloading Treasury Bond Rates from \protect\emph{FRED}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The constant maturity Treasury rates are yields of hypothetical fixed-maturity bonds, interpolated from the market yields of actual Treasury bonds.
      \vskip1ex
      The \emph{FRED} database contains current and historical constant maturity Treasury rates, \\
      \hskip1em\url{https://fred.stlouisfed.org/series/DGS5}
      \vskip1ex
      \texttt{quantmod::getSymbols()} creates objects in the specified \emph{environment} from the input strings (names).
      \vskip1ex
      It then assigns the data to those objects, without returning them as a function value, as a \emph{side effect}.
      <<echo=TRUE,eval=FALSE>>=
# Names of the constant maturity Treasury rate series
symbolv <- c("DGS1", "DGS2", "DGS5", "DGS10", "DGS20", "DGS30")
# Environment for holding the downloaded time series
ratesenv <- new.env()
# Download the series named in symbolv from FRED into ratesenv
quantmod::getSymbols(symbolv, env=ratesenv, src="FRED")
# Count the NA values in every series
sapply(ratesenv, function(ratev) sum(is.na(ratev)))
# Replace every series in ratesenv with its copy without NA values
sapply(ls(ratesenv), function(namev) {
  ratev <- get(namev, ratesenv)
  assign(x=namev, value=na.omit(ratev), envir=ratesenv)
}) # end sapply
# Confirm that no NA values are left
sapply(ratesenv, function(ratev) sum(is.na(ratev)))
# Class of every object in ratesenv
sapply(ratesenv, class)
# Class of every object in the R workspace
sapply(ls(), function(namev) class(get(namev)))
# Save the environment with the time series into a binary .RData file
save(ratesenv, file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/treas_10y_rate.png}
      <<echo=TRUE,eval=FALSE>>=
# Get class of time series object DGS10
class(get(x="DGS10", envir=ratesenv))
# Another way
class(ratesenv$DGS10)
# Get first 6 rows of time series
head(ratesenv$DGS10)
# Plot dygraphs of 10-year Treasury rate.
# The functions are called with their package prefix, and without
# the pipe operator, so the packages don't need to be attached.
dygraphs::dyOptions(
  dygraphs::dygraph(ratesenv$DGS10, main="10-year Treasury Rate"),
  colors="blue", strokeWidth=2)
# Plot 10-year constant maturity Treasury rate
x11(width=6, height=5)
par(mar=c(2, 2, 0, 0), oma=c(0, 0, 0, 0))
quantmod::chart_Series(ratesenv$DGS10["1990/"],
  name="10-year Treasury Rate")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Treasury Yield Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{yield curve} is a vector of interest rates at different maturities, on a given date.
      \vskip1ex
      The \emph{yield curve} shape changes depending on the economic conditions: in recessions rates drop and the curve flattens, while in expansions rates rise and the curve steepens.
      <<echo=TRUE,eval=FALSE>>=
# Load the environment ratesenv with the Treasury rates
load(file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
# Most recent yield curve: the last row of every series
ycnow <- eapply(ratesenv, xts::last)
class(ycnow)
ycnow <- do.call(cbind, ycnow)
# Print the day of the week of 2020-03-25
date2020 <- as.Date("2020-03-25")
weekdays(date2020)
# Yield curve on 2020-03-25
yc2020 <- eapply(ratesenv, function(ratev) ratev[date2020])
yc2020 <- do.call(cbind, yc2020)
# Stack the two yield curves into a single time series
ycurves <- c(yc2020, ycnow)
# Remove the first three characters from the column names
colnames(ycurves) <- substr(colnames(ycurves), start=4, stop=11)
# Sort the columns by their numeric names
ycurves <- ycurves[, order(as.numeric(colnames(ycurves)))]
colnames(ycurves) <- paste0(colnames(ycurves), "yr")
# Transpose so that the curves are the columns
ycurves <- t(ycurves)
# Keep only the first four characters of the column names
colnames(ycurves) <- substr(colnames(ycurves), start=1, stop=4)
      @
    \column{0.5\textwidth}
    \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/yield_curve.png}
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 0), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
# Plot using matplot()
colorv <- c("blue", "red")
# The title is built from the column names of ycurves, so that
# it remains correct whenever the rates data are downloaded
matplot(ycurves, xaxt="n", lwd=3, lty=1,
        main=paste("Yield Curves in",
                   paste(colnames(ycurves), collapse=" and ")),
        type="l", xlab="maturity", ylab="yield", col=colorv)
# Add x-axis with the row names of ycurves as labels
axis(1, seq_along(rownames(ycurves)), rownames(ycurves))
# Add legend
legend("topleft", legend=colnames(ycurves), y.intersp=0.1,
       bty="n", col=colorv, lty=1, lwd=6, inset=0.05, cex=1.0)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Treasury Yield Curve Over Time}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{yield curve} has changed shape dramatically depending on the economic conditions: in recessions rates drop and the curve flattens, while in expansions rates rise and the curve steepens.
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 0), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
# Load the environment ratesenv with the Treasury rates
load(file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
# End-of-year dates since 2006, taken from the series DGS1
datev <- xts::endpoints(ratesenv$DGS1["2006/"], on="years")
datev <- zoo::index(ratesenv$DGS1["2006/"][datev])
# Subset every series to the end-of-year dates and combine them
ycurves <- eapply(ratesenv, function(seriesv) seriesv[datev])
ycurves <- rutils::do_call(cbind, ycurves)
# Remove the first three characters from the column names
colnames(ycurves) <- substr(colnames(ycurves), start=4, stop=11)
# Sort the columns by their numeric names
ycurves <- ycurves[, order(as.numeric(colnames(ycurves)))]
colnames(ycurves) <- paste0(colnames(ycurves), "yr")
# Transpose so that the curves are the columns
ycurves <- t(ycurves)
# Keep only the first four characters of the column names
colnames(ycurves) <- substr(colnames(ycurves), start=1, stop=4)
# Plot the matrix using plot.zoo(), with one color per column
colorv <- colorRampPalette(c("red", "blue"))(NCOL(ycurves))
plot.zoo(ycurves, plot.type="single", col=colorv, lwd=3, xaxt="n",
         main="Yield Curve Since 2006", xlab="maturity", ylab="yield")
# Add x-axis with the row names of ycurves as labels
axis(side=1, at=seq_along(rownames(ycurves)), labels=rownames(ycurves))
# Add legend
legend("topleft", legend=colnames(ycurves), col=colorv, lty=1, lwd=4,
       bty="n", y.intersp=0.1, inset=0.05, cex=0.8)
      @
    \column{0.5\textwidth}
    \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/yield_curve_time.png}
      <<echo=TRUE,eval=FALSE>>=
# Plot the same yield curves using matplot() instead
matplot(ycurves, type="l", lty=1, lwd=3, col=colorv, xaxt="n",
        main="Yield curve since 2006", xlab="maturity", ylab="yield")
# Add x-axis with the row names of ycurves as labels
axis(side=1, at=seq_along(rownames(ycurves)), labels=rownames(ycurves))
# Add legend
legend("topleft", legend=colnames(ycurves), col=colorv, lty=1, lwd=4,
       bty="n", y.intersp=0.1, inset=0.05, cex=0.8)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Covariance Matrix of Interest Rates}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The covariance matrix $\mathbb{C}$, of the interest rate matrix $\mathbf{r}$ is given by:
      \begin{displaymath}
        \mathbb{C} = \frac{(\mathbf{r} - \bar{\mathbf{r}})^T (\mathbf{r} - \bar{\mathbf{r}})} {n-1}
      \end{displaymath}
      <<echo=TRUE,eval=FALSE>>=
# Combine the rates from ratesenv into a single time series
symbolv <- c("DGS1", "DGS2", "DGS5", "DGS10", "DGS20")
ratem <- mget(symbolv, envir=ratesenv)
ratem <- rutils::do_call(cbind, ratem)
# Fill the NA values: carry the last value forward, then backward
ratem <- zoo::na.locf(ratem, na.rm=FALSE)
ratem <- zoo::na.locf(ratem, fromLast=TRUE)
# Daily percentage changes of the rates, from the log rates
retp <- rutils::diffit(log(ratem))
# Center (de-mean) every column of returns
retp <- lapply(retp, function(colv) {colv - mean(colv)})
retp <- rutils::do_call(cbind, retp)
sapply(retp, mean)
# Covariance and correlation matrices of the Treasury rate returns
covmat <- cov(retp)
cormat <- cor(retp)
# Reorder the rows and columns of the correlation matrix
# according to the hierarchical clustering order
library(corrplot)
ordern <- corrMatOrder(cormat, order="hclust",
  hclust.method="complete")
cormat <- cormat[ordern, ordern]
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.4\paperwidth]{figure/yield_cor.png}
      <<echo=TRUE,eval=FALSE>>=
# Open a plot window for the correlation matrix
x11(width=6, height=6)
# Color palette function from red through white to blue
colorv <- colorRampPalette(c("red", "white", "blue"))
corrplot(cormat, method="square", col=colorv(NCOL(cormat)),
    title=NA, tl.col="black", tl.cex=0.8,
    cl.offset=0.75, cl.cex=0.7, cl.align.text="l", cl.ratio=0.25)
title("Correlation of Treasury Rates", line=1)
# Add red rectangles around the clusters of the correlation matrix
corrRect.hclust(cormat, k=NROW(cormat) %/% 2,
  method="complete", col="red")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Principal Component Vectors}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal components} are linear combinations of the \texttt{k} return vectors $\mathbf{r}_i$:
      \begin{displaymath}
        \mathbf{pc}_j = \sum_{i=1}^k {w_{ij} \, \mathbf{r}_i}
      \end{displaymath}
      Where $\mathbf{w}_j$ is a vector of weights (loadings) of the \emph{principal component} \texttt{j}, with $\mathbf{w}_j^T \mathbf{w}_j = 1$.
      \vskip1ex
      The weights $\mathbf{w}_j$ are chosen to maximize the variance of the \emph{principal components}, under the condition that they are orthogonal:
      \begin{align*}
        \mathbf{w}_j = {\operatorname{\arg \, \max}} \, \left\{ \mathbf{pc}_j^T \, \mathbf{pc}_j \right\} \\
        \mathbf{pc}_i^T \, \mathbf{pc}_j = 0 \> (i \neq j)
      \end{align*}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create initial vector of portfolio weights
nweights <- NROW(symbolv)
weightv <- rep(1/sqrt(nweights), nweights)
names(weightv) <- symbolv
# Objective function equal to minus the portfolio variance,
# plus a penalty for the weights not having unit length.
# weightv: numeric vector of portfolio weights.
# retp: matrix of returns, with one column per weight.
objfun <- function(weightv, retp) {
  retp <- retp %*% weightv
  -1e7*var(retp) + 1e7*(1 - sum(weightv*weightv))^2
}  # end objfun
# Objective function for equal weight portfolio
objfun(weightv, retp)
# Compare speed of methods for calculating the sum of squares
# of a vector.  Both expressions are applied to the same vector
# of portfolio returns, so that they calculate the same number.
library(microbenchmark)
retv <- drop(retp %*% weightv)
summary(microbenchmark(
  transp=t(retv) %*% retv,
  sumv=sum(retv*retv),
  times=10))[, c(1, 4, 5)]
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_load1.png}
      <<echo=TRUE,eval=FALSE>>=
# Minimize the objective function to find the weights
# with the maximum variance, subject to box constraints
optiml <- optim(par=weightv, fn=objfun, retp=retp,
  method="L-BFGS-B",
  lower=rep(-5.0, nweights),
  upper=rep(5.0, nweights))
# Optimal weights and the objective function at the optimum
weights1 <- optiml$par
objfun(weights1, retp)
# Bar plot of the first principal component loadings
x11(width=6, height=5)
par(mar=c(3, 3, 2, 1), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
barplot(weights1, names.arg=names(weights1),
  main="First Principal Component Loadings", xlab="", ylab="")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Higher Order Principal Components}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{second principal component} can be calculated by maximizing its variance, under the constraint that it must be orthogonal to the \emph{first principal component}.
      \vskip1ex
      Similarly, higher order \emph{principal components} can be calculated by maximizing their variances, under the constraint that they must be orthogonal to all the previous \emph{principal components}.
      \vskip1ex
      The number of principal components is equal to the dimension of the covariance matrix.
      <<echo=TRUE,eval=FALSE>>=
# Returns of the first principal component
pc1 <- drop(retp %*% weights1)
# Redefine the objective function, with an extra penalty
# for the weights not being orthogonal to weights1
objfun <- function(weightv, retp) {
  retp <- retp %*% weightv
  -1e7*var(retp) + 1e7*(1 - sum(weightv^2))^2 +
    1e7*sum(weights1*weightv)^2
}  # end objfun
# Minimize the objective function to find
# the second principal component weights
optiml <- optim(par=weightv, fn=objfun, retp=retp,
                   method="L-BFGS-B",
                   lower=rep(-5.0, nweights),
                   upper=rep(5.0, nweights))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_load2.png}
      <<echo=TRUE,eval=FALSE>>=
# Weights and returns of the second principal component
weights2 <- optiml$par
pc2 <- drop(retp %*% weights2)
# Inner product of the first two principal component returns
sum(pc1*pc2)
# Bar plot of the second principal component loadings
barplot(weights2, names.arg=names(weights2),
        main="Second Principal Component Loadings", xlab="", ylab="")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Eigenvalues of the Covariance Matrix}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The portfolio variance: $\mathbf{w}^T \mathbb{C} \, \mathbf{w}$ can be maximized under the \emph{quadratic} weights constraint $\mathbf{w}^T \mathbf{w} = 1$, by maximizing the \emph{Lagrangian} $\mathcal{L}$:
      \begin{displaymath}
        \mathcal{L} = \mathbf{w}^T \mathbb{C} \, \mathbf{w} \, - \, \lambda \, (\mathbf{w}^T \mathbf{w} - 1)
      \end{displaymath}
      Where $\lambda$ is a \emph{Lagrange multiplier}.
      \vskip1ex
      The maximum variance portfolio weights can be found by differentiating $\mathcal{L}$ with respect to $\mathbf{w}$ and setting it to zero:
      \begin{displaymath}
        \mathbb{C} \, \mathbf{w} = \lambda \, \mathbf{w}
      \end{displaymath}
      The above is the \emph{eigenvalue} equation of the covariance matrix $\mathbb{C}$, with the optimal weights $\mathbf{w}$ forming an \emph{eigenvector}, and $\lambda$ is the \emph{eigenvalue} corresponding to the \emph{eigenvector} $\mathbf{w}$.
      \vskip1ex
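      The \emph{eigenvalues} are the variances of the \emph{principal components} (the portfolios with weights equal to the \emph{eigenvectors}), and their sum is equal to the sum of the variances of the (centered) returns, where \texttt{n} is the number of time periods:
      \begin{displaymath}
        \sum_{i=1}^k \lambda_i = \frac{1}{n-1} \sum_{i=1}^k {\mathbf{r}_i^T \mathbf{r}_i}
      \end{displaymath}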
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_eigenvalues.png}
      <<echo=TRUE,eval=FALSE>>=
# Eigen decomposition of the covariance matrix
eigend <- eigen(covmat)
eigend$vectors
# The sum of the eigenvalues is equal to the sum of the variances
all.equal(sum(diag(covmat)), sum(eigend$values))
# Compare the eigenvectors with the weights from optimization,
# up to the sign
all.equal(abs(eigend$vectors[, 1]), abs(weights1), check.attributes=FALSE)
all.equal(abs(eigend$vectors[, 2]), abs(weights2), check.attributes=FALSE)
# Compare the eigenvalues with the principal component variances
all.equal(eigend$values[1], var(pc1), check.attributes=FALSE)
all.equal(eigend$values[2], var(pc2), check.attributes=FALSE)
# The eigenvalue equations are satisfied approximately
(covmat %*% weights1) / weights1 / var(pc1)
(covmat %*% weights2) / weights2 / var(pc2)
# Bar plot of the eigenvalues
barplot(eigend$values, names.arg=paste0("PC", seq_len(nweights)),
  main="Principal Component Variances", xlab="", ylab="", las=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Principal Component Analysis} Versus \protect\emph{Eigen Decomposition}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal Component Analysis} (\emph{PCA}) is equivalent to the \emph{eigen decomposition} of either the correlation or the covariance matrix.
      \vskip1ex
      If the input time series \emph{are} scaled, then \emph{PCA} is equivalent to the eigen decomposition of the \emph{correlation matrix}.
      \vskip1ex
      If the input time series \emph{are not} scaled, then \emph{PCA} is equivalent to the eigen decomposition of the \emph{covariance matrix}.
      \vskip1ex
      Scaling the input time series improves the accuracy of the \emph{PCA dimension reduction}, allowing a smaller number of \emph{principal components} to more accurately capture the data contained in the input time series.
      \vskip1ex
      The function \texttt{prcomp()} performs \emph{Principal Component Analysis} on a matrix of data (with the time series as columns), and returns the results as a list of class \texttt{prcomp}.
      \vskip1ex
      The \texttt{prcomp()} argument \texttt{scale=TRUE} specifies that the input time series should be scaled by their standard deviations.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Eigen decomposition of the correlation matrix.
# The correlation matrix is recalculated from the returns, because
# the rows and columns of cormat may have been reordered by clusters
# (for plotting), and then its eigenvectors wouldn't match the
# rows of pcad$rotation.
eigend <- eigen(cor(retp))
# Perform PCA with scaling
pcad <- prcomp(retp, scale=TRUE)
# Compare outputs: eigenvalues are the squared standard deviations,
# and eigenvectors are the loadings (up to the sign)
all.equal(eigend$values, pcad$sdev^2)
all.equal(abs(eigend$vectors), abs(pcad$rotation),
          check.attributes=FALSE)
# Eigen decomposition of covariance matrix
eigend <- eigen(covmat)
# Perform PCA without scaling
pcad <- prcomp(retp, scale=FALSE)
# Compare outputs
all.equal(eigend$values, pcad$sdev^2)
all.equal(abs(eigend$vectors), abs(pcad$rotation),
          check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Principal Component Analysis of the Yield Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal Component Analysis} (\emph{PCA}) is a \emph{dimension reduction} technique, that explains the returns of a large number of correlated time series as linear combinations of a smaller number of principal component time series.
      \vskip1ex
      The input time series are often scaled by their standard deviations, to improve the accuracy of \emph{PCA dimension reduction}, so that more information is retained by the first few \emph{principal component} time series.
      \vskip1ex
      If the input time series are not scaled, then \emph{PCA} analysis is equivalent to the \emph{eigen decomposition} of the covariance matrix, and if they are scaled, then \emph{PCA} analysis is equivalent to the \emph{eigen decomposition} of the correlation matrix.
      \vskip1ex
      The function \texttt{prcomp()} performs \emph{Principal Component Analysis} on a matrix of data (with the time series as columns), and returns the results as a list of class \texttt{prcomp}.
      \vskip1ex
      The \texttt{prcomp()} argument \texttt{scale=TRUE} specifies that the input time series should be scaled by their standard deviations.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_scree.png}\\
      A \emph{scree plot} is a bar plot of the volatilities of the \emph{principal components}.
      <<echo=TRUE,eval=FALSE>>=
# Principal component analysis of the scaled returns
pcad <- prcomp(retp, scale=TRUE)
# Scree plot: bar plot of the standard deviations
# of the principal components
barplot(pcad$sdev, names.arg=colnames(pcad$rotation),
  main="Scree Plot: Volatilities of Principal Components
  of Treasury rates",
  xlab="", ylab="", las=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Yield Curve Principal Component Loadings (Weights)}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal component} loadings are the weights of portfolios which have mutually orthogonal returns.
      \vskip1ex
      The \emph{principal component} portfolios represent the different orthogonal modes of the data variance.
      \vskip1ex
      The first \emph{principal component} of the \emph{yield curve} is the correlated movement of all rates up and down.
      \vskip1ex
      The second \emph{principal component} is \emph{yield curve} steepening and flattening.
      \vskip1ex
      The third \emph{principal component} is the \emph{yield curve} butterfly movement.
      <<echo=(-(1:1)),eval=FALSE>>=
x11(width=6, height=7)
# Print the matrix of principal component loadings (weights)
pcad$rotation
# Set up the panels: three rows by two columns
par(mfrow=c(3,2))
par(mar=c(3.5, 2, 2, 1), oma=c(0, 0, 0, 0))
# Bar plot of the loadings of each principal component
for (ordern in seq_len(NCOL(pcad$rotation))) {
  barplot(pcad$rotation[, ordern], main="", xlab="", ylab="", las=3)
  title(paste0("PC", ordern), line=-2.0, col.main="red")
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_loadings.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Yield Curve Principal Component Time Series}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The time series of the \emph{principal components} can be calculated by multiplying the loadings (weights) times the original data.
      \vskip1ex
      The \emph{principal component} time series have mutually orthogonal returns.
      \vskip1ex
      Higher order \emph{principal components} are gradually less volatile.
      <<echo=TRUE,eval=FALSE>>=
# Standardize (center and scale) every column of returns
retp <- lapply(retp, function(colv) {(colv - mean(colv))/sd(colv)})
retp <- rutils::do_call(cbind, retp)
sapply(retp, mean)
sapply(retp, sd)
# Principal component time series: the returns times the loadings
retpcac <- retp %*% pcad$rotation
all.equal(pcad$x, retpcac, check.attributes=FALSE)
# Matrix of inner products of the principal component time series
round(t(retpcac) %*% retpcac, 2)
# Coerce to xts time series and calculate the cumulative sums
retpcac <- xts::xts(retpcac, order.by=zoo::index(retp))
retpcac <- cumsum(retpcac)
# Set up the panels: three rows by two columns
par(mfrow=c(3,2))
par(mar=c(2, 2, 0, 1), oma=c(0, 0, 0, 0))
# Plot every principal component with the same y-axis range
rangev <- range(retpcac)
for (ordern in seq_len(NCOL(retpcac))) {
  plot.zoo(retpcac[, ordern], xlab="", ylab="", ylim=rangev)
  title(paste0("PC", ordern), line=-1, col.main="red")
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_series.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Inverting Principal Component Analysis}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The original time series can be calculated \emph{exactly} from the time series of all the \emph{principal components}, by inverting the loadings matrix.
      \vskip1ex
      The function \texttt{solve()} solves systems of linear equations, and also inverts square matrices.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Principal component time series from all the loadings
retpca <- retp %*% pcad$rotation
# Recover the returns by multiplying by the inverse of the loadings
solved <- retpca %*% solve(pcad$rotation)
# Compare the recovered returns with the original returns
all.equal(coredata(retp), solved)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Dimension Reduction} Using Principal Component Analysis}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The original time series can be calculated \emph{approximately} from just the first few \emph{principal components}, which demonstrates that \emph{PCA} is a form of \emph{dimension reduction}.
      \vskip1ex
      A popular rule of thumb is to use the \emph{principal components} with the largest variances, which sum up to \texttt{80\%} of the total variance of returns.
      \vskip1ex
      The \emph{Kaiser-Guttman} rule uses only \emph{principal components} with variance greater than $1$.
      <<echo=TRUE,eval=FALSE>>=
# Invert first 3 principal component time series
solved <- retpca[, 1:3] %*% solve(pcad$rotation)[1:3, ]
solved <- xts::xts(solved, zoo::index(retp))
solved <- cumsum(solved)
retc <- cumsum(retp)
# Plot the solved returns
par(mfrow=c(3,2))
par(mar=c(2, 2, 0, 1), oma=c(0, 0, 0, 0))
for (symbol in symbolv) {
  # Plot the cumulative returns and their approximation from 3 PCs
  plot.zoo(cbind(retc[, symbol], solved[, symbol]),
    plot.type="single", col=c("black", "blue"), xlab="", ylab="")
  # The legend labels are built from the loop variable symbol
  legend(x="topleft", bty="n", y.intersp=0.1,
         legend=paste0(symbol, c("", " solved")),
         title=NULL, inset=0.0, cex=1.0, lwd=6,
         lty=1, col=c("black", "blue"))
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_series_solved.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Calibrating Yield Curve Using Package \protect\emph{RQuantLib}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The package
      \href{https://cran.r-project.org/web/packages/RQuantLib/index.html}{\emph{RQuantLib}}
      is an interface to the
      \href{http://quantlib.org/index.shtml}{\emph{QuantLib}}
      open source \texttt{C/C++} library for quantitative finance, mostly designed for pricing fixed-income instruments and options.
      \vskip1ex
      The function \texttt{DiscountCurve()} calibrates a \emph{zero coupon yield curve} from \emph{money market} rates, \emph{Eurodollar} futures, and \emph{swap} rates.
      \vskip1ex
      The function \texttt{DiscountCurve()} interpolates the \emph{zero coupon} rates into a vector of dates specified by the \texttt{times} argument.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:1)),eval=FALSE>>=
library(quantmod)  # Load quantmod
library(RQuantLib)  # Load RQuantLib
# Specify curve parameters
curvep <- list(tradeDate=as.Date("2018-01-17"),
               settleDate=as.Date("2018-01-19"),
               dt=0.25,
               interpWhat="discount",
               interpHow="loglinear")
# Specify market data: prices of FI instruments
pricev <- list(d3m=0.0363,
               fut1=96.2875,
               fut2=96.7875,
               fut3=96.9875,
               fut4=96.6875,
               s5y=0.0443,
               s10y=0.05165,
               s15y=0.055175)
# Specify times for calculating the zero rates
datev <- seq(0, 10, 0.25)
# Specify the evaluation (as of) date
setEvaluationDate(as.Date("2018-01-17"))
# Calculate the zero rates
ratev <- DiscountCurve(params=curvep, tsQuotes=pricev, times=datev)
# Plot the zero rates against the curve times
# (not against the vector index)
x11()
plot(x=ratev$times, y=ratev$zerorates, type="l",
     xlab="times", ylab="zerorates", main="zerorates")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Univariate Regression}


%%%%%%%%%%%%%%%
\subsection{Vector and Matrix Calculus}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
    \begin{columns}[T]
    \column{0.5\textwidth}
      Let $\mathbf{v}$ and $\mathbf{w}$ be vectors, with $\mathbf{v} = \left\{ v_i \right\}_{i=1}^{i=n}$, and let $\mathbbm{1}$ be the unit vector, with $\mathbbm{1} = \left\{ 1 \right\}_{i=1}^{i=n}$.
      \vskip1ex
      Then the inner product of $\mathbf{v}$ and $\mathbf{w}$ can be written as $\mathbf{v}^T \mathbf{w} = \mathbf{w}^T \mathbf{v} = {\sum_{i=1}^n {v_i w_i}}$.
      \vskip1ex
      We can then express the sum of the elements of $\mathbf{v}$ as the inner product: $\mathbf{v}^T \mathbbm{1} = \mathbbm{1}^T \mathbf{v} = {\sum_{i=1}^n v_i}$.
      \vskip1ex
      And the sum of squares of $\mathbf{v}$ as the inner product: $\mathbf{v}^T \mathbf{v} = {\sum_{i=1}^n v^2_i}$.
      \vskip1ex
      Let $\mathbb{A}$ be a matrix, with $\mathbb{A} = \left\{ A_{ij} \right\}_{{i,j}=1}^{{i,j}=n}$.
      \vskip1ex
      Then the inner product of matrix $\mathbb{A}$ with vectors $\mathbf{v}$ and $\mathbf{w}$ can be written as:
      \begin{displaymath}
        \mathbf{v}^T \mathbb{A} \, \mathbf{w} = \mathbf{w}^T \mathbb{A}^T \mathbf{v} = {\sum_{{i,j}=1}^n {A_{ij} v_i w_j}}
      \end{displaymath}
    \column{0.5\textwidth}
      The derivative of a scalar variable with respect to a vector variable is a vector, for example:
      \begin{align*}
        \frac{d (\mathbf{v}^T \mathbbm{1})}{d \mathbf{v}} = d_v[\mathbf{v}^T \mathbbm{1}] = d_v[\mathbbm{1}^T \mathbf{v}] = \mathbbm{1}^T\\
        d_v[\mathbf{v}^T \mathbf{w}] = d_v[\mathbf{w}^T \mathbf{v}] = \mathbf{w}^T\\
        d_v[\mathbf{v}^T \mathbb{A} \, \mathbf{w}] = \mathbf{w}^T \mathbb{A}^T\\
        d_v[\mathbf{v}^T \mathbb{A} \, \mathbf{v}] = \mathbf{v}^T \mathbb{A} + \mathbf{v}^T \mathbb{A}^T
      \end{align*}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Formula Objects}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Formulas in \texttt{R} are defined using the "\textasciitilde{}" operator followed by a series of terms separated by the \texttt{"+"} operator.
      \vskip1ex
      Formulas can be defined as separate objects, manipulated, and passed to functions.
      \vskip1ex
      The formula "\texttt{z} \textasciitilde{} \texttt{x}" means the \emph{response vector} $z$ is explained by the \emph{predictor} $x$ (also called the \emph{explanatory variable} or \emph{independent variable}).
      \vskip1ex
      The formula "\texttt{z \textasciitilde{} x + y}" represents a linear model: \texttt{z = ax  + by + c}.
      \vskip1ex
      The formula "\texttt{z \textasciitilde{} x - 1}" or "\texttt{z \textasciitilde{} x + 0}" represents a linear model with zero intercept: $z = ax$.
      \vskip1ex
      The function \texttt{update()} modifies existing \texttt{formulas}.
      \vskip1ex
      The \texttt{"."} symbol represents either all the remaining data, or the variable that was in this part of the formula.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define formula of linear model without intercept
formulav <- z ~ x + y - 1
formulav

# Create vector of term names and join them into single string
paste0("x", 1:5)
paste0("x", 1:5, collapse="+")

# Coerce the text string into a formula object
formulav <- as.formula(
  paste0("z ~ ", paste0("x", 1:5, collapse="+"))
)  # end as.formula
class(formulav)
formulav
# Transform the response and append a term using update()
update(formulav, log(.) ~ . + beta)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Simple \protect\emph{Linear Regression}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A Simple Linear Regression is a linear model between a \emph{response vector} $y$ and a single \emph{predictor} $x$, defined by the formula:
      \begin{displaymath}
        y_i = \alpha + \beta x_i + \varepsilon_i
      \end{displaymath}
      $\alpha$ and $\beta$ are the unknown \emph{regression coefficients}.
      \vskip1ex
      $\varepsilon_i$ are the \emph{residuals}, which are usually assumed to be normally distributed $\phi(0, \sigma_\varepsilon)$, independent, and stationary.
      \vskip1ex
      In the Ordinary Least Squares method (\emph{OLS}), the regression parameters are estimated by minimizing the \emph{Residual Sum of Squares} (\emph{RSS}):
      \begin{align*}
        RSS = \sum_{i=1}^n {\varepsilon^2_i} = \sum_{i=1}^n {(y_i - \alpha - \beta x_i)^2}\\ = (y - \alpha \mathbbm{1} - \beta x)^T (y - \alpha \mathbbm{1} - \beta x)
      \end{align*}
      Where $\mathbbm{1}$ is the unit vector, with $\mathbbm{1}^T \mathbbm{1} = n$ and $\mathbbm{1}^T x = x^T \mathbbm{1} = \sum_{i=1}^n {x_i}$
      \vskip1ex
      The data consists of $n$ pairs of observations $(x_i, y_i)$ of the response and predictor variables, with the index $i$ ranging from $1$ to $n$.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_scatter_plot.png}
        <<echo=TRUE,eval=TRUE>>=
# Number of observations
nrows <- 100
# Reset the random number generator for reproducibility
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
# Simulate the predictor (predm) and the random noise
predm <- runif(nrows)
noisev <- rnorm(nrows)
# Response is a linear function of the predictor plus the noise
respv <- -3 + 2*predm + noisev
      @
      The \emph{response vector} and the \emph{predictor matrix} don't have to be normally distributed.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Solution of \protect\emph{Linear Regression}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{OLS} solution for the \emph{regression coefficients} is found by equating the \emph{RSS} derivatives to zero:
      \begin{align*}
        RSS_\alpha = -2 (y - \alpha \mathbbm{1} - \beta x)^T \mathbbm{1} = 0\\
        RSS_\beta = -2 (y - \alpha \mathbbm{1} - \beta x)^T x = 0
      \end{align*}
      The solution for $\alpha$ is given by:
      \begin{displaymath}
        \alpha = \bar{y} - \beta \bar{x}
      \end{displaymath}
      The solution for $\beta$ can be obtained by manipulating the equation for $RSS_\beta$ as follows:
      \begin{flalign*}
        & (y - (\bar{y} - \beta \bar{x}) \mathbbm{1} - \beta x)^T (x - \bar{x} \mathbbm{1}) = \\
        & ((y - \bar{y} \mathbbm{1}) - \beta (x - \bar{x} \mathbbm{1}))^T (x - \bar{x} \mathbbm{1}) = \\
        & (\hat{y} - \beta \hat{x})^T \hat{x} = \hat{y}^T \hat{x} - \beta \hat{x}^T \hat{x} = 0
      \end{flalign*}
      Where $\hat{x} = x - \bar{x} \mathbbm{1}$ and $\hat{y} = y - \bar{y} \mathbbm{1}$ are the centered (de-meaned) variables.  Then $\beta$ is given by:
      \begin{displaymath}
        \beta = \frac {\hat{y}^T \hat{x}} {\hat{x}^T \hat{x}} = \frac {\sigma_y}{\sigma_x} \rho_{xy}
      \end{displaymath}
    \column{0.5\textwidth}
      $\beta$ is proportional to the correlation coefficient $\rho_{xy}$ between the response and predictor variables.
      \vskip1ex
      If the response and predictor variables have zero mean, then $\alpha=0$ and $\beta=\frac {y^T x} {x^T x}$.
      \vskip1ex
      The \emph{residuals} $\varepsilon = y - \alpha \mathbbm{1} - \beta x$ have zero mean: $RSS_\alpha = -2 \varepsilon^T \mathbbm{1} = 0$.
      \vskip1ex
      The \emph{residuals} $\varepsilon$ are orthogonal to the \emph{predictor} $x$: $RSS_\beta = -2 \varepsilon^T x = 0$.
      \vskip1ex
      The expected value of the \emph{RSS} is equal to the \emph{degrees of freedom} $(n-2)$ times the variance $\sigma^2_\varepsilon$ of the \emph{residuals} $\varepsilon_i$: $\mathbb{E}[RSS] = (n-2) \sigma^2_\varepsilon$.
        <<echo=TRUE,eval=TRUE>>=
# Calculate the regression beta as the covariance of the
# predictor and response divided by the predictor variance
betac <- cov(predm, respv)/var(predm)
# Calculate the regression alpha from the means of the
# response and predictor
alphac <- mean(respv) - betac*mean(predm)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Linear Regression} Using Function \texttt{lm()}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Let the data generating process for the response variable be given as: $z = \alpha_{lat} + \beta_{lat} x + \varepsilon_{lat}$
      \vskip1ex
      Where $\alpha_{lat}$ and $\beta_{lat}$ are latent (unknown) coefficients, and $\varepsilon_{lat}$ is an unknown vector of random noise (error terms).
      \vskip1ex
      The error terms are equal to the measured values of the response minus the (unknown) actual response values.
      \vskip1ex
      The function \texttt{lm()} fits a linear model to a set of data, and returns an object of class \texttt{"lm"}, which is a list containing the results of fitting the model:
      \begin{itemize}
        \item call - the model formula,
        \item coefficients - the fitted model coefficients ($\alpha$, $\beta_j$),
        \item residuals - the model residuals (respv minus fitted values),
      \end{itemize}
      The regression \emph{residuals} are not the same as the error terms, because the regression coefficients are not equal to the coefficients of the data generating process.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Define the regression formula: response explained by predictor
formulav <- respv ~ predm
# Fit the linear model
regmod <- lm(formula=formulav)
# The fitted model object has class lm
class(regmod)
attributes(regmod)
# Recover the regression formula from the stored call
eval(regmod$call$formula)
# Regression coefficients
coef(regmod)
# Compare with the coefficients calculated by hand
all.equal(coef(regmod), c(alphac, betac),
      check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{Fitted Values} of Linear Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{fitted values} $y_{fit}$ are the estimates of the \emph{response vector} obtained from the regression model:
      \begin{displaymath}
        y_{fit} = \alpha + \beta x
      \end{displaymath}
      \vskip1ex
      The \emph{generic function} \texttt{plot()} produces a scatterplot when it's called on the regression formula.
      \vskip1ex
      \texttt{abline()} plots a straight line corresponding to the regression coefficients, when it's called on the regression object.
        <<echo=-(1:3),eval=FALSE>>=
# x11(width=5, height=4)  # Open x11 for plotting
# Set plot parameters to reduce whitespace around plot
# par(mar=c(5, 5, 2, 1), oma=c(0, 0, 0, 0))
fitv <- alphac + betac*predm
all.equal(fitv, fitted(regmod), check.attributes=FALSE)
# Scatterplot of the response versus the predictor
plot(formulav, xlab="predictor", ylab="response")
title(main="Simple Regression", line=0.5)
# Draw the regression line
abline(regmod, col="blue", lwd=3)
# Mark the fitted (forecast) response values
points(x=predm, y=fitted(regmod), col="blue", pch=16)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_scatter_plot.png}
        <<echo=TRUE,eval=FALSE>>=
# Draw the response with the noise subtracted
lines(x=predm, y=(respv-noisev), lwd=3, col="red")
# Add legend
legend(x="topleft", bty="n", inset=0.0, title=NULL,
       legend=c("response without noise", "fitted values"),
       lty=1, lwd=6, col=c("red", "blue"),
       cex=1.0, y.intersp=0.3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Linear Regression} Residuals}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{residuals} $\varepsilon_i$ of a \emph{linear regression} are defined as the \emph{response vector} minus the fitted values:
      \begin{displaymath}
        \varepsilon_i = y_i - y_{fit}
      \end{displaymath}
        <<echo=TRUE,eval=TRUE>>=
# Fitted values from the regression coefficients
fitv <- alphac + betac*predm
# Residuals are the response minus the fitted values
resids <- respv - fitv
all.equal(resids, residuals(regmod), check.attributes=FALSE)
# Inner product of residuals and predictor is zero
all.equal(target=0, current=sum(resids*predm))
# Inner product of residuals and fitted values is zero
all.equal(target=0, current=sum(resids*fitv))
# Mean of residuals is equal to zero
all.equal(target=0, current=mean(resids))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_residuals.png}
        <<echo=TRUE,eval=FALSE>>=
# Open x11 window for plotting
x11(width=6, height=5)
# Shrink the margins to reduce whitespace around plot
par(mar=c(5, 5, 1, 1), oma=c(0, 0, 0, 0))
# Bind the predictor and the residuals into a two-column matrix
datav <- cbind(predictor=predm, residuals=regmod$residuals)
# Scatterplot of the residuals versus the predictor
plot(datav)
title(main="Residuals of the Linear Regression", line=-1)
# Add horizontal line at zero
abline(h=0, col="red", lwd=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Standard Errors of Regression Coefficients}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{residuals} are the source of error in the regression model, producing uncertainty in the \emph{response vector} $y$ and in the regression coefficients: $y_i = \alpha + \beta x_i + \varepsilon_i$.
      \vskip1ex
      The standard errors of the regression coefficients are equal to their standard deviations, given the \emph{residuals} as the source of error.
      \vskip1ex
      Since $\beta = \frac {\hat{y}^T \hat{x}} {\hat{x}^T \hat{x}}$, then its variance is equal to:
      \begin{displaymath}
        \sigma^2_\beta = \frac{1}{(n-2)} \frac {E[(\varepsilon^T \hat{x})^2]} {(\hat{x}^T \hat{x})^2} = \frac{1}{(n-2)} \frac {E[\varepsilon^2]} {\hat{x}^T \hat{x}} = \frac {\sigma^2_\varepsilon} {\hat{x}^T \hat{x}}
      \end{displaymath}
      Since $\alpha = \bar{y} - \beta \bar{x}$, then its variance is equal to:
      \begin{displaymath}
        \sigma^2_\alpha = \frac{\sigma^2_\varepsilon}{n} + \sigma^2_\beta \bar{x}^2 = \sigma^2_\varepsilon (\frac{1}{n} + \frac {\bar{x}^2} {\hat{x}^T \hat{x}})
      \end{displaymath}
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Center (de-mean) the predictor and response vectors
predc <- predm - mean(predm)
respc <- respv - mean(respv)
# Residual degrees of freedom of the fitted model
degf <- df.residual(regmod)
# Residual standard deviation scaled by the degrees of freedom
residsd <- sqrt(sum(resids^2)/degf)
# Standard error of beta
betasd <- residsd/sqrt(sum(predc^2))
# Standard error of alpha
alphasd <- residsd*sqrt(1/nrows + mean(predm)^2/sum(predc^2))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Linear Regression} Summary}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{summary.lm()} produces a list of regression model diagnostic statistics:
      \begin{itemize}
        \item coefficients: matrix with estimated coefficients, their \emph{t}-statistics, and \emph{p}-values,
        \item r.squared: fraction of response variance explained by the model,
        \item adj.r.squared: r.squared adjusted for higher model complexity,
        \item fstatistic: ratio of the variance explained by the model to the unexplained variance,
      \end{itemize}
      The regression \texttt{summary} is a list, and its elements can be accessed individually.
      \vskip1ex
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Calculate the regression summary
regsum <- summary(regmod)
# Print the summary to console
regsum
# List the names of the summary elements
names(regsum)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Regression Model Diagnostic Statistics}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{null hypothesis} for regression is that the coefficients are \emph{zero}.
      \vskip1ex
      The \emph{t}-statistic (\emph{t}-value) is the ratio of the estimated value to its standard error.
      \vskip1ex
      The \emph{p}-value is the probability of obtaining values exceeding the \emph{t}-statistic, assuming the \emph{null hypothesis} is true.
      \vskip1ex
      A small \emph{p}-value means that the regression coefficients are very unlikely to be zero (given the data).
      \vskip1ex
      The key assumption in the formula for the standard error is that the \emph{residuals} are normally distributed, independent, and stationary.
      \vskip1ex
      If they are not, then the standard error and the \emph{p}-value may be much bigger than reported by \texttt{summary.lm()}, and therefore the regression may not be statistically significant.
      \vskip1ex
      Asset returns are very far from normal, so the small \emph{p}-values shouldn't be automatically interpreted as meaning that the regression is statistically significant.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Matrix of coefficients with their statistics
regsum$coefficients
# Standard error of the second coefficient (slope)
regsum$coefficients[2, "Std. Error"]
# Compare with the standard errors calculated by hand
all.equal(c(alphasd, betasd), regsum$coefficients[, "Std. Error"],
  check.attributes=FALSE)
# R-squared and adjusted R-squared
regsum$r.squared
regsum$adj.r.squared
# F-statistic and ANOVA table
regsum$fstatistic
anova(regmod)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Weak Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      If the relationship between the response and predictor variables is weak compared to the error terms (noisev), then the regression will have low statistical significance.
      \vskip1ex
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=(-(1:1)),eval=TRUE>>=
# Initialize the random number generator
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
# Simulate response with noise that is large compared to the slope
respv <- -3 + 2*predm + rnorm(nrows, sd=8)
# Fit the regression to the noisy response
regmod <- lm(formula=formulav)
# The values of the regression coefficients are not
# statistically significant
summary(regmod)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Influence of Noise on Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
    \vspace{-1em}
      <<reg_noise,eval=FALSE,echo=(-(1:1)),fig.height=5.2,fig.show='hide'>>=
par(oma=c(1, 1, 1, 1), mgp=c(0, 0.5, 0), mar=c(1, 1, 1, 1), cex.lab=1.0, cex.axis=1.0, cex.main=1.0, cex.sub=1.0)
# Calculate regression statistics for simulated data.
# stdev is the standard deviation of the noise, and nrows is
# the number of observations (shared by predictor and noise).
# Returns a vector with the slope p-value, the adjusted
# R-squared, and the F-statistic.
regstats <- function(stdev, nrows=100) {  # Noisy regression
  set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")  # initialize number generator
# Define explanatory (predm) and response variables
# Both use nrows, so their lengths always agree
  predm <- rnorm(nrows, mean=2)
  respv <- (1 + 0.2*predm + rnorm(nrows, sd=stdev))
# Specify regression formula
  formulav <- respv ~ predm
# Perform regression and get summary
  regsum <- summary(lm(formulav))
# Extract regression statistics
  with(regsum, c(pval=coefficients[2, 4],
         adj_rsquared=adj.r.squared,
         fstat=fstatistic[1]))
}  # end regstats
# Apply regstats() to vector of standard deviations
vecsd <- seq(from=0.1, to=0.5, by=0.1)
names(vecsd) <- paste0("sd=", vecsd)
statsmat <- t(sapply(vecsd, regstats))
# Plot each statistic in a separate panel
par(mfrow=c(NCOL(statsmat), 1))
for (it in seq_len(NCOL(statsmat))) {
  plot(statsmat[, it], type="l",
       xaxt="n", xlab="", ylab="", main="")
  title(main=colnames(statsmat)[it], line=-1.0)
  axis(1, at=seq_len(NROW(statsmat)), labels=rownames(statsmat))
}  # end for
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_noise-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Influence of Noise on Regression Another Method}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
    \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Calculate regression statistics from a data frame with the
# predictor in the first column and the response in the second.
# Returns a vector with the slope p-value, the adjusted
# R-squared, and the F-statistic.
regstats <- function(datav) {  # get regression
# Perform regression and get summary
  colv <- colnames(datav)
  formulav <- paste(colv[2], colv[1], sep="~")
  regsum <- summary(lm(formulav, data=datav))
# Extract regression statistics
  with(regsum, c(pval=coefficients[2, 4],
         adj_rsquared=adj.r.squared,
         fstat=fstatistic[1]))
}  # end regstats
# Apply regstats() to vector of standard deviations
vecsd <- seq(from=0.1, to=0.5, by=0.1)
names(vecsd) <- paste0("sd=", vecsd)
statsmat <- t(sapply(vecsd, function(stdev) {
    set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")  # initialize number generator
# Define explanatory (predm) and response variables
# Both use the local nrows, so their lengths always agree
    nrows <- 100
    predm <- rnorm(nrows, mean=2)
    respv <- (1 + 0.2*predm + rnorm(nrows, sd=stdev))
    regstats(data.frame(predm, respv))
    }))
# Plot each statistic in a separate panel
par(mfrow=c(NCOL(statsmat), 1))
for (it in seq_len(NCOL(statsmat))) {
  plot(statsmat[, it], type="l",
       xaxt="n", xlab="", ylab="", main="")
  title(main=colnames(statsmat)[it], line=-1.0)
  axis(1, at=seq_len(NROW(statsmat)),
       labels=rownames(statsmat))
}  # end for
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_noise-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Linear Regression} Diagnostic Plots}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \texttt{plot()} produces diagnostic scatterplots for the \emph{residuals}, when called on the regression object.
      \vskip1ex
      {\scriptsize
      The diagnostic scatterplots allow for visual inspection to determine the quality of the regression fit.
      \vskip1ex
      "Residuals vs Fitted" is a scatterplot of the residuals vs. the forecast responses.
      \vskip1ex
      "Scale-Location" is a scatterplot of the square root of the absolute standardized residuals vs. the forecast responses.
      \vskip1ex
      The residuals should be randomly distributed around the horizontal line representing zero residual error.
      \vskip1ex
      A pattern in the residuals indicates that the model was not able to capture the relationship between the variables, or that the variables don't follow the statistical assumptions of the regression model.
      \vskip1ex
      "Normal Q-Q" is the standard Q-Q plot, and the points should fall on the diagonal line, indicating that the residuals are normally distributed.
      \vskip1ex
      "Residuals vs Leverage" is a scatterplot of the residuals vs. their leverage.
      \vskip1ex
      Leverage measures the amount by which the fitted values would change if the response values were shifted by a small amount.
      \vskip1ex
      Cook's distance measures the influence of a single observation on the fitted values, and is proportional to the sum of the squared differences between forecasts made with all observations and forecasts made without the observation.
      \vskip1ex
      Points with large leverage, or a Cook's distance greater than 1, suggest the presence of an outlier or a poor model.
      }
    \column{0.5\textwidth}
      \vspace{-1em}
      <<plot_reg,eval=FALSE,echo=(-(1:2)),fig.show='hide'>>=
# Set plot parameters - margins and font scale
par(oma=c(1,0,1,0), mgp=c(2,1,0), mar=c(2,1,2,1), cex.lab=0.8, cex.axis=1.0, cex.main=0.8, cex.sub=0.5)
par(mfrow=c(2, 2))  # Plot 2x2 panels
plot(regmod)  # Plot diagnostic scatterplots
plot(regmod, which=2)  # Plot just Q-Q
      @
      \vspace{-1em}
      \hspace*{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/plot_reg-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Durbin-Watson Test of Autocorrelation of Residuals}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Durbin-Watson} test is designed to test the \emph{null hypothesis} that the autocorrelations of regression \emph{residuals} are equal to zero.
      \vskip1ex
      The test statistic is equal to:
      \begin{displaymath}
        DW = \frac {\sum_{i=2}^n (\varepsilon_i - \varepsilon_{i-1})^2} {\sum_{i=1}^n \varepsilon^2_i}
      \end{displaymath}
      Where $\varepsilon_i$ are the regression \emph{residuals}.
      \vskip1ex
      The value of the \emph{Durbin-Watson} statistic \emph{DW} is close to zero for large positive autocorrelations, and close to four for large negative autocorrelations.
      \vskip1ex
      The \emph{DW} is close to two for autocorrelations close to zero.
      \vskip1ex
      The \emph{p}-value for the \texttt{regmod} regression is large, so we cannot reject the \emph{null hypothesis} that the regression \emph{residuals} are uncorrelated.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=TRUE>>=
library(lmtest)  # Load lmtest
# Perform Durbin-Watson test of the regression object regmod
lmtest::dwtest(regmod)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Forecasts From Univariate Regression Models}


%%%%%%%%%%%%%%%
% \subsection{The \protect\emph{Influence Matrix} of Univariate Regression}
\subsection{Univariate Regression in Homogeneous Form}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{linear regression} can be written in \emph{homogeneous form} by defining a \emph{predictor matrix} $\mathbb{X} = (\mathbbm{1}, x)$ with two columns, with the unit column representing the intercept:
      \begin{displaymath}
        y = \mathbb{X} \beta + \varepsilon
      \end{displaymath}
      The two \emph{regression coefficients} are combined into a vector: $\beta = (\alpha, \beta)$.
      \vskip1ex
      The solution for the regression coefficients $\beta$ is given by:
      \begin{displaymath}
        \beta = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T y = \mathbb{X}^{inv} y
      \end{displaymath}
      The matrix $\mathbb{X}^{inv} = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T$ is the generalized inverse of the \emph{predictor matrix} $\mathbb{X}$.
      \vskip1ex
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Simulate the linear regression data
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
nrows <- 100
# Simulate the predictor
predm <- runif(nrows)
# Response is a linear function of the predictor plus noise
noisev <- rnorm(nrows)
respv <- -3 + 2*predm + noisev
# Bind a unit column named intercept to the predictor
predm <- cbind(intercept=rep(1, nrows), predm)
# Fit the regression using lm()
formulav <- respv ~ predm[, 2]
regmod <- lm(formula=formulav)
# Regression coefficients from lm()
betalm <- coef(regmod)
# Solve the regression using the generalized inverse
predinv <- MASS::ginv(predm)
betac <- drop(predinv %*% respv)
all.equal(betalm, betac, check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{Influence Matrix} of Univariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The fitted values $y_{fit}$ are equal to the response $y$ multiplied by the \emph{influence matrix} $\mathbb{H}$:
      \begin{displaymath}
        y_{fit} = \mathbb{X} \beta = \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T y = \mathbb{H} y
      \end{displaymath}
      Where $\mathbb{H} = \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T$ is the \emph{influence matrix}.
      \vskip1ex
      The \emph{influence matrix} projects the response vector $y$ onto the regression line, to obtain the fitted values $y_{fit}$.
      \vskip1ex
      The square of the \emph{influence matrix} $\mathbb{H}$ is equal to itself (it's idempotent): $\mathbb{H} \, \mathbb{H}^T = \mathbb{H}$.
      \vskip1ex
      For univariate regression, the \emph{influence matrix} $\mathbb{H}$ is given by:
      \begin{displaymath}
        \mathbb{H}_{ij} = [\mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T]_{ij} =
        \frac{1}{n} + \frac{(x_i - \bar{x})(x_j - \bar{x})}{\sum_{i=1}^n (x_i - \bar{x})^2}
      \end{displaymath}
      The first term is due to the influence of the regression intercept $\alpha$, and the second term is due to the influence of the regression slope $\beta$.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=FALSE>>=
# The influence matrix is the predictor matrix times predinv
infmat <- predm %*% predinv
# Verify idempotence: the influence matrix squared equals itself
all.equal(infmat, infmat %*% infmat)
# Fitted values equal the influence matrix times the response
fitv <- drop(infmat %*% respv)
all.equal(fitv, fitted(regmod), check.attributes=FALSE)
# Fitted values equal the predictor matrix times the coefficients
fitv <- drop(predm %*% betac)
all.equal(fitv, fitted(regmod), check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Covariance Matrix} of Fitted Values in Univariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The response values $y$ can be considered to be \emph{random variables} $\hat{y}$.  
      Then the fitted values $y_{fit}$ are also \emph{random variables} $\hat{y}_{fit}$:
      \begin{displaymath}
        \hat{y}_{fit} = \mathbb{H} \hat{y} = \mathbb{H} (y_{fit} + \hat\varepsilon) = y_{fit} + \mathbb{H} \hat\varepsilon
      \end{displaymath}
      The \emph{covariance matrix} of the fitted values $\hat{y}_{fit}$ is:
      \begin{align*}
        & \sigma^2_{fit} = \frac{\mathbbm{E}[\mathbb{H} \hat\varepsilon (\mathbb{H} \hat\varepsilon)^T]}{d_{free}} = \frac{\mathbbm{E}[\mathbb{H} \, \hat\varepsilon \hat\varepsilon^T \mathbb{H}^T]}{d_{free}} = \\
        & \frac{\mathbb{H} \, \mathbbm{E}[\hat\varepsilon \hat\varepsilon^T] \, \mathbb{H}^T}{d_{free}} = \sigma^2_\varepsilon \, \mathbb{H} = \sigma^2_\varepsilon \, \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T
      \end{align*}
      The variance of the fitted values $\sigma^2_{fit}$ increases with the distance of the \emph{predictors} from their mean values.
      \vskip1ex
      This is because the fitted values farther away from their mean are more sensitive to the variance of the regression slope.
      \vskip1ex
      The diagonal elements of the \emph{influence matrix} $\mathbb{H}_{ii}$ form the \emph{leverage vector}.
      \vskip1ex
      The leverage is the amount by which the fitted values would change if the response values were shifted by a small amount.
      \vskip1ex
      The observations with \emph{predictor} values farther away from their mean have more \emph{leverage}, that is, more influence on the fitted values, than observations with \emph{predictor} values close to the mean.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_fitsd.png}
        <<echo=TRUE,eval=FALSE>>=
# Calculate the covariance and standard deviations of fitted values
resids <- drop(respv - fitv)
# Degrees of freedom: number of rows minus number of predictor columns
degf <- (NROW(predm) - NCOL(predm))
# Standard deviation of residuals
residsd <- sqrt(sum(resids^2)/degf)
# Covariance of fitted values equals the variance of residuals
# (the squared standard deviation) times the influence matrix
fitcovar <- residsd^2*infmat
fitsd <- sqrt(diag(fitcovar))
# Plot the standard deviations, sorted by the fitted values
fitdata <- cbind(fitted=fitv, stdev=fitsd)
fitdata <- fitdata[order(fitv), ]
plot(fitdata, type="l", lwd=3, col="blue",
     xlab="Fitted Value", ylab="Standard Deviation",
     main="Standard Deviations of Fitted Values\nin Univariate Regression")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Fitted Values for Different Realizations of Random Noise}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The fitted values are more volatile for \emph{predictor} values that are further away from their mean, because those points have higher \emph{leverage}.
      \vskip1ex
      The higher \emph{leverage} of points further away from the mean of the \emph{predictor} is due to their greater sensitivity to changes in the slope of the regression.
      \vskip1ex
      The fitted values for different realizations of random noise can be calculated using the influence matrix.
        <<echo=TRUE,eval=FALSE>>=
# Response without random noise for univariate regression:
# the columns of the predictor matrix weighted by -1 and +1.
respn <- predm %*% c(-1, 1)
# Simulate the fitted values for 50 realizations of random noise
fitm <- lapply(seq_len(50), function(iter) {
  # Influence matrix times the response with random noise added
  infmat %*% (respn + rnorm(nrows, sd=1.0))
})  # end lapply
# Bind the fitted values into the columns of a matrix
fitm <- rutils::do_call(cbind, fitm)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_fitted.png}
        <<echo=(-(1:3)),eval=FALSE>>=
x11(width=5, height=4)  # Open x11 for plotting
# Set plot parameters to reduce whitespace around plot
par(mar=c(5, 5, 2, 1), oma=c(0, 0, 0, 0))
# Plot the fitted values, with one blue line per realization of noise
matplot(x=predm[, 2], y=fitm,
        type="l", lty="solid", lwd=1, col="blue",
        xlab="predictor", ylab="fitted",
        main="Fitted Values for Different Realizations
        of Random Noise")
# Add the response without noise as a thick red line
lines(x=predm[, 2], y=respn, col="red", lwd=4)
legend(x="topleft", # Add legend
       legend=c("response without noise", "fitted values"),
       title=NULL, inset=0.05, cex=1.0, lwd=6, y.intersp=0.4,
       bty="n", lty=1, col=c("red", "blue"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Forecasts From \protect\emph{Univariate Regression} Models}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The forecast $y_f$ from a regression model is equal to the \emph{response value} corresponding to the \emph{predictor} vector with the new data $\mathbb{X}_{new}$:
      \begin{displaymath}
        y_f = \mathbb{X}_{new} \, \beta
      \end{displaymath}
      The variance $\sigma^2_f$ of the \emph{forecast value} is equal to the \emph{predictor} vector multiplied by the \emph{covariance matrix} of the \emph{regression coefficients} $\sigma^2_\beta$:
      \begin{align*}
        & \sigma^2_f = \frac{\mathbbm{E}[\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon \, (\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon)^T]}{d_{free}} = \\
        & \frac{\mathbbm{E}[\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon \hat\varepsilon^T \mathbb{X}_{inv}^T \mathbb{X}_{new}^T]}{d_{free}} = 
        \sigma^2_\varepsilon \mathbb{X}_{new} \mathbb{X}_{inv} \mathbb{X}_{inv}^T \mathbb{X}_{new}^T = \\
        & \sigma^2_\varepsilon \, \mathbb{X}_{new} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}_{new}^T =
        \mathbb{X}_{new} \, \sigma^2_\beta \, \mathbb{X}_{new}^T
      \end{align*}
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=FALSE>>=
# New predictor values, beyond the largest in-sample predictor
newdata <- (max(predm[, 2]) + 10*seq_len(5)/nrows)
# New predictor matrix, with a unit column for the intercept
predn <- cbind(1, newdata)
# Inverse of the predictor matrix squared
pred2 <- MASS::ginv(crossprod(predm))
# Forecast values from the regression coefficients
fcast <- drop(predn %*% betac)
# Matrix of standard errors - the forecast errors are on its diagonal
predsd <- residsd*sqrt(predn %*% pred2 %*% t(predn))
# Bind the forecast values with their standard errors
fcast <- cbind(forecast=fcast, stdev=diag(predsd))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Confidence Intervals of Regression Forecasts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The variables $\sigma^2_\varepsilon$ and $\sigma^2_f$ follow the \emph{chi-squared} distribution with $d_{free} = (n-k-1)$ degrees of freedom, so the \emph{forecast value} $y_f$ follows the \emph{t-distribution}.
        <<echo=TRUE,eval=FALSE>>=
# t-quantile for the probability of two standard deviations
tquant <- qt(pnorm(2), df=degf)
# Upper and lower confidence bounds of the forecasts
fcasth <- fcast[, 1] + tquant*fcast[, 2]
fcastl <- fcast[, 1] - tquant*fcast[, 2]
# Join the in-sample data with the new data for plotting
xdata <- c(predm[, 2], newdata)
ydata <- c(fitv, fcast[, 1])
# Axis limits that fit the data and the confidence bounds
xlim <- range(xdata)
ylim <- range(respv, ydata, fcastl, fcasth)
# Plot the fitted values and the regression forecasts
plot(x=xdata, y=ydata, type="l", lwd=3, col="blue",
     xlim=xlim, ylim=ylim,
     xlab="predictor", ylab="forecast",
     main="Forecasts from Linear Regression")
# Add the in-sample response and the forecast points
points(x=predm[, 2], y=respv, col="blue")
points(x=newdata, y=fcast[, 1], pch=16, col="blue")
# Add the upper (red) and lower (green) confidence bounds
lines(x=newdata, y=fcasth, lwd=3, col="red")
lines(x=newdata, y=fcastl, lwd=3, col="green")
legend(x="topleft", # Add legend
       legend=c("forecasts", "+2SD", "-2SD"),
       col=c("blue", "red", "green"), lty=1, lwd=6,
       title=NULL, inset=0.05, cex=1.0, y.intersp=0.4,
       bty="n")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_forecast.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Forecasts of \protect\emph{Linear Regression} Using \texttt{predict.lm()}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{predict()} is a \emph{generic function} for forecasting based on a given model.
      \vskip1ex
      \texttt{predict.lm()} is the forecasting method for linear models (regressions) produced by the function \texttt{lm()}.
        <<echo=TRUE,eval=FALSE>>=
# Perform univariate regression
dframe <- data.frame(resp=respv, pred=predm[, 2])
regmod <- lm(resp ~ pred, data=dframe)
# Calculate the forecasts from regression
newdf <- data.frame(pred=predn[, 2]) # Same column name
# The argument level is the confidence level, set to
# the probability within two standard deviations
fcastlm <- predict.lm(object=regmod,
  newdata=newdf, level=1-2*(1-pnorm(2)),
  interval="confidence")
rownames(fcastlm) <- NULL
# Compare with the forecasts and confidence bounds
# calculated using matrix algebra
all.equal(fcastlm[, "fit"], fcast[, 1])
all.equal(fcastlm[, "lwr"], fcastl)
all.equal(fcastlm[, "upr"], fcasth)
# Plot the fitted values and forecasts
plot(x=xdata, y=ydata, xlim=xlim, ylim=ylim,
     type="l", lwd=3, col="blue",
     xlab="predictor", ylab="forecast",
     main="Forecasts from lm() Regression")
points(x=predm[, 2], y=respv, col="blue")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_forecastlm.png}
        <<echo=TRUE,eval=FALSE>>=
# Add the regression line from the lm() model
abline(regmod, col="blue", lwd=3)
# Add the forecast points at the new predictor values
points(x=newdata, y=fcastlm[, "fit"], pch=16, col="blue")
# Add the lower (green) and upper (red) confidence bounds
lines(x=newdata, y=fcastlm[, "lwr"], lwd=3, col="green")
lines(x=newdata, y=fcastlm[, "upr"], lwd=3, col="red")
legend(x="topleft", # Add legend
       legend=c("forecasts", "+2SD", "-2SD"),
       title=NULL, inset=0.05, cex=0.8, lwd=6, y.intersp=0.4,
       bty="n", lty=1, col=c("blue", "red", "green"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Spurious Time Series Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Regression of non-stationary time series creates \emph{spurious} regressions.
      \vskip1ex
      The \emph{t}-statistics, \emph{p}-values, and \emph{R}-squared all indicate a statistically significant regression.
      \vskip1ex
      But the Durbin-Watson test shows residuals are autocorrelated, which invalidates the other tests.
      \vskip1ex
      The Q-Q plot also shows that residuals are \emph{not} normally distributed.
        <<echo=(-(1:3)),eval=FALSE>>=
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
library(lmtest)
# Spurious regression in unit root time series
predm <- cumsum(rnorm(100))  # Unit root time series
respv <- cumsum(rnorm(100))  # Second, independently simulated series
formulav <- respv ~ predm
regmod <- lm(formulav)  # Perform regression
# Summary indicates statistically significant regression
regsum <- summary(regmod)
# Use the full element name instead of partial matching
regsum$coefficients
regsum$r.squared
# Durbin-Watson test shows residuals are autocorrelated
dwtest <- lmtest::dwtest(regmod)
# Test statistic and its p-value
c(dwtest$statistic[[1]], dwtest$p.value)
      @
      \vspace{-2em}
        <<spur_reg,echo=(-(1:2)),eval=FALSE,fig.height=8,fig.show='hide'>>=
par(oma=c(15, 1, 1, 1), mgp=c(0, 0.5, 0), mar=c(1, 1, 1, 1), cex.lab=0.8, cex.axis=0.8, cex.main=0.8, cex.sub=0.5)
par(mfrow=c(2,1))  # Set plot panels
# Top panel: scatterplot of the response versus the predictor
plot(formulav, xlab="", ylab="")  # Plot scatterplot using formula
title(main="Spurious Regression", line=-1)
# Add regression line
abline(regmod, lwd=2, col="red")
# Bottom panel: Q-Q plot of the regression residuals
plot(regmod, which=2, ask=FALSE)  # Plot just Q-Q
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \hspace*{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/spur_reg-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Multivariate Regression}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Multivariate} Linear Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A \emph{multivariate} linear regression model with $k$ \emph{predictors} ${x_j}$, is defined by the formula:
      \begin{displaymath}
        y_i = \alpha + \sum_{j=1}^{k} {\beta_j x_{i,j}} + \varepsilon_i
      \end{displaymath}
      $\alpha$ and $\beta$ are the unknown regression coefficients, with $\alpha$ a scalar and $\beta$ a vector of length $k$.
      \vskip1ex
      The \emph{residuals} $\varepsilon_i$ are assumed to be normally distributed $\phi(0, \sigma_\varepsilon)$, independent, and stationary.
      \vskip1ex
      The data consists of $n$ observations, with each observation containing $k$ \emph{predictors} and one \emph{response} value.
      \vskip1ex
      The \emph{response vector} $y$, the \emph{predictor} vectors ${x_j}$, and the \emph{residuals} $\varepsilon$ are vectors of length $n$.
      \vskip1ex
      The $k$ \emph{predictors} ${x_j}$ form the columns of the $(n,k)$-dimensional \emph{predictor matrix} $\mathbb{X}$.
    \column{0.5\textwidth}
      The \emph{multivariate regression} model can be written in vector notation as:
      \begin{flalign*}
        & y = \alpha + \mathbb{X} \beta + \varepsilon = y_{fit} + \varepsilon\\
        & y_{fit} = \alpha + \mathbb{X} \beta
      \end{flalign*}
      Where $y_{fit}$ are the fitted values of the model.
        <<echo=TRUE,eval=TRUE>>=
# Define the dimensions of the predictor matrix
nrows <- 100
ncols <- 5
# Initialize the random number generator
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
# Predictor matrix of uniform random numbers
predm <- matrix(runif(nrows*ncols), ncol=ncols)
# Add column names
colnames(predm) <- paste0("pred", 1:ncols)
# Define the predictor weights - one weight per predictor column
weightv <- runif(ncols, min=(-1), max=1)
# Response equals weighted predictor plus random noise
noisev <- rnorm(nrows, sd=2)
respv <- (1 + predm %*% weightv + noisev)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Solution of Multivariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Residual Sum of Squares} (\emph{RSS}) is defined as the sum of the squared \emph{residuals}:
      \begin{align*}
        RSS &= \varepsilon^T \varepsilon = (y - y_{fit})^T (y - y_{fit}) = \\
        & (y - \alpha - \mathbb{X} \beta)^T (y - \alpha - \mathbb{X} \beta)
      \end{align*}
      The \emph{OLS} solution for the regression coefficients is found by equating the \emph{RSS} derivatives to zero:
      \begin{flalign*}
        RSS_\alpha = -2 (y - \alpha - \mathbb{X} \beta)^T \mathbbm{1} = 0 \\
        RSS_\beta = -2 (y - \alpha - \mathbb{X} \beta)^T \mathbb{X} = 0
      \end{flalign*}
      The solutions for $\alpha$ and $\beta$ are given by:
      \begin{flalign*}
        & \alpha = \bar{y} - \bar{\mathbb{X}} \beta\\
        & RSS_\beta = -2 (\hat{y} - \hat{\mathbb{X}} \beta)^T \hat{\mathbb{X}} = 0\\
        & \hat{\mathbb{X}}^T \hat{y} - \hat{\mathbb{X}}^T \hat{\mathbb{X}} \beta = 0\\
        & \beta = (\hat{\mathbb{X}}^T \hat{\mathbb{X}})^{-1} \hat{\mathbb{X}}^T \hat{y} = \hat{\mathbb{X}}^{inv} \hat{y}
      \end{flalign*}
      Where $\bar{y}$ and $\bar{\mathbb{X}}$ are the column means, and $\hat{\mathbb{X}} = \mathbb{X} - \bar{\mathbb{X}}$ and $\hat{y} = y - \bar{y} = \hat{\mathbb{X}} \beta + \varepsilon$ are the centered (de-meaned) variables.
    \column{0.5\textwidth}
      The matrix $\hat{\mathbb{X}}^{inv}$ is the generalized inverse of the centered (de-meaned) \emph{predictor matrix} $\hat{\mathbb{X}}$.
      \vskip1ex
      The matrix $\mathbb{C} = \hat{\mathbb{X}}^T \hat{\mathbb{X}} / (n-1)$ is the \emph{covariance matrix} of the matrix $\mathbb{X}$, and it's invertible only if the columns of $\mathbb{X}$ are linearly independent.
        <<echo=TRUE,eval=TRUE>>=
# Reference solution: multivariate regression using lm()
regmod <- lm(respv ~ predm)
# Matrix algebra solution of the multivariate regression
# Center (de-mean) the response vector and the predictor columns
respc <- respv - mean(respv)
# Alternative: predc <- t(t(predm) - colMeans(predm))
predc <- apply(predm, 2, function(colv) colv - mean(colv))
# Regression coefficients from the generalized inverse
# of the centered predictor matrix
betac <- drop(MASS::ginv(predc) %*% respc)
# Regression alpha from the means of the response and predictors
alphac <- mean(respv) - sum(colSums(predm)*betac)/nrows
# Coefficients from lm() versus matrix algebra
all.equal(coef(regmod), c(alphac, betac), check.attributes=FALSE)
# Actual coefficients versus the estimated coefficients
all.equal(c(1, weightv), c(alphac, betac), check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Multivariate Regression in Homogeneous Form}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      If an extra unit column is added to the \emph{predictor matrix} $\mathbb{X} = (\mathbbm{1}, \mathbb{X})$ for the intercept term, then the \emph{linear regression} can be written in \emph{homogeneous form}:
      \begin{displaymath}
        y = \mathbb{X} \beta + \varepsilon
      \end{displaymath}
      Where the \emph{regression coefficients} $\beta$ now contain the intercept $\alpha$: $\beta = (\alpha, \beta_1, \ldots, \beta_k)$, and the \emph{predictor matrix} $\mathbb{X}$ has $k+1$ columns and $n$ rows.
      \vskip1ex
      The \emph{OLS} solution for the $\beta$ coefficients is found by equating the \emph{RSS} derivative to zero:
      \begin{flalign*}
        & RSS_\beta = -2 (y - \mathbb{X} \beta)^T \mathbb{X} = 0\\
        & \mathbb{X}^T y - \mathbb{X}^T \mathbb{X} \beta = 0\\
        & \beta = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T y = \mathbb{X}_{inv} y
      \end{flalign*}
      The matrix $\mathbb{X}_{inv} = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T$ is the generalized inverse of the \emph{predictor matrix} $\mathbb{X}$.
      \vskip1ex
      The coefficients $\beta$ can be interpreted as the projections of the \emph{response vector} $y$ onto the columns of the \emph{predictor matrix} $\mathbb{X}$.
    \column{0.5\textwidth}
      The \emph{predictor matrix} $\mathbb{X}$ maps the \emph{regression coefficients} $\beta$ into the \emph{response vector} $y$.
      \vskip1ex
      The generalized inverse of the \emph{predictor matrix} $\mathbb{X}_{inv}$ maps the \emph{response vector} $y$ into the \emph{regression coefficients} $\beta$.
        <<echo=TRUE,eval=TRUE>>=
# Prepend a named unit column for the intercept to the predictor matrix
predm <- cbind(intercept=rep(1, nrows), predm)
# Update the number of columns
ncols <- NCOL(predm)
# Generalized inverse of the predictor matrix
predinv <- MASS::ginv(predm)
# Regression coefficients are the generalized inverse times the response
betac <- predinv %*% respv
# Regression using lm() without intercept term,
# because predm already contains the unit column
regmod <- lm(respv ~ predm - 1)
all.equal(drop(betac), coef(regmod), check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{Residuals} of Multivariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{multivariate regression} model can be written in vector notation as:
      \begin{flalign*}
        & y = \mathbb{X} \beta + \varepsilon = y_{fit} + \varepsilon\\
        & y_{fit} = \mathbb{X} \beta
      \end{flalign*}
      Where $y_{fit}$ are the fitted values of the model.
      \vskip1ex
      The \emph{residuals} are equal to the \emph{response vector} minus the fitted values: $\varepsilon = y - y_{fit}$.
      \vskip1ex
      The \emph{residuals} $\varepsilon$ are orthogonal to the columns of the \emph{predictor matrix} $\mathbb{X}$ (the \emph{predictors}):
      \begin{flalign*}
        & \varepsilon^T \mathbb{X} = (y - \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T y)^T \mathbb{X} = \\
        & y^T \mathbb{X} - y^T \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T \mathbb{X} = y^T \mathbb{X} - y^T \mathbb{X} = 0
      \end{flalign*}
      Therefore the \emph{residuals} are also orthogonal to the fitted values: $\varepsilon^T y_{fit} = \varepsilon^T \mathbb{X} \beta = 0$.
      \vskip1ex
      Since the first column of the \emph{predictor matrix} $\mathbb{X}$ is a unit vector, the \emph{residuals} $\varepsilon$ have zero mean: $\varepsilon^T \mathbbm{1} = 0$.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Calculate the fitted values from regression coefficients
fitv <- drop(predm %*% betac)
all.equal(fitv, regmod$fitted.values, check.attributes=FALSE)
# Calculate the residuals as the response minus the fitted values
resids <- drop(respv - fitv)
all.equal(resids, regmod$residuals, check.attributes=FALSE)
# Residuals are orthogonal to the columns of the predictor matrix
sapply(resids %*% predm, all.equal, target=0)
# Residuals are orthogonal to the fitted values
all.equal(sum(resids*fitv), target=0)
# Sum of residuals is equal to zero
all.equal(sum(resids), target=0)
      @
      \vspace{-1em}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{Influence Matrix} of Multivariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The vector $y_{fit} = \mathbb{X} \beta$ contains the fitted values corresponding to the \emph{response vector} $y$:
      \begin{displaymath}
        y_{fit} = \mathbb{X} \beta = \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T y = \mathbb{X} \mathbb{X}_{inv} y = \mathbb{H} y
      \end{displaymath}
      Where $\mathbb{H} = \mathbb{X} \mathbb{X}_{inv} = \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T$ is the \emph{influence matrix} (or hat matrix), which maps the \emph{response vector} $y$ into the fitted values $y_{fit}$.
      \vskip1ex
      The \emph{influence matrix} $\mathbb{H}$ is a projection matrix, and it measures the changes in the fitted values $y_{fit}$ due to changes in the \emph{response vector} $y$.
      \begin{displaymath}
        \mathbb{H}_{ij} = \frac{\partial{y^{fit}_i}}{\partial{y_j}}
      \end{displaymath}
      The square of the \emph{influence matrix} $\mathbb{H}$ is equal to itself (it's idempotent): $\mathbb{H} \, \mathbb{H}^T = \mathbb{H}$.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Calculate the influence (hat) matrix as the predictor matrix
# times its generalized inverse
infmat <- predm %*% predinv
# The influence matrix is idempotent: its square equals itself
all.equal(infmat, infmat %*% infmat)
# Calculate the fitted values by applying the influence matrix
# to the response vector
fitv <- drop(infmat %*% respv)
all.equal(fitv, regmod$fitted.values, check.attributes=FALSE)
# Calculate the fitted values from regression coefficients,
# and compare them with the fitted values from lm()
fitv <- drop(predm %*% betac)
all.equal(fitv, regmod$fitted.values, check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Multivariate Regression With Centered Variables}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{multivariate regression} model can be written in vector notation as:
      \begin{displaymath}
        y = \alpha + \mathbb{X} \beta + \varepsilon
      \end{displaymath}
      The intercept $\alpha$ can be substituted with its solution: $\alpha = \bar{y} - \bar{\mathbb{X}} \beta$ to obtain the regression model with centered (de-meaned) response and predictor matrix:
      \begin{flalign*}
        & y = \bar{y} - \bar{\mathbb{X}} \beta + \mathbb{X} \beta + \varepsilon \\
        & \hat{y} = \hat{\mathbb{X}} \beta + \varepsilon
      \end{flalign*}
      The regression model with a centered (de-meaned) \emph{predictor matrix} produces the same fitted values (only shifted by their mean) and \emph{residuals} as the original regression model, so it's equivalent to it.
      \vskip1ex
      But the centered regression model has a different \emph{influence matrix}, which maps the centered \emph{response vector} $\hat{y}$ into the centered fitted values $\hat{y}_{fit}$.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Center (de-mean) the response vector and the predictor columns
respc <- respv - mean(respv)
predc <- sweep(predm, 2, colMeans(predm))
# Centered fitted values from the centered predictors
fittedc <- drop(predc %*% betac)
all.equal(fittedc, regmod$fitted.values - mean(respv),
  check.attributes=FALSE)
# Residuals of the centered regression are the same
# as the residuals of the original regression
resids <- drop(respc - fittedc)
all.equal(resids, regmod$residuals, check.attributes=FALSE)
# Influence matrix of the centered regression
infmatc <- predc %*% MASS::ginv(predc)
# It maps the centered response into the centered fitted values
all.equal(fittedc, drop(infmatc %*% respc), check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Multivariate Regression for Orthogonal Predictors}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The generalized inverse can be written as:
      \begin{displaymath}
        \mathbb{X}_{inv} = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T = \mathbb{C}^{-1} \mathbb{X}^T
      \end{displaymath}
      Where $\mathbb{C} = \mathbb{X}^T \mathbb{X}$ is the matrix of inner products of the predictors $\mathbb{X}$.
      \vskip1ex
      If the predictors are orthogonal ($x_i \cdot x_j = 0$ for $i \neq j$, and $x_i \cdot x_i = \sigma^2_i$) then the squared predictor matrix $\mathbb{C}$ is diagonal:
      \begin{displaymath}
        \mathbb{C} = \begin{pmatrix}
          \sigma^2_1 & 0 & \cdots & 0 \\
          0 & \sigma^2_2 & \cdots & 0 \\
          \vdots  & \vdots  & \ddots & \vdots \\
          0 & 0 & \cdots & \sigma^2_k
        \end{pmatrix}
      \end{displaymath}
      And the inverse of the squared predictor matrix $\mathbb{C}^{-1}$ is also diagonal, so the \emph{regression coefficients} can then be written simply as: 
      \begin{displaymath}
        \beta_i = \frac{x_i \cdot y}{\sigma^2_i}
      \end{displaymath}
      Where $x_i \cdot y$ are the inner products of the predictors $x_i$ times the \emph{response vector} $y$.
      \vskip1ex
      Conversely, if the predictors are \emph{collinear} then their squared predictor matrix is \emph{singular} and the regression is also singular.  Predictors are \emph{collinear} if there's a linear combination that is constant.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=FALSE>>=
# Perform PCA of the predictors, without centering or scaling
# (the scaling argument of prcomp() is named scale.)
pcad <- prcomp(predm, center=FALSE, scale.=FALSE)
# Calculate the PCA predictors
predpca <- predm %*% pcad$rotation
# Principal components are orthogonal to each other
round(t(predpca) %*% predpca, 2)
# Calculate the PCA regression coefficients using lm()
regmod <- lm(respv ~ predpca - 1)
summary(regmod)
regmod$coefficients
# Calculate the PCA regression coefficients directly, as the inner
# products with the response divided by the squared predictors
colSums(predpca*drop(respv))/colSums(predpca^2)
# Create almost collinear predictors
predcol <- predm
predcol[, 1] <- (predcol[, 1]/1e3 + predcol[, 2])
# Calculate the PCA predictors
pcad <- prcomp(predcol, center=FALSE, scale.=FALSE)
predpca <- predcol %*% pcad$rotation
round(t(predpca) %*% predpca, 6)
# Calculate the PCA regression coefficients
drop(MASS::ginv(predpca) %*% respv)
# Calculate the PCA regression coefficients directly
colSums(predpca*drop(respv))/colSums(predpca^2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Regression Diagnostics}


%%%%%%%%%%%%%%%
\subsection{Regression Coefficients as \protect\emph{Random Variables}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{residuals} $\hat\varepsilon$ can be considered to be \emph{random variables}, with expected value equal to zero $\mathbbm{E}[\hat\varepsilon] = 0$, and variance equal to $\sigma^2_\varepsilon$.
      \vskip1ex
      The variance of the \emph{residuals} is equal to the expected value of the squared \emph{residuals} divided by the number of \emph{degrees of freedom}:
      \begin{displaymath}
        \sigma^2_\varepsilon = \frac{\mathbbm{E}[\varepsilon^T \varepsilon]}{d_{free}}
      \end{displaymath}
      Where $d_{free} = (n-k)$ is the number of \emph{degrees of freedom} of the \emph{residuals}, equal to the number of observations $n$, minus the number of \emph{predictors} $k$ (including the intercept term).
      \vskip1ex
      The \emph{response vector} $y$ can also be considered to be a \emph{random variable} $\hat{y}$, equal to the sum of the deterministic fitted values $y_{fit}$ plus the random \emph{residuals} $\hat\varepsilon$:
      \begin{displaymath}
        \hat{y} = \mathbb{X} \beta + \hat\varepsilon = y_{fit} + \hat\varepsilon
      \end{displaymath}
    \column{0.5\textwidth}
      The \emph{regression coefficients} $\beta$ can also be considered to be \emph{random variables} $\hat\beta$:
      \begin{flalign*}
        & \hat\beta = \mathbb{X}_{inv} \hat{y} = \mathbb{X}_{inv} (y_{fit} + \hat\varepsilon) = \\
        & (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T (\mathbb{X} \beta + \hat\varepsilon) =
        \beta + \mathbb{X}_{inv} \hat\varepsilon
      \end{flalign*}
      Where $\beta$ is equal to the expected value of $\hat\beta$: $\beta = \mathbbm{E}[\hat\beta] = \mathbb{X}_{inv} y_{fit} = \mathbb{X}_{inv} y$.
        <<echo=TRUE,eval=TRUE>>=
# Regression model summary
regsum <- summary(regmod)
# Degrees of freedom of residuals: the number of rows
# minus the number of columns of the predictor matrix
nrows <- NROW(predm)
ncols <- NCOL(predm)
degf <- (nrows - ncols)
all.equal(degf, regsum$df[2])
# Variance of residuals (despite its name, residsd holds
# the variance, not the standard deviation)
residsd <- sum(resids^2)/degf
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Covariance Matrix} of the Regression Coefficients}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{covariance matrix} of the \emph{regression coefficients} $\hat\beta$ is given by:
      \begin{align*}
        & \sigma^2_\beta = \frac{\mathbbm{E}[(\hat\beta - \beta) (\hat\beta - \beta)^T]}{d_{free}} = \\
        & \frac{\mathbbm{E}[\mathbb{X}_{inv} \hat\varepsilon (\mathbb{X}_{inv} \hat\varepsilon)^T]}{d_{free}} = \frac{\mathbbm{E}[\mathbb{X}_{inv} \hat\varepsilon \hat\varepsilon^T \mathbb{X}_{inv}^T]}{d_{free}} = \\
        & \frac{(\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T \mathbbm{E}[\hat\varepsilon \hat\varepsilon^T] \, \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1}}{d_{free}} = \\
        & (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T \sigma^2_\varepsilon \mathbbm{1} \, \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} =
        \sigma^2_\varepsilon (\mathbb{X}^T \mathbb{X})^{-1}
      \end{align*}
      Where the expected values of the squared residuals are proportional to the diagonal unit matrix $\mathbbm{1}$: $\frac{\mathbbm{E}[\hat\varepsilon \hat\varepsilon^T]}{d_{free}} = \sigma^2_\varepsilon \mathbbm{1}$
      \vskip1ex
      If the predictors are close to being \emph{collinear}, then the squared predictor matrix becomes singular, and the covariance of their regression coefficients becomes very large.
      \vskip1ex
      The matrix $\mathbb{X}_{inv} = (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T$ is the generalized inverse of the \emph{predictor matrix} $\mathbb{X}$.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Inverse of predictor matrix squared
pred2 <- MASS::ginv(crossprod(predm))
# Same as: pred2 <- MASS::ginv(t(predm) %*% predm)
# Variance of residuals (residsd holds the variance)
residsd <- sum(resids^2)/degf
# Calculate the covariance matrix of betas
betacovar <- residsd*pred2
# round(betacovar, 3)
# Standard errors of betas are the square roots of the diagonal
betasd <- sqrt(diag(betacovar))
all.equal(betasd, regsum$coefficients[, 2], check.attributes=FALSE)
# Calculate the t-values of betas
betatvals <- drop(betac)/betasd
all.equal(betatvals, regsum$coefficients[, 3], check.attributes=FALSE)
# Calculate the two-sided p-values of betas
betapvals <- 2*pt(-abs(betatvals), df=degf)
all.equal(betapvals, regsum$coefficients[, 4], check.attributes=FALSE)
# The square of the generalized inverse is equal
# to the inverse of the square
all.equal(MASS::ginv(crossprod(predm)), predinv %*% t(predinv))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Covariance Matrix} of the Fitted Values}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The fitted values $y_{fit}$ can also be considered to be \emph{random variables} $\hat{y}_{fit}$, because the \emph{regression coefficients} $\hat\beta$ are \emph{random variables}: $\hat{y}_{fit} = \mathbb{X} \hat\beta = \mathbb{X} (\beta + \mathbb{X}_{inv} \hat\varepsilon) = y_{fit} + \mathbb{X} \mathbb{X}_{inv} \hat\varepsilon$.
      \vskip1ex
      The \emph{covariance matrix} of the fitted values $\sigma^2_{fit}$ is:
      \begin{align*}
        & \sigma^2_{fit} = \frac{\mathbbm{E}[\mathbb{X} \mathbb{X}_{inv} \hat\varepsilon \, (\mathbb{X} \mathbb{X}_{inv} \hat\varepsilon)^T]}{d_{free}} = \frac{\mathbbm{E}[\mathbb{H} \, \hat\varepsilon \hat\varepsilon^T \mathbb{H}^T]}{d_{free}} = \\
        & \frac{\mathbb{H} \, \mathbbm{E}[\hat\varepsilon \hat\varepsilon^T] \, \mathbb{H}^T}{d_{free}} = \sigma^2_\varepsilon \, \mathbb{H} = \sigma^2_\varepsilon \, \mathbb{X} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}^T
      \end{align*}
      The square of the \emph{influence matrix} $\mathbb{H}$ is equal to itself (it's idempotent): $\mathbb{H} \, \mathbb{H}^T = \mathbb{H}$.
      \vskip1ex
      The variance of the fitted values $\sigma^2_{fit}$ increases with the distance of the \emph{predictors} from their mean values.
      \vskip1ex
      This is because the fitted values farther from their mean are more sensitive to the variance of the regression slope.
        <<echo=TRUE,eval=FALSE>>=
# Calculate the influence (hat) matrix as the predictor matrix
# times its generalized inverse
infmat <- predm %*% predinv
# The influence matrix is idempotent: its square equals itself
all.equal(infmat, infmat %*% infmat)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/regm_fitsd.png}
        <<echo=TRUE,eval=FALSE>>=
# Covariance matrix of the fitted values: the residual scale factor
# multiplied by the influence matrix.
# NOTE(review): the slide formula uses the residual variance sigma^2 -
# confirm that residsd holds the variance and not the standard deviation.
fitcovar <- (residsd * infmat)
# Standard deviations are the square roots of the diagonal elements
fitsd <- sqrt(diag(fitcovar))
# Pair the fitted values with their standard deviations
fitsd <- cbind(fitted=fitv, stdev=fitsd)
# Order the rows by the fitted values
fitsd <- fitsd[order(fitv), ]
# Plot the standard deviations against the fitted values
plot(fitsd,
     main="Standard Deviations of Fitted Values\nin Multivariate Regression",
     xlab="Fitted Value", ylab="Standard Deviation",
     type="l", col="blue", lwd=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Standard Errors of Time Series Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Bootstrapping the regression of asset returns shows that the actual standard errors can be over twice as large as those reported by the function \texttt{lm()}.
      \vskip1ex
      This is because the function \texttt{lm()} assumes that the data is normally distributed, while in reality asset returns have very large skewness and kurtosis.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load the time series of XLF and XLE percentage returns
retp <- rutils::etfenv$returns[, c("XLF", "XLE")]
# Remove rows with missing values
retp <- na.omit(retp)
nrows <- NROW(retp)
head(retp)
# Build the regression formula as a string:
# first column is the response, the remaining columns are the predictors
formulav <- paste(colnames(retp)[1],
                  paste(colnames(retp)[-1], collapse="+"),
                  sep=" ~ ")
# Standard regression on the full sample
regmod <- lm(formulav, data=retp)
regsum <- summary(regmod)
# Bootstrap of the regression
# Initialize the random number generator
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
# Each of the 100 iterations resamples the rows with replacement
# and returns the regression coefficients of the resampled data
bootd <- sapply(seq_len(100), function(iter) {
  samplev <- sample.int(nrows, replace=TRUE)
  lm(formulav, data=retp[samplev, ])$coefficients
})  # end sapply
# Coefficients and standard errors from the standard regression
regsum$coefficients
# Bootstrap coefficients: one row per coefficient, one column per sample
dim(bootd)
# Means and standard errors of the coefficients from the bootstrap
t(apply(bootd, MARGIN=1, function(coefv) {
  c(mean=mean(coefv), stderror=sd(coefv))
}))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Forecasts From Multivariate Regression Models}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The forecast $y_f$ from a regression model is equal to the \emph{response value} corresponding to the \emph{predictor} vector with the new data $\mathbb{X}_{new}$:
      \begin{displaymath}
        y_f = \mathbb{X}_{new} \, \beta
      \end{displaymath}
      The forecast is a \emph{random variable} $\hat{y}_f$, because the \emph{regression coefficients} $\hat\beta$ are \emph{random variables}:
      \begin{align*}
        \hat{y}_f = \mathbb{X}_{new} \hat\beta = \mathbb{X}_{new} (\beta + \mathbb{X}_{inv} \hat\varepsilon) = \\
        y_f + \mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon
      \end{align*}
      The variance $\sigma^2_f$ of the \emph{forecast value} is:
      \begin{align*}
        & \sigma^2_f = \frac{\mathbbm{E}[\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon \, (\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon)^T]}{d_{free}} = \\
        & \frac{\mathbbm{E}[\mathbb{X}_{new} \mathbb{X}_{inv} \hat\varepsilon \hat\varepsilon^T \mathbb{X}_{inv}^T \mathbb{X}_{new}^T]}{d_{free}} = \\
        & \sigma^2_\varepsilon \mathbb{X}_{new} \mathbb{X}_{inv} \mathbb{X}_{inv}^T \mathbb{X}_{new}^T = \\
        & \sigma^2_\varepsilon \, \mathbb{X}_{new} (\mathbb{X}^T \mathbb{X})^{-1} \mathbb{X}_{new}^T =
        \mathbb{X}_{new} \, \sigma^2_\beta \, \mathbb{X}_{new}^T
      \end{align*}
    \column{0.5\textwidth}
      The variance $\sigma^2_f$ of the \emph{forecast value} is equal to the \emph{predictor} vector multiplied by the \emph{covariance matrix} of the \emph{regression coefficients} $\sigma^2_\beta$.
        <<echo=TRUE,eval=TRUE>>=
# Initialize the random number generator
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
# New predictor data: a single row with a unit element
# followed by five random normal values
newdata <- data.frame(matrix(c(1, rnorm(5)), nrow=1))
# Give the new data the same column names as the predictor matrix
colv <- colnames(predm)
colnames(newdata) <- colv
# Coerce to a row matrix for the matrix products below
newdata <- as.matrix(newdata)
# Forecast is the new predictor row times the regression coefficients
fcast <- drop(newdata %*% betac)
# Forecast standard deviation from the covariance matrix of the coefficients
predsd <- drop(sqrt(newdata %*% betacovar %*% t(newdata)))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Forecasts From Multivariate Regression Using \texttt{lm()}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{predict()} is a \emph{generic function} for forecasting based on a given model.
      \vskip1ex
      \texttt{predict.lm()} is the forecasting method for linear models (regressions) produced by the function \texttt{lm()}.
      \vskip1ex
      In order for \texttt{predict.lm()} to work properly, the multivariate regression must be specified using a formula.
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=FALSE>>=
# Create formula from text string
# The " - 1" term removes the automatic intercept - presumably because
# predm already contains the intercept column (TODO confirm)
formulav <- paste0("respv ~ ",
  paste(colnames(predm), collapse=" + "), " - 1")
# Specify multivariate regression using formula
regmod <- lm(formulav, data=data.frame(cbind(respv, predm)))
regsum <- summary(regmod)
# Predict from lm object
# predict.lm() requires newdata to be a data frame (not a matrix),
# and the confidence level is passed through the argument "level".
# The level 1-2*(1-pnorm(2)) corresponds to a two-sigma interval.
fcastlm <- predict.lm(object=regmod, newdata=as.data.frame(newdata),
           interval="confidence", level=1-2*(1-pnorm(2)))
# Calculate the t-quantile matching the upper tail of the confidence level
tquant <- qt(pnorm(2), df=degf)
fcasth <- (fcast + tquant*predsd)
fcastl <- (fcast - tquant*predsd)
# Compare with matrix calculations
all.equal(fcastlm[1, "fit"], fcast)
all.equal(fcastlm[1, "lwr"], fcastl)
all.equal(fcastlm[1, "upr"], fcasth)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Total Sum of Squares} and \protect\emph{Explained Sum of Squares}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Total Sum of Squares} (\emph{TSS}), the \emph{Explained Sum of Squares} (\emph{ESS}), and the \emph{Residual Sum of Squares} (\emph{RSS}) are defined as:
      \begin{flalign*}
        & TSS = (y - \bar{y})^T (y - \bar{y})\\
        & ESS = (y_{fit} - \bar{y})^T (y_{fit} - \bar{y})\\
        & RSS = (y - y_{fit})^T (y - y_{fit})
      \end{flalign*}
      Since the \emph{residuals} $\varepsilon = y - y_{fit}$ are orthogonal to the fitted values $y_{fit}$, and they sum to zero when the regression includes an intercept term, they are also orthogonal to the \emph{fitted} excess values $(y_{fit} - \bar{y})$:
      \begin{displaymath}
        (y - y_{fit})^T (y_{fit} - \bar{y}) = 0
      \end{displaymath}
      Therefore the \emph{TSS} can be expressed as the sum of the \emph{ESS} plus the \emph{RSS}:
      \begin{displaymath}
        TSS = ESS + RSS
      \end{displaymath}
      It also follows that the $RSS$ and the $ESS$ follow independent \emph{chi-squared} distributions with $(n-k)$ and $(k-1)$ degrees of freedom.
      \vskip1ex
      The degrees of freedom of the \emph{Total Sum of Squares} is equal to the sum of the degrees of freedom of the $RSS$ and the $ESS$: $d^{TSS}_{free} = (n-k) + (k-1) = n-1$.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_tss.png}
        <<echo=TRUE,eval=TRUE>>=
# Total sum of squares: squared deviations of the response from its mean
tss <- sum((respv - mean(respv))^2)
# Explained sum of squares: squared deviations of the fitted values
# from their mean
ess <- sum((fitv - mean(fitv))^2)
# Residual sum of squares
rss <- sum(resids^2)
# Verify the decomposition TSS = ESS + RSS
all.equal(tss, (ess + rss))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{R-squared} of Multivariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{R-squared} is the fraction of the \emph{Explained Sum of Squares} (\emph{ESS}) divided by the \emph{Total Sum of Squares} (\emph{TSS}):
      \begin{displaymath}
        R^2 = \frac{ESS}{TSS} = 1 - \frac{RSS}{TSS}
      \end{displaymath}
      The \emph{R-squared} is a measure of the model \emph{goodness of fit}, with \emph{R-squared} close to $1$ for models fitting the data very well, and \emph{R-squared} close to $0$ for poorly fitting models.
      \vskip1ex
      The \emph{R-squared} is equal to the squared correlation between the response and the fitted values:
      \begin{flalign*}
        & \rho_{yy_{fit}} = \frac{(y_{fit} - \bar{y})^T (y - \bar{y})}{\sqrt{TSS \cdot ESS}} = \\
        & \frac{(y_{fit} - \bar{y})^T (y_{fit} - \bar{y})}{\sqrt{TSS \cdot ESS}} = \sqrt{\frac{ESS}{TSS}}
      \end{flalign*}
    \column{0.5\textwidth}
      \vspace{-1em}
        <<echo=TRUE,eval=TRUE>>=
# Mark the regression terms as having an intercept
attr(regmod$terms, "intercept") <- 1
# Recalculate the regression summary
regsum <- summary(regmod)
# R-squared is the ratio of the explained to the total sum of squares
rsquared <- (ess / tss)
# Compare with the R-squared from the regression summary
all.equal(rsquared, regsum$r.squared)
# Correlation between the response and the fitted values
corfit <- drop(cor(respv, fitv))
# The squared correlation should equal the R-squared
all.equal(corfit^2, rsquared)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Adjusted R-squared} of Multivariate Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The weakness of \emph{R-squared} is that it increases with the number of predictors (even for predictors which are purely random), so it may provide an inflated measure of the quality of a model with many predictors.
      \vskip1ex
      This is remedied by using the \emph{residual variance} ($\sigma^2_\varepsilon = \frac{RSS}{d_{free}}$) instead of the \emph{RSS}, and the \emph{response variance} ($\sigma^2_y = \frac{TSS}{n-1}$) instead of the \emph{TSS}.
      \vskip1ex
      The \emph{adjusted R-squared} is equal to $1$ minus the fraction of the \emph{residual variance} divided by the \emph{response variance}:
      \begin{displaymath}
        R^2_{adj} = 1 - \frac{\sigma^2_\varepsilon}{\sigma^2_y} = 1 - \frac{RSS/d_{free}}{TSS/(n-1)}
      \end{displaymath}
      Where $d_{free} = (n-k)$ is the number of \emph{degrees of freedom} of the \emph{residuals}.
      \vskip1ex
      The \emph{adjusted R-squared} is always smaller than the \emph{R-squared}.
    \column{0.5\textwidth}
      The performance of two different models can be compared by comparing their \emph{adjusted R-squared}, since the model with the larger \emph{adjusted R-squared} has a smaller \emph{residual variance}, so it's better able to explain the \emph{response}.
      <<echo=TRUE,eval=TRUE>>=
# Dimensions of the predictor matrix
nrows <- NROW(predm)
ncols <- NCOL(predm)
# Degrees of freedom of the residuals: rows minus columns
degf <- (nrows - ncols)
# Adjusted R-squared: one minus the residual variance
# divided by the response variance
rsqadj <- (1 - sum(resids^2) / degf / var(respv))
# Compare with the adjusted R-squared from the regression summary
all.equal(drop(rsqadj), regsum$adj.r.squared)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Fisher's \protect\emph{F-distribution}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Let $\chi^2_m$ and $\chi^2_n$ be independent random variables following \emph{chi-squared} distributions with $m$ and $n$ degrees of freedom.
      \vskip1ex
      Then the random variable:
      \begin{displaymath}
        F = \frac{\chi^2_m / m}{\chi^2_n / n}
      \end{displaymath}
      Follows the \emph{F-distribution} with $m$ and $n$ degrees of freedom, with the probability density function:
      \begin{displaymath}
        f(F) = \frac{\Gamma((m+n)/2) m^{m/2} n^{n/2}}{\Gamma(m/2) \Gamma(n/2)} \frac{F^{m/2-1}}{(n+mF)^{(m+n)/2}}
      \end{displaymath}
      The \emph{F-distribution} depends on the ratio $F$ and also on the degrees of freedom, $m$ and $n$.
      \vskip1ex
      The function \texttt{df()} calculates the probability density of the \emph{F-distribution}. 
      <<echo=TRUE,eval=FALSE>>=
# Plot four F-distribution density curves in a loop
# First degrees of freedom of each curve (the second is fixed at 3)
degf <- c(3, 5, 9, 21)
# One color per curve
colorv <- c("black", "red", "blue", "green")
for (indeks in seq_along(degf)) {
  # The first curve creates the plot, the later curves are added to it
  curve(expr=df(x, df1=degf[indeks], df2=3),
        xlim=c(0, 4), xlab="", ylab="",
        col=colorv[indeks], lwd=2,
        add=(indeks != 1))
}  # end for
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.4\paperwidth]{figure/f_dist.png}
        <<echo=TRUE,eval=FALSE>>=
# Add the plot title
title(main="F-Distributions", line=0.5)
# Legend labels: one per element of degf
labelv <- paste("degf", degf, sep=" = ")
# Add the legend in the top right corner, without a box
legend("topright", legend=labelv, title="Degrees of Freedom",
       col=colorv, lty=1, lwd=6, cex=1.2,
       inset=0.0, bty="n", y.intersp=0.4)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{F-test} For the Variance Ratio}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Let $x$ and $y$ be independent standard \emph{Normal} variables, and let
      $\sigma^2_x = \frac{1}{m-1} \sum_{i=1}^m (x_i-\bar{x})^2$
      and
      $\sigma^2_y = \frac{1}{n-1} \sum_{i=1}^n (y_i-\bar{y})^2$
      be their sample variances.
      \vskip1ex
      The ratio $F = \sigma^2_x / \sigma^2_y$ of the sample variances follows the \emph{F-distribution} with $m-1$ and $n-1$ degrees of freedom.
      \vskip1ex
      The \emph{null hypothesis} of the \emph{F-test} is that the \emph{F-statistic} $F$ is not significantly greater than $1$ (the variance $\sigma^2_x$ is not significantly greater than $\sigma^2_y$).
      \vskip1ex
      A large value of the \emph{F-statistic} $F$ indicates that the variances are unlikely to be equal.
      \vskip1ex
      The function \texttt{pf(q)} returns the cumulative probability of the \emph{F-distribution}, i.e. the cumulative probability that the \emph{F-statistic} $F$ is less than the quantile $q$.
      \vskip1ex
      This \emph{F-test} is very sensitive to the assumption of the normality of the variables.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=TRUE>>=
# Sample variances of two independent standard normal samples
sigmax <- var(rnorm(nrows))
sigmay <- var(rnorm(nrows))
# F-statistic is the ratio of the sample variances
fratio <- (sigmax / sigmay)
# Cumulative probability for q = fratio
pf(fratio, df1=(nrows - 1), df2=(nrows - 1))
# p-values for F-ratios from 1.0 to 2.0
1 - pf((10:20) / 10, df1=(nrows - 1), df2=(nrows - 1))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{F-statistic} for Linear Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The performance of two different regression models can be compared by directly comparing their \emph{Residual Sum of Squares} (\emph{RSS}), since the model with a smaller \emph{RSS} is better able to explain the \emph{response} data.
      \vskip1ex
      Let the \emph{restricted} model have $p_1$ parameters with $df_1 = n - p_1$ degrees of freedom, and the \emph{unrestricted} model have $p_2$ parameters with $df_2 = n - p_2$ degrees of freedom, with $p_1 < p_2$.
      \vskip1ex
      Then their \emph{Residual Sum of Squares} $RSS_1$ and $RSS_2$ are \emph{chi-squared} random variables with $df_1$ and $df_2$ degrees of freedom.
      \vskip1ex
      And their difference $(RSS_1 - RSS_2)$ follows a \emph{chi-squared} distribution with $(df_1 - df_2)$ degrees of freedom, and it is independent of $RSS_2$.
      \vskip1ex
      So the \emph{F}-statistic $F$:
      \begin{displaymath}
        F = \frac{(RSS_1 - RSS_2)/(df_1 - df_2)}{RSS_2/df_2}
      \end{displaymath}
      Follows the \emph{F-distribution} with $(df_1 - df_2)$ and $df_2$ degrees of freedom (assuming that the \emph{residuals} are normally distributed).
    \column{0.5\textwidth}
      If the \emph{restricted} model has only one parameter (the constant intercept term), then $df_1 = n - 1$, and its fitted values are equal to the average of the \emph{response}: $y^{fit}_i = \bar{y}$, so $RSS_1$ is equal to the $TSS$: $RSS_1 = TSS = (y - \bar{y})^T (y - \bar{y})$, so its \emph{Explained Sum of Squares} is equal to zero: $ESS_1 = TSS - RSS_1 = 0$.
      \vskip1ex
      Let the \emph{unrestricted} multivariate regression model be defined as:
      \begin{displaymath}
        y = \mathbb{X} \beta + \varepsilon
      \end{displaymath}
      Where $y$ is the \emph{response}, $\mathbb{X}$ is the \emph{predictor matrix} (with $k$ \emph{predictors}, including the intercept term), and $\beta$ are the $k$ \emph{regression coefficients}.
      \vskip1ex
      So the \emph{unrestricted} model has $k$ parameters ($p_2 = k$), and $RSS_2 = RSS$ and $ESS_2 = ESS$, and then the \emph{F}-statistic can be written as:
      \begin{displaymath}
        F = \frac{ESS/(k-1)}{RSS/(n-k)}
      \end{displaymath}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{F-test} for Linear Regression}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The sum of the \emph{Explained Sum of Squares} (\emph{ESS}) and the \emph{Residual Sum of Squares} (\emph{RSS}) is equal to the \emph{Total Sum of Squares} (\emph{TSS}):
      \begin{displaymath}
        TSS = ESS + RSS
      \end{displaymath}
      A regression model that better explains the \emph{response} data will have a larger \emph{ESS} and a smaller \emph{RSS}.
      \vskip1ex
      The $RSS$ and the $ESS$ follow independent \emph{chi-squared} distributions with $(n-k)$ and $(k-1)$ degrees of freedom, where $k$ is the number of explanatory variables (including the intercept term).
      \vskip1ex
      Then the \emph{F}-statistic, equal to the ratio of the \emph{ESS} divided by \emph{RSS}:
      \begin{displaymath}
        F = \frac{ESS/(k-1)}{RSS/(n-k)}
      \end{displaymath}
      Follows the \emph{F-distribution} with $(k-1)$ and $(n-k)$ degrees of freedom (assuming that the \emph{residuals} are normally distributed).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=TRUE>>=
# F-statistic reported by the regression summary
regsum$fstatistic
# Degrees of freedom of the residuals
degf <- (nrows - ncols)
# F-statistic: ESS and RSS each divided by their degrees of freedom
fstat <- (ess / (ncols - 1)) / (rss / degf)
# Compare with the F-statistic from the regression summary
all.equal(fstat, regsum$fstatistic[1], check.attributes=FALSE)
# p-value of the F-statistic
1 - pf(q=fstat, df1=(ncols - 1), df2=(nrows - ncols))
      @
      The \emph{null hypothesis} of the \emph{F-test} is that the \emph{F-statistic} $F$ is not significantly greater than $1$ (the explained variance $ESS/(k-1)$ is not significantly greater than the residual variance $RSS/(n-k)$).
      \vskip1ex
      A large value of the \emph{F-statistic} $F$ indicates that the $ESS$ is significantly greater than the $RSS$, and that the regression is able to explain the \emph{response} data well.
      \vskip1ex
      A regression model that better explains the \emph{response} data will have a larger \emph{ESS} and a smaller \emph{RSS}, so the \emph{F-statistic} $F$ will be significantly greater than $1$.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Omitted Variable Bias}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Omitted Variable Bias} occurs in a regression model that omits important predictors.
      \vskip1ex
      The parameter estimates are biased, even though the \emph{t}-statistics, \emph{p}-values, and \emph{R}-squared all indicate a statistically significant regression.
      \vskip1ex
      But the Durbin-Watson test shows that the residuals are autocorrelated, which means that the regression coefficients may not be statistically significant (different from zero).
        <<echo=TRUE,eval=FALSE>>=
library(lmtest)  # Load lmtest
# Define the included predictor
predm <- 1:30
# Define the omitted predictor
omitv <- sin(0.2 * (1:30))
# The response depends on both predictors, plus random noise
respv <- (0.2 * predm + omitv + 0.2 * rnorm(30))
# Mis-specified regression with only one of the two predictors
modovb <- lm(respv ~ predm)
regsum <- summary(modovb)
# Coefficient table and R-squared of the mis-specified regression
regsum$coefficients
regsum$r.squared
# Durbin-Watson test for autocorrelation of the residuals
lmtest::dwtest(modovb)
# Plot the regression diagnostic plots
# Open a plot window with two vertically stacked panels
x11(width=5, height=7)
par(mfrow=c(2, 1))  # Set plot panels
par(mar=c(3, 2, 1, 1), oma=c(1, 0, 0, 0))
# Top panel: scatterplot with the fitted regression line
plot(respv ~ predm)
abline(modovb, col="red", lwd=2)
title(main="Omitted Variable Regression", line=-1)
# Bottom panel: just the Q-Q plot of the residuals
plot(modovb, which=2, ask=FALSE)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/reg_ovb.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Homework Assignment}


%%%%%%%%%%%%%%%
\subsection{Homework Assignment}
\begin{frame}[t]{\secname}
\vspace{-1em}
\begin{block}{Required}
  \begin{itemize}[]
    \item Study all the lecture slides in \emph{FRE6871\_Lecture\_4.pdf}, and run all the code in \emph{FRE6871\_Lecture\_4.R}
  \end{itemize}
\end{block}

\end{frame}


\end{document}