markets_trading.Rnw

% Define knitr options
% !Rnw weave=knitr
% Set global chunk options
<<knitr_setup,include=FALSE,cache=FALSE>>=
# Attach knitr to get opts_chunk and knit_theme
library(knitr)
# Default options for all the later chunks: show the R prompt, don't
# evaluate or reformat the code, and typeset it in tiny font
opts_chunk$set(
  prompt=TRUE, eval=FALSE, tidy=FALSE, strip.white=FALSE,
  comment=NA, highlight=FALSE, message=FALSE, warning=FALSE,
  size="tiny", fig.width=6, fig.height=5)
# Global R options: output width, graphics device, and printed digits
options(width=80, dev="pdf", digits=3)
# Get the "acid" knitr theme and set it as the current theme
thm <- knit_theme$get("acid")
knit_theme$set(thm)
@


% Define document options
\documentclass[9pt]{beamer}
\DeclareMathSizes{8pt}{6pt}{6pt}{5pt}
\mode<presentation>
\usetheme{AnnArbor}
% \usecolortheme{whale}
% Uncover everything in a step-wise fashion
% \beamerdefaultoverlayspecification{<+->}
% \usepackage{caption}
% tikz package for plotting and tables
\usepackage{tikz}
\usetikzlibrary{positioning}
\usepackage{array}
\usepackage{multirow}
% mathtools package for math symbols
\usepackage{mathtools}
% bbm package for unitary vector or matrix symbol
\usepackage{bbm}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage[latin1]{inputenc}
\usepackage{hyperref}
\usepackage{fancybox}
\usepackage{url}
\usepackage[backend=bibtex,style=alphabetic]{biblatex} % bibstyle=numeric
\usepackage{listings}
\usepackage{xcolor}
\definecolor{cmd_background}{rgb}{0.2, 0.2, 0.0}
\definecolor{vba_background}{rgb}{0.0, 0.0, 0.9}
% \colorlet{cmd_background_light}{cmd_background!20!black}
% \bibliographystyle{amsalpha} % doesn't work
% \addbibresource{FRE_lectures.bib}
% \addbibresource[location=remote]{http://www.citeulike.org/user/jerzyp}
\renewcommand\bibfont{\footnotesize}
\renewcommand{\pgfuseimage}[1]{\scalebox{0.75}{\includegraphics{#1}}} % scale bib icons
\setbeamertemplate{bibliography item}[text] % set bib icons
% \setbeamertemplate{bibliography item}{} % remove bib icons

% \usepackage{enumerate}
% \let\emph\textbf
% \let\alert\textbf
% Define colors for hyperlinks
\definecolor{links}{HTML}{2A1B81}
\hypersetup{colorlinks=true,linkcolor=,urlcolor=links}
% Make url text scriptsize
\renewcommand\UrlFont{\scriptsize}
% Make institute text italic and small
\setbeamerfont{institute}{size=\small,shape=\itshape}
\setbeamerfont{date}{size=\small}
\setbeamerfont{block title}{size=\normalsize} % shape=\itshape
\setbeamerfont{block body}{size=\footnotesize}


% Title page setup
\title[Markets and Trading]{Markets and Trading}
\subtitle{FRE6871 \& FRE7241, Fall 2024}
\institute[NYU Tandon]{NYU Tandon School of Engineering}
\titlegraphic{\includegraphics[scale=0.2]{image/tandon_long_color.png}}
\author[Jerzy Pawlowski]{Jerzy Pawlowski \emph{\href{mailto:jp3900@nyu.edu}{jp3900@nyu.edu}}}
\date{\today}


%%%%%%%%%%%%%%%
\begin{document}


%%%%%%%%%%%%%%%
\maketitle


%%%%%%%%%%%%%%%
\section{Bonds and Interest Rates}


%%%%%%%%%%%%%%%
\subsection{Downloading Treasury Bond Rates from \protect\emph{FRED}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The constant maturity Treasury rates are yields of hypothetical fixed-maturity bonds, interpolated from the market yields of actual Treasury bonds.
      \vskip1ex
      The \emph{FRED} database contains current and historical constant maturity Treasury rates, \\
      \hskip1em\url{https://fred.stlouisfed.org/series/DGS5}
      \vskip1ex
      \texttt{quantmod::getSymbols()} creates objects in the specified \emph{environment} from the input strings (names).
      \vskip1ex
      It then assigns the data to those objects, without returning them as a function value, as a \emph{side effect}.
      <<echo=TRUE,eval=FALSE>>=
# Names of the constant maturity Treasury rate series in FRED
symbolv <- c("DGS1", "DGS2", "DGS5", "DGS10", "DGS20", "DGS30")
# Environment that will hold the downloaded time series
ratesenv <- new.env()
# getSymbols() assigns each series into ratesenv as a side effect
quantmod::getSymbols(symbolv, env=ratesenv, src="FRED")
# Count NA values in each series, then replace each series by na.omit()
sapply(ratesenv, function(ratev) sum(is.na(ratev)))
sapply(ls(ratesenv), function(namev) {
  ratev <- get(namev, ratesenv)
  assign(x=namev, value=na.omit(ratev), envir=ratesenv)
}) # end sapply
# Count the NA values again after the cleaning
sapply(ratesenv, function(ratev) sum(is.na(ratev)))
# Class of every object in ratesenv
sapply(ratesenv, class)
# Class of every object in the R workspace
sapply(ls(), function(namev) class(get(namev)))
# Save the environment of time series into a binary .RData file
save(ratesenv, file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/treas_10y_rate.png}
      <<echo=TRUE,eval=FALSE>>=
# Get class of time series object DGS10, extracted from ratesenv by name
class(get(x="DGS10", envir=ratesenv))
# Another way, using the $ operator on the environment
class(ratesenv$DGS10)
# Get first 6 rows of time series
head(ratesenv$DGS10)
# Plot interactive dygraphs of 10-year Treasury rate
dygraphs::dygraph(ratesenv$DGS10, main="10-year Treasury Rate") %>%
  dyOptions(colors="blue", strokeWidth=2)
# Plot 10-year constant maturity Treasury rate since 1990 in x11 window
x11(width=6, height=5)
par(mar=c(2, 2, 0, 0), oma=c(0, 0, 0, 0))
chart_Series(ratesenv$DGS10["1990/"], name="10-year Treasury Rate")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Treasury Yield Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{yield curve} is a vector of interest rates at different maturities, on a given date.
      \vskip1ex
      The \emph{yield curve} shape changes depending on the economic conditions: in recessions rates drop and the curve flattens, while in expansions rates rise and the curve steepens.
      <<echo=TRUE,eval=FALSE>>=
# Load the environment with constant maturity Treasury rates
load(file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
# Most recent yield curve: the last row of every series
ycnow <- eapply(ratesenv, xts::last)
class(ycnow)
ycnow <- do.call(cbind, ycnow)
# Display the weekday of 2020-03-25
date2020 <- as.Date("2020-03-25")
weekdays(date2020)
# Yield curve on 2020-03-25
yc2020 <- do.call(cbind, eapply(ratesenv, function(ratev) ratev[date2020]))
# Stack both yield curves into a single time series
ycurves <- c(yc2020, ycnow)
# Drop the first 3 characters (DGS) so the column names are the maturities
colnames(ycurves) <- substr(colnames(ycurves), start=4, stop=11)
# Sort the columns by maturity and append "yr" to their names
ycurves <- ycurves[, order(as.numeric(colnames(ycurves)))]
colnames(ycurves) <- paste0(colnames(ycurves), "yr")
# Transpose into a matrix and keep the first 4 characters of the column names
ycurves <- t(ycurves)
colnames(ycurves) <- substr(colnames(ycurves), start=1, stop=4)
      @
    \column{0.5\textwidth}
    \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/yield_curve.png}
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 0), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
# Plot using matplot(): one line for each column of ycurves
# NOTE(review): the title hard-codes 2023 - confirm it matches colnames(ycurves)
colorv <- c("blue", "red")
matplot(ycurves, main="Yield Curves in 2020 and 2023", xaxt="n", lwd=3, lty=1,
        type="l", xlab="maturity", ylab="yield", col=colorv)
# Add x-axis with the row names of ycurves as tick labels
axis(1, seq_along(rownames(ycurves)), rownames(ycurves))
# Add legend with the column names of ycurves
legend("topleft", legend=colnames(ycurves), y.intersp=0.1,
       bty="n", col=colorv, lty=1, lwd=6, inset=0.05, cex=1.0)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Treasury Yield Curve Over Time}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{yield curve} has changed shape dramatically depending on the economic conditions: in recessions rates drop and the curve flattens, while in expansions rates rise and the curve steepens.
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 0), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
# Load the ratesenv environment of constant maturity Treasury rates
load(file="/Users/jerzy/Develop/lecture_slides/data/rates_data.RData")
# Get end-of-year dates since 2006, from the index of DGS1
datev <- xts::endpoints(ratesenv$DGS1["2006/"], on="years")
datev <- zoo::index(ratesenv$DGS1["2006/"][datev])
# Create time series of end-of-year rates, with one column per series
ycurves <- eapply(ratesenv, function(ratev) ratev[datev])
ycurves <- rutils::do_call(cbind, ycurves)
# Rename columns and rows, sort columns, and transpose into matrix
# Drop the first 3 characters (DGS), sort by maturity, and append "yr"
colnames(ycurves) <- substr(colnames(ycurves), start=4, stop=11)
ycurves <- ycurves[, order(as.numeric(colnames(ycurves)))]
colnames(ycurves) <- paste0(colnames(ycurves), "yr")
ycurves <- t(ycurves)
colnames(ycurves) <- substr(colnames(ycurves), start=1, stop=4)
# Plot matrix using plot.zoo(), with one color per column from red to blue
colorv <- colorRampPalette(c("red", "blue"))(NCOL(ycurves))
plot.zoo(ycurves, main="Yield Curve Since 2006", lwd=3, xaxt="n",
         plot.type="single", xlab="maturity", ylab="yield", col=colorv)
# Add x-axis with the row names of ycurves as tick labels
axis(1, seq_along(rownames(ycurves)), rownames(ycurves))
# Add legend with the column names of ycurves
legend("topleft", legend=colnames(ycurves), y.intersp=0.1,
       bty="n", col=colorv, lty=1, lwd=4, inset=0.05, cex=0.8)
      @
    \column{0.5\textwidth}
    \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/yield_curve_time.png}
      <<echo=TRUE,eval=FALSE>>=
# Alternative plot using matplot(): one line for each column of ycurves
matplot(ycurves, main="Yield curve since 2006", xaxt="n", lwd=3, lty=1,
        type="l", xlab="maturity", ylab="yield", col=colorv)
# Add x-axis with the row names of ycurves as tick labels
axis(1, seq_along(rownames(ycurves)), rownames(ycurves))
# Add legend with the column names of ycurves
legend("topleft", legend=colnames(ycurves), y.intersp=0.1,
       bty="n", col=colorv, lty=1, lwd=4, inset=0.05, cex=0.8)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Covariance Matrix of Interest Rates}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The covariance matrix $\mathbb{C}$ of the interest rate matrix $\mathbf{r}$ (with $n$ rows of observations) is given by:
      \begin{displaymath}
        \mathbb{C} = \frac{(\mathbf{r} - \bar{\mathbf{r}})^T (\mathbf{r} - \bar{\mathbf{r}})} {n-1}
      \end{displaymath}
      <<echo=TRUE,eval=FALSE>>=
# Extract rates from ratesenv and combine them into a single time series
symbolv <- c("DGS1", "DGS2", "DGS5", "DGS10", "DGS20")
ratem <- mget(symbolv, envir=ratesenv)
ratem <- rutils::do_call(cbind, ratem)
# Carry the last values forward, then fill leading NA values backward
ratem <- zoo::na.locf(ratem, na.rm=FALSE)
ratem <- zoo::na.locf(ratem, fromLast=TRUE)
# Calculate daily percentage rates changes from the log rates
retp <- rutils::diffit(log(ratem))
# Center (de-mean) the returns column by column
retp <- lapply(retp, function(x) {x - mean(x)})
retp <- rutils::do_call(cbind, retp)
sapply(retp, mean)
# Covariance and Correlation matrices of Treasury rates
covmat <- cov(retp)
cormat <- cor(retp)
# Reorder correlation matrix based on clusters
# After this, the rows and columns of cormat are in the clustered order
library(corrplot)
ordern <- corrMatOrder(cormat, order="hclust",
  hclust.method="complete")
cormat <- cormat[ordern, ordern]
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.4\paperwidth]{figure/yield_cor.png}
      <<echo=TRUE,eval=FALSE>>=
# Plot the correlation matrix
x11(width=6, height=6)
# colorv is a function that returns a red-white-blue color palette
colorv <- colorRampPalette(c("red", "white", "blue"))
corrplot(cormat, title=NA, tl.col="black",
    method="square", col=colorv(NCOL(cormat)), tl.cex=0.8,
    cl.offset=0.75, cl.cex=0.7, cl.align.text="l", cl.ratio=0.25)
title("Correlation of Treasury Rates", line=1)
# Draw rectangles around the clusters on the correlation matrix plot
corrRect.hclust(cormat, k=NROW(cormat) %/% 2,
  method="complete", col="red")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Principal Component Vectors}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal components} are linear combinations of the \texttt{k} return vectors $\mathbf{r}_i$:
      \begin{displaymath}
        \mathbf{pc}_j = \sum_{i=1}^k {w_{ij} \, \mathbf{r}_i}
      \end{displaymath}
      Where $\mathbf{w}_j$ is a vector of weights (loadings) of the \emph{principal component} \texttt{j}, with $\mathbf{w}_j^T \mathbf{w}_j = 1$.
      \vskip1ex
      The weights $\mathbf{w}_j$ are chosen to maximize the variance of the \emph{principal components}, under the condition that they are orthogonal:
      \begin{align*}
        \mathbf{w}_j = {\operatorname{\arg \, \max}} \, \left\{ \mathbf{pc}_j^T \, \mathbf{pc}_j \right\} \\
        \mathbf{pc}_i^T \, \mathbf{pc}_j = 0 \> (i \neq j)
      \end{align*}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Initial equal weights, named by symbol, with squares that sum to 1
nweights <- NROW(symbolv)
weightv <- setNames(rep(1/sqrt(nweights), nweights), symbolv)
# Objective function equal to minus the portfolio variance, plus a penalty
# for the sum of squared weights being different from 1.
# weightv: vector of weights; retp: matrix of returns (series in columns).
objfun <- function(weightv, retp) {
  varp <- var(retp %*% weightv)
  penaltyv <- (1 - sum(weightv^2))^2
  -1e7*varp + 1e7*penaltyv
}  # end objfun
# Objective function for equal weight portfolio
objfun(weightv, retp)
# Compare speed of vector multiplication methods:
# matrix product versus the sum of elementwise products
library(microbenchmark)
summary(microbenchmark(
  transp=t(retp) %*% retp,
  sumv=sum(retp*retp),
  times=10))[, c(1, 4, 5)]
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_load1.png}
      <<echo=TRUE,eval=FALSE>>=
# Minimize objfun under box constraints, starting from weightv
optiml <- optim(par=weightv, fn=objfun, retp=retp,
  method="L-BFGS-B",
  lower=rep(-5.0, nweights), upper=rep(5.0, nweights))
# Optimal weights and the objective function value they attain
weights1 <- optiml$par
objfun(weights1, retp)
# Bar plot of the first principal component loadings
x11(width=6, height=5)
par(mar=c(3, 3, 2, 1), oma=c(0, 0, 0, 0), mgp=c(2, 1, 0))
barplot(weights1, names.arg=names(weights1),
  xlab="", ylab="", main="First Principal Component Loadings")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Higher Order Principal Components}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{second principal component} can be calculated by maximizing its variance, under the constraint that it must be orthogonal to the \emph{first principal component}.
      \vskip1ex
      Similarly, higher order \emph{principal components} can be calculated by maximizing their variances, under the constraint that they must be orthogonal to all the previous \emph{principal components}.
      \vskip1ex
      The number of principal components is equal to the dimension of the covariance matrix.
      <<echo=TRUE,eval=FALSE>>=
# pc1 returns: the returns matrix multiplied by the weights1 vector
pc1 <- drop(retp %*% weights1)
# Redefine objective function: minus the portfolio variance, plus penalties
# for weights not normalized to 1 and for weights not orthogonal to weights1
# (weights1 is read from the enclosing environment).
objfun <- function(weightv, retp) {
  varp <- var(retp %*% weightv)
  normv <- (1 - sum(weightv^2))^2
  orthv <- sum(weights1*weightv)^2
  -1e7*varp + 1e7*normv + 1e7*orthv
}  # end objfun
# Minimize the redefined objfun to get the second principal component weights
optiml <- optim(par=weightv, fn=objfun, retp=retp,
  method="L-BFGS-B",
  lower=rep(-5.0, nweights), upper=rep(5.0, nweights))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_load2.png}
      <<echo=TRUE,eval=FALSE>>=
# pc2 weights and returns
weights2 <- optiml$par
pc2 <- drop(retp %*% weights2)
# Inner product of pc1 and pc2 - close to zero if they are orthogonal
sum(pc1*pc2)
# Plot second principal component loadings
barplot(weights2, names.arg=names(weights2),
        xlab="", ylab="", main="Second Principal Component Loadings")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Eigenvalues of the Covariance Matrix}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The portfolio variance: $\mathbf{w}^T \mathbb{C} \, \mathbf{w}$ can be maximized under the \emph{quadratic} weights constraint $\mathbf{w}^T \mathbf{w} = 1$, by maximizing the \emph{Lagrangian} $\mathcal{L}$:
      \begin{displaymath}
        \mathcal{L} = \mathbf{w}^T \mathbb{C} \, \mathbf{w} \, - \, \lambda \, (\mathbf{w}^T \mathbf{w} - 1)
      \end{displaymath}
      Where $\lambda$ is a \emph{Lagrange multiplier}.
      \vskip1ex
      The maximum variance portfolio weights can be found by differentiating $\mathcal{L}$ with respect to $\mathbf{w}$ and setting it to zero:
      \begin{displaymath}
        \mathbb{C} \, \mathbf{w} = \lambda \, \mathbf{w}
      \end{displaymath}
      The above is the \emph{eigenvalue} equation of the covariance matrix $\mathbb{C}$, with the optimal weights $\mathbf{w}$ forming an \emph{eigenvector}, and $\lambda$ is the \emph{eigenvalue} corresponding to the \emph{eigenvector} $\mathbf{w}$.
      \vskip1ex
      The \emph{eigenvalues} are the variances of the \emph{eigenvectors}, and their sum is equal to the sum of the return variances:
      \begin{displaymath}
        \sum_{i=1}^k \lambda_i = \frac{1}{n-1} \sum_{i=1}^k {\mathbf{r}_i^T \mathbf{r}_i}
      \end{displaymath}
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_eigenvalues.png}
      <<echo=TRUE,eval=FALSE>>=
# Eigen decomposition of the covariance matrix
eigend <- eigen(covmat)
eigend$vectors
# Compare with optimization
# The sum of the eigenvalues is equal to the trace of the covariance matrix
all.equal(sum(diag(covmat)), sum(eigend$values))
# Compare absolute values because eigenvectors are defined up to a sign
all.equal(abs(eigend$vectors[, 1]), abs(weights1), check.attributes=FALSE)
all.equal(abs(eigend$vectors[, 2]), abs(weights2), check.attributes=FALSE)
all.equal(eigend$values[1], var(pc1), check.attributes=FALSE)
all.equal(eigend$values[2], var(pc2), check.attributes=FALSE)
# Eigenvalue equations are satisfied approximately (ratios are close to 1)
(covmat %*% weights1) / weights1 / var(pc1)
(covmat %*% weights2) / weights2 / var(pc2)
# Plot eigenvalues
barplot(eigend$values, names.arg=paste0("PC", 1:nweights),
  las=3, xlab="", ylab="", main="Principal Component Variances")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Principal Component Analysis} Versus \protect\emph{Eigen Decomposition}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal Component Analysis} (\emph{PCA}) is equivalent to the \emph{eigen decomposition} of either the correlation or the covariance matrix.
      \vskip1ex
      If the input time series \emph{are} scaled, then \emph{PCA} is equivalent to the eigen decomposition of the \emph{correlation matrix}.
      \vskip1ex
      If the input time series \emph{are not} scaled, then \emph{PCA} is equivalent to the eigen decomposition of the \emph{covariance matrix}.
      \vskip1ex
      Scaling the input time series improves the accuracy of the \emph{PCA dimension reduction}, allowing a smaller number of \emph{principal components} to more accurately capture the data contained in the input time series.
      \vskip1ex
      The function \texttt{prcomp()} performs \emph{Principal Component Analysis} on a matrix of data (with the time series as columns), and returns the results as a list of class \texttt{prcomp}.
      \vskip1ex
      The \texttt{prcomp()} argument \texttt{scale=TRUE} specifies that the input time series should be scaled by their standard deviations.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Eigen decomposition of correlation matrix
# Calculate cor(retp) directly, because cormat may have been reordered by
# clusters, and the eigenvector rows must match the rows of pcad$rotation
eigend <- eigen(cor(retp))
# Perform PCA with scaling
pcad <- prcomp(retp, scale=TRUE)
# Compare outputs: eigenvalues with PCA variances, and eigenvectors with
# PCA loadings (absolute values, because they are defined up to a sign)
all.equal(eigend$values, pcad$sdev^2)
all.equal(abs(eigend$vectors), abs(pcad$rotation),
          check.attributes=FALSE)
# Eigen decomposition of covariance matrix
eigend <- eigen(covmat)
# Perform PCA without scaling
pcad <- prcomp(retp, scale=FALSE)
# Compare outputs
all.equal(eigend$values, pcad$sdev^2)
all.equal(abs(eigend$vectors), abs(pcad$rotation),
          check.attributes=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Principal Component Analysis of the Yield Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal Component Analysis} (\emph{PCA}) is a \emph{dimension reduction} technique, that explains the returns of a large number of correlated time series as linear combinations of a smaller number of principal component time series.
      \vskip1ex
      The input time series are often scaled by their standard deviations, to improve the accuracy of \emph{PCA dimension reduction}, so that more information is retained by the first few \emph{principal component} time series.
      \vskip1ex
      If the input time series are not scaled, then \emph{PCA} analysis is equivalent to the \emph{eigen decomposition} of the covariance matrix, and if they are scaled, then \emph{PCA} analysis is equivalent to the \emph{eigen decomposition} of the correlation matrix.
      \vskip1ex
      The function \texttt{prcomp()} performs \emph{Principal Component Analysis} on a matrix of data (with the time series as columns), and returns the results as a list of class \texttt{prcomp}.
      \vskip1ex
      The \texttt{prcomp()} argument \texttt{scale=TRUE} specifies that the input time series should be scaled by their standard deviations.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.35\paperwidth]{figure/pca_rates_scree.png}\\
      A \emph{scree plot} is a bar plot of the volatilities of the \emph{principal components}.
      <<echo=TRUE,eval=FALSE>>=
# Perform principal component analysis PCA with scaled inputs
pcad <- prcomp(retp, scale=TRUE)
# Plot standard deviations of the principal components
barplot(pcad$sdev, names.arg=colnames(pcad$rotation),
  las=3, xlab="", ylab="",
  main="Scree Plot: Volatilities of Principal Components
  of Treasury rates")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Yield Curve Principal Component Loadings (Weights)}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Principal component} loadings are the weights of portfolios which have mutually orthogonal returns.
      \vskip1ex
      The \emph{principal component} portfolios represent the different orthogonal modes of the data variance.
      \vskip1ex
      The first \emph{principal component} of the \emph{yield curve} is the correlated movement of all rates up and down.
      \vskip1ex
      The second \emph{principal component} is \emph{yield curve} steepening and flattening.
      \vskip1ex
      The third \emph{principal component} is the \emph{yield curve} butterfly movement.
      <<echo=(-(1:1)),eval=FALSE>>=
x11(width=6, height=7)
# Calculate principal component loadings (weights)
pcad$rotation
# Plot loading barplots in multiple panels (3 rows by 2 columns)
par(mfrow=c(3,2))
par(mar=c(3.5, 2, 2, 1), oma=c(0, 0, 0, 0))
# One barplot for each column of the loadings matrix
for (ordern in 1:NCOL(pcad$rotation)) {
  barplot(pcad$rotation[, ordern], las=3, xlab="", ylab="", main="")
  title(paste0("PC", ordern), line=-2.0, col.main="red")
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_loadings.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Yield Curve Principal Component Time Series}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The time series of the \emph{principal components} can be calculated by multiplying the loadings (weights) times the original data.
      \vskip1ex
      The \emph{principal component} time series have mutually orthogonal returns.
      \vskip1ex
      Higher order \emph{principal components} are gradually less volatile.
      <<echo=TRUE,eval=FALSE>>=
# Standardize (center and scale) the returns column by column
retp <- lapply(retp, function(x) {(x - mean(x))/sd(x)})
retp <- rutils::do_call(cbind, retp)
sapply(retp, mean)
sapply(retp, sd)
# Calculate principal component time series: returns times the loadings
retpcac <- retp %*% pcad$rotation
all.equal(pcad$x, retpcac, check.attributes=FALSE)
# Calculate products of principal component time series
round(t(retpcac) %*% retpcac, 2)
# Coerce to xts time series and calculate the cumulative sums
retpcac <- xts(retpcac, order.by=zoo::index(retp))
retpcac <- cumsum(retpcac)
# Plot principal component time series in multiple panels
par(mfrow=c(3,2))
par(mar=c(2, 2, 0, 1), oma=c(0, 0, 0, 0))
# Use the same y-axis range in all the panels
rangev <- range(retpcac)
for (ordern in 1:NCOL(retpcac)) {
  plot.zoo(retpcac[, ordern], ylim=rangev, xlab="", ylab="")
  title(paste0("PC", ordern), line=-1, col.main="red")
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_series.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Inverting Principal Component Analysis}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The original time series can be calculated \emph{exactly} from the time series of all the \emph{principal components}, by inverting the loadings matrix.
      \vskip1ex
      The function \texttt{solve()} solves systems of linear equations, and also inverts square matrices.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Invert all the principal component time series
# Calculate the principal component time series
retpca <- retp %*% pcad$rotation
# Multiply by the inverse of the loadings matrix
solved <- retpca %*% solve(pcad$rotation)
# Compare the result with the data of retp
all.equal(coredata(retp), solved)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Dimension Reduction} Using Principal Component Analysis}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The original time series can be calculated \emph{approximately} from just the first few \emph{principal components}, which demonstrates that \emph{PCA} is a form of \emph{dimension reduction}.
      \vskip1ex
      A popular rule of thumb is to use the \emph{principal components} with the largest variances, which sum up to \texttt{80\%} of the total variance of returns.
      \vskip1ex
      The \emph{Kaiser-Guttman} rule uses only \emph{principal components} with variance greater than $1$.
      <<echo=TRUE,eval=FALSE>>=
# Invert first 3 principal component time series
solved <- retpca[, 1:3] %*% solve(pcad$rotation)[1:3, ]
solved <- xts::xts(solved, zoo::index(retp))
# Cumulative sums of the solved returns and of the returns
solved <- cumsum(solved)
retc <- cumsum(retp)
# Plot the solved returns
par(mfrow=c(3,2))
par(mar=c(2, 2, 0, 1), oma=c(0, 0, 0, 0))
# symboln is the column name, used for subsetting and for the legend
for (symboln in symbolv) {
  plot.zoo(cbind(retc[, symboln], solved[, symboln]),
    plot.type="single", col=c("black", "blue"), xlab="", ylab="")
  legend(x="topleft", bty="n", y.intersp=0.1,
         legend=paste0(symboln, c("", " solved")),
         title=NULL, inset=0.0, cex=1.0, lwd=6,
         lty=1, col=c("black", "blue"))
}  # end for
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/pca_rates_series_solved.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Calibrating Yield Curve Using Package \protect\emph{RQuantLib}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The package
      \href{https://cran.r-project.org/web/packages/RQuantLib/index.html}{\emph{RQuantLib}}
      is an interface to the
      \href{http://quantlib.org/index.shtml}{\emph{QuantLib}}
      open source \texttt{C/C++} library for quantitative finance, mostly designed for pricing fixed-income instruments and options.
      \vskip1ex
      The function \texttt{DiscountCurve()} calibrates a \emph{zero coupon yield curve} from \emph{money market} rates, \emph{Eurodollar} futures, and \emph{swap} rates.
      \vskip1ex
      The function \texttt{DiscountCurve()} interpolates the \emph{zero coupon} rates into a vector of dates specified by the \texttt{times} argument.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:1)),eval=FALSE>>=
library(quantmod)  # Load quantmod
library(RQuantLib)  # Load RQuantLib
# Specify curve parameters: dates and interpolation method
curvep <- list(tradeDate=as.Date("2018-01-17"),
               settleDate=as.Date("2018-01-19"),
               dt=0.25,
               interpWhat="discount",
               interpHow="loglinear")
# Specify market data: prices of FI instruments
# (names presumably denote a deposit, futures, and swaps - TODO confirm)
pricev <- list(d3m=0.0363,
               fut1=96.2875,
               fut2=96.7875,
               fut3=96.9875,
               fut4=96.6875,
               s5y=0.0443,
               s10y=0.05165,
               s15y=0.055175)
# Specify dates for calculating the zero rates: from 0 to 10 in steps of 0.25
datev <- seq(0, 10, 0.25)
# Specify the evaluation (as of) date
setEvaluationDate(as.Date("2018-01-17"))
# Calculate the zero rates
ratev <- DiscountCurve(params=curvep, tsQuotes=pricev, times=datev)
# Plot the zero rates as a line
x11()
plot(x=ratev$zerorates, t="l", main="zerorates")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Financial and Commodity Futures}


%%%%%%%%%%%%%%%
\subsection{Financial and Commodity Futures Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The underlying assets delivered in \emph{commodity futures} contracts are commodities, such as grains (corn, wheat), or raw materials and metals (oil, aluminum).
      \vskip1ex
      The underlying assets delivered in \emph{financial futures} contracts are financial assets, such as stocks, bonds, and currencies.
      \vskip1ex
      Many futures contracts use cash settlement instead of physical delivery of the asset.
      \vskip1ex
      Futures contracts on different underlying assets can have quarterly, monthly, or even weekly expiration dates.
      \vskip1ex
      The front month futures contract is the contract with the closest expiration date to the current date.
      \vskip1ex
      Symbols of futures contracts are obtained by combining the contract code with the month code and the year.
      \vskip1ex
      For example, \emph{ESM9} is the symbol for the \emph{S\&P500} index E-mini futures expiring in June 2019.
    \column{0.5\textwidth}
    \vspace{-1em}
    \begin{table}[htb]
    \begin{minipage}{0.48\textwidth}
    % \centering
    <<echo=FALSE,eval=TRUE,results='asis'>>=
# Character matrix of futures contract names and their codes (one per row)
futures <- matrix(c(
  "S&P500 index", "ES",
  "10yr Treasury", "ZN",
  "VIX index", "VX",
  "Gold", "GC",
  "Oil", "CL",
  "Euro FX", "EC",
  "Swiss franc", "SF",
  "Japanese Yen", "JY"),
  ncol=2, byrow=TRUE,
  dimnames=list(NULL, c("Futures contract", "Code")))
# Print the futures table using xtable, without row names, in scriptsize font
print(xtable::xtable(futures), comment=FALSE, size="scriptsize", include.rownames=FALSE, latex.environments="flushleft")
@
% \captionof{table}{The first table}
\end{minipage}
\begin{minipage}{0.48\textwidth}
% \centering
<<echo=FALSE,eval=TRUE,results='asis'>>=
# Monthly futures contract codes, in calendar order from January to December
# month.name is the built-in vector of English month names, which avoids
# typing errors in the list of months (the code H is for March)
codes <- cbind(month.name,
               c("F", "G", "H", "J", "K", "M", "N", "Q", "U", "V", "X", "Z"))
colnames(codes) <- c("Month", "Code")
# Print the month codes table using xtable, without row names, in scriptsize font
print(xtable::xtable(codes), comment=FALSE, size="scriptsize", include.rownames=FALSE, latex.environments="flushright")
      @
      % \captionof{table}{The second table}
      \end{minipage}
      \end{table}
      \vspace{-1em}
      Interactive Brokers provides more information about futures contracts:\\
      \hskip1em\href{https://www.interactivebrokers.com/en/index.php?f=463}{IB Contract and Symbol Database}\\
      \hskip1em\href{https://www.interactivebrokers.com/en/index.php?f=1563&p=fut}{IB Traded Products}
      \vskip1ex
      List of
      \href{https://www.purefinancialacademy.com/futures-markets}{Popular Futures Contracts}.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{E-mini} Futures Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{E-mini} futures are contracts with smaller notionals and tick values, which are more suitable for retail investors.
      \vskip1ex
      For example, the
      \href{https://www.cmegroup.com/trading/energy/crude-oil/emini-crude-oil.html}{\emph{QM} E-mini oil future}
      notional is \texttt{500} barrels, while the standard
      \href{https://www.cmegroup.com/trading/energy/crude-oil/light-sweet-crude_quotes_globex.html}{\emph{CL} oil future} notional is \texttt{1,000} barrels.
      \vskip1ex
      The tick value is the change in the dollar value of the futures contract due to a one tick change in the underlying price.
      \vskip1ex
      For example, the tick value of the \emph{ES} E-mini \emph{S\&P500} future is \texttt{\$12.50}, and one tick is \texttt{0.25}.
      \vskip1ex
      So if the \emph{S\&P500} index changes by one tick (\texttt{0.25}), then the value of a single \emph{ES} E-mini contract changes by \texttt{\$12.50}, while the standard \emph{SP} contract value changes by \texttt{\$62.50}.
      \vskip1ex
      The
      \href{https://www.cmegroup.com/trading/equity-index/us-index/e-mini-sandp500.html}{\emph{ES} E-mini \emph{S\&P500} futures} trade almost continuously 24 hours per day, from 6:00 PM Eastern Time (ET) on Sunday night to 5:00 PM Friday night (with a trading halt between 4:15 and 4:30 PM ET each day).
    \column{0.5\textwidth}
    \vspace{-1em}
    \begin{table}[htb]
    <<echo=FALSE,eval=TRUE,results='asis'>>=
# Character matrix of futures contract names, with their standard and
# E-mini contract codes (one contract per row)
futures <- matrix(c(
  "S&P500 index", "SP", "ES",
  "10yr Treasury", "ZN", "ZN",
  "VIX index", "VX", "delisted",
  "Gold", "GC", "YG",
  "Oil", "CL", "QM",
  "Euro FX", "EC", "E7",
  "Swiss franc", "SF", "MSF",
  "Japanese Yen", "JY", "J7"),
  ncol=3, byrow=TRUE,
  dimnames=list(NULL, c("Futures contract", "Standard", "E-mini")))
# Print the futures table using xtable, without row names, in scriptsize font
print(xtable::xtable(futures), comment=FALSE, size="scriptsize", include.rownames=FALSE, latex.environments="flushleft")
@
      \end{table}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Plotting \protect\emph{S\&P500} Futures Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{data.table::fread()} reads \texttt{.csv} files over five times faster than the function \texttt{read.csv()}!
      \vskip1ex
      The function \texttt{as.POSIXct.numeric()} coerces a \texttt{numeric} value representing the \emph{moment of time} into a \texttt{POSIXct} \emph{date-time}, equal to the \emph{clock time} in the local \emph{time zone}.
      <<echo=TRUE,eval=FALSE>>=
# Load data for S&P Emini futures June 2019 contract
dirn <- "/Users/jerzy/Develop/data/ib_data"
filen <- file.path(dirn, "ESohlc.csv")
# Read a data table from CSV file
pricev <- data.table::fread(filen)
class(pricev)
# Inspect the column classes and the last rows
unlist(sapply(pricev, class))
tail(pricev)
# Coerce the Index column of pricev into POSIXct date-time
# (the data table is called pricev, not prices)
pricev$Index <- as.POSIXct(pricev$Index,
  tz="America/New_York", origin="1970-01-01")
# Coerce prices into xts series
pricev <- data.table::as.xts.data.table(pricev)
class(pricev)
tail(pricev)
# Rename the first five columns to the standard OHLCV names
colnames(pricev)[1:5] <- c("Open", "High", "Low", "Close", "Volume")
tail(pricev)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/sp500_futures.png}
      <<echo=TRUE,eval=FALSE>>=
# Plot OHLC data in x11 window
x11(width=5, height=4)  # Open x11 for plotting
par(mar=c(5, 5, 2, 1), oma=c(0, 0, 0, 0))
chart_Series(x=pricev, TA="add_Vo()",
  name="S&P500 futures")
# Plot interactive dygraph candlestick chart of the OHLC prices.
# Both dygraphs functions are namespace-qualified and nested, so
# the plot doesn't depend on dygraphs or a pipe being attached.
dygraphs::dyCandlestick(
  dygraphs::dygraph(pricev[, 1:4], main="OHLC prices"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Consecutive Contract Futures Volumes}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The trading volumes of a futures contract drop significantly shortly before its expiration, and the successive contract volumes increase.
      \vskip1ex
      The contract with the highest trading volume is usually considered the most liquid contract.
      <<echo=TRUE,eval=FALSE>>=
# Directory with the futures data files
dirn <- "/Users/jerzy/Develop/data/ib_data"
# Read the ESU8 and ESM8 data tables from CSV files
filen <- file.path(dirn, "ESU8.csv")
ESU8 <- data.table::fread(filen)
filen <- file.path(dirn, "ESM8.csv")
ESM8 <- data.table::fread(filen)
# Convert the numeric time stamps in column V1 into dates
ESU8$V1 <- as.Date(as.POSIXct.numeric(ESU8$V1,
    origin="1970-01-01", tz="America/New_York"))
ESM8$V1 <- as.Date(as.POSIXct.numeric(ESM8$V1,
    origin="1970-01-01", tz="America/New_York"))
# Coerce both data tables into xts series
ESU8 <- data.table::as.xts.data.table(ESU8)
ESM8 <- data.table::as.xts.data.table(ESM8)
# Rename the first five columns to the OHLCV names
colnames(ESU8)[1:5] <- c("Open", "High", "Low", "Close", "Volume")
colnames(ESM8)[1:5] <- c("Open", "High", "Low", "Close", "Volume")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/sp500_futures_volumes.png}
      <<echo=(-(1:1)),eval=FALSE>>=
x11(width=6, height=5)  # Open x11 for plotting
# Select the ESU8 and ESM8 volumes over the last 30 days of ESM8 data
endd <- end(ESM8)
startd <- endd - 30
volumm <- cbind(Vo(ESU8), Vo(ESM8))
volumm <- volumm[paste0(startd, "/", endd)]
colnames(volumm) <- c("ESU8", "ESM8")
# Plot both volume series, each in its own color
colorv <- c("blue", "green")
plot(volumm, main="Volumes of ESU8 and ESM8 futures",
     col=colorv, lwd=3, major.ticks="days",
     format.labels="%b-%d", observation.based=TRUE)
# Add a legend with the contract names
legend("topleft", legend=colnames(volumm), title=NULL,
       col=colorv, lty=1, lwd=6, bty="n",
       inset=0.1, cex=0.7, y.intersp=0.1)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Chaining Together Futures Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Chaining futures means splicing together prices from several consecutive futures contracts.
      \vskip1ex
      A continuous futures contract is a time series of prices obtained by chaining together prices from consecutive futures contracts.
      \vskip1ex
      The price of the continuous contract is equal to the most liquid contract times a scaling factor.
      \vskip1ex
      When the next contract becomes more liquid, then the continuous contract price is rolled over to that contract.
      \vskip1ex
      Futures contracts with different maturities (expiration dates) trade at different prices because of the futures curve, which causes price jumps between consecutive futures contracts.
      \vskip1ex
      The old contract price is multiplied by a scaling factor after that contract is rolled, to remove price jumps.
      \vskip1ex
      So the continuous contract prices are not equal to the past futures prices.
      \vskip1ex
      Interactive Brokers provides information about Continuous Contract Futures market data:\\
      \hskip1em\href{https://www.interactivebrokers.com/en/software/tws/usersguidebook/technicalanalytics/continuous.htm}{Continuous Contract Futures Data}
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/sp500_futures_chained.png}
      <<echo=TRUE,eval=FALSE>>=
# Flag the dates when the ESU8 volume exceeds the ESM8 volume
exceeds <- (volumm[, "ESU8"] > volumm[, "ESM8"])
# Find the roll date - the first date that is flagged TRUE
indeks <- which(exceeds)[1]
# indeks <- match(TRUE, exceeds)
indeks <- zoo::index(exceeds[indeks])
# Scale the ESM8 prices by the ratio of close prices on the roll date
ratio <- as.numeric(Cl(ESU8[indeks])/Cl(ESM8[indeks]))
ESM8[, 1:4] <- ESM8[, 1:4]*ratio
# Splice the scaled ESM8 prices before the roll date
# with the ESU8 prices from the roll date onwards
chaind <- rbind(ESM8[zoo::index(ESM8) < indeks],
                ESU8[zoo::index(ESU8) >= indeks])
# Or
# chaind <- rbind(ESM8[paste0("/", indeks-1)],
#                   ESU8[paste0(indeks, "/")])
# Plot the 2018 continuous contract prices with volumes
chart_Series(x=chaind["2018"], name="S&P500 chained futures",
  TA="add_Vo()")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{VIX} Volatility Index}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{VIX} Volatility Index is an average of the implied volatilities of options on the \emph{S\&P500} Index (SPX).
      \vskip1ex
      The \emph{VIX} index is an estimate of the \emph{future} stock market volatility that is expected (anticipated) by investors.
      \vskip1ex
      The \emph{VIX} index is not a directly tradable asset, but it can be traded using \emph{VIX} futures.
      \vskip1ex
      The \emph{CBOE} provides daily historical data for the \emph{VIX} index.
      <<echo=TRUE,eval=FALSE>>=
# Download VIX index data from CBOE
vix_index <- data.table::fread("http://www.cboe.com/publish/scheduledtask/mktdata/datahouse/vixcurrent.csv", skip=1)
# Inspect the downloaded data table
class(vix_index)
dim(vix_index)
tail(vix_index)
sapply(vix_index, class)
# Coerce the data table into an xts series indexed by date
vix_index <- xts(vix_index[, -1],
  order.by=as.Date(vix_index$Date, format="%m/%d/%Y"))
colnames(vix_index) <- c("Open", "High", "Low", "Close")
# Load the environment vixenv from binary file.
# The file is in the vix_data directory - the same file which
# the VIX futures examples load and read vixenv$vix_index from.
load(file="/Users/jerzy/Develop/data/vix_data/vix_cboe.RData")
ls(vixenv)
# Add the VIX index to vixenv and save it back to the binary file
vixenv$vix_index <- vix_index
save(vixenv, file="/Users/jerzy/Develop/data/vix_data/vix_cboe.RData")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/vix_historical.png}
      <<echo=TRUE,eval=FALSE>>=
# Plot VIX OHLC data for 2018 in x11 window
chart_Series(x=vix_index["2018"], name="VIX Index")
# Plot interactive dygraph candlestick chart of the VIX index.
# Both dygraphs functions are namespace-qualified and nested, so
# the plot doesn't depend on dygraphs or a pipe being attached.
dygraphs::dyCandlestick(
  dygraphs::dygraph(vix_index, main="VIX Index"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{VIX} Futures Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{VIX} futures are cash-settled futures contracts on the \emph{VIX} Index.
      \vskip1ex
      The most liquid \emph{VIX} futures are with monthly expiration dates (\href{http://www.cboe.com/framed/pdfframed?content=/aboutcboe/xcal2018.pdf&section=SEC_RESOURCES&title=2018+Cboe+Expiration+Calendar}{CBOE Expiration Calendar}), but weekly \emph{VIX} futures are also traded.
      \vskip1ex
      These are the \href{http://www.macroption.com/vix-expiration-calendar/}{VIX Futures Monthly Expiration Dates} from 2004 to 2019.
      \vskip1ex
      \emph{VIX} futures are traded on the \emph{CFE} (CBOE Futures Exchange):\\
      \hskip1em\url{http://cfe.cboe.com/}\\
      \hskip1em\url{http://www.cboe.com/vix}
      \vskip1ex
      \emph{VIX} Contract Specifications:\\
      \hskip1em\href{http://cfe.cboe.com/cfe-products/vx-cboe-volatility-index-vix-futures/contract-specifications}{VIX Contract Specifications}\\
      \hskip1em\href{http://www.macroption.com/vix-expiration-calendar/}{VIX Expiration Calendar}
      \vskip1ex
      Standard and Poor's explains the methodology of the
      \href{https://us.spindices.com/documents/methodologies/methodology-sp-vix-futures-indices.pdf}{\emph{VIX} Futures Indices}.
    \column{0.5\textwidth}
    \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Read CBOE monthly futures expiration dates
datev <- read.csv(file="/Users/jerzy/Develop/data/vix_data/vix_dates.csv")
datev <- as.Date(datev[, 1])
# Last digit of the expiration year, for example "8" for 2018
yearv <- format(datev, format="%Y")
yearv <- substring(yearv, 4)
# Monthly futures contract codes, in calendar month order
codes <-
  c("F", "G", "H", "J", "K", "M",
    "N", "Q", "U", "V", "X", "Z")
# Look up the code from the month of each expiration date,
# instead of recycling the codes by position, so the symbols
# are right even if the dates don't start in January
# or if a month is missing.
symbolv <- paste0("VX",
  codes[as.integer(format(datev, format="%m"))], yearv)
datev <- as.data.frame(datev)
colnames(datev) <- "exp_dates"
rownames(datev) <- symbolv
# Write dates to CSV file, with row names
write.csv(datev, row.names=TRUE,
  file="/Users/jerzy/Develop/data/vix_data/vix_futures.csv")
# Read back CBOE futures expiration dates
datev <- read.csv(file="/Users/jerzy/Develop/data/vix_data/vix_futures.csv",
  row.names=1)
# Coerce the dates from strings back into Date objects
datev[, 1] <- as.Date(datev[, 1])
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{VIX} Futures Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Futures contracts with different expiration dates trade at different prices, known as the \emph{futures curve} (or \emph{term structure}).
      \vskip1ex
      The \emph{VIX} futures curve is similar to the interest rate \emph{yield curve}, which displays yields at different bond maturities.
      \vskip1ex
      The \emph{VIX} futures curve is not the same as the \emph{VIX} index term structure.
      \vskip1ex
      More information about the \emph{VIX} Index and the \emph{VIX} futures curve:\\
      \hskip1em\href{http://www.macroption.com/vix-futures/}{VIX Futures}\\
      \hskip1em\href{http://www.macroption.com/vix-futures-historical-data/}{VIX Futures Data}\\
      \hskip1em\href{http://www.macroption.com/vix-futures-curve/}{VIX Futures Curve}\\
      \hskip1em\href{http://www.macroption.com/vix-term-structure/}{VIX Index Term Structure}
    \column{0.5\textwidth}
    \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load VIX futures data (vixenv) from binary file
load(file="/Users/jerzy/Develop/data/vix_data/vix_cboe.RData")
# Select the symbols ending in "8" and keep elements 2 to 9
symbolv <- grep(glob2rx("*8"), ls(vixenv), value=TRUE)
symbolv <- symbolv[2:9]
# Dates of the two futures curves
startd <- as.Date("2018-01-11")
endd <- as.Date("2018-02-05")
# Extract the close prices of each contract on the two dates
futcurves <- lapply(symbolv, function(symboln) {
  Cl(get(x=symboln, envir=vixenv)[c(startd, endd)])
})  # end lapply
# Bind the prices into columns, one column per contract
futcurves <- rutils::do_call(cbind, futcurves)
colnames(futcurves) <- symbolv
# Transpose so that rows are contracts and columns are dates
futcurves <- t(coredata(futcurves))
colnames(futcurves) <- c("Contango 01/11/2018",
                       "Backwardation 02/05/2018")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{Contango} and \protect\emph{Backwardation} of \protect\emph{VIX} Futures Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      When prices are \emph{low} then the futures curve is usually \emph{upward sloping}, known as \emph{contango}.
      \vskip1ex
      Futures prices are in \emph{contango} most of the time.
      \vskip1ex
      When prices are \emph{high} then the curve is usually \emph{downward sloping}, known as \emph{backwardation}.
      <<echo=TRUE,eval=FALSE>>=
# Open plot window and set the plot margins
x11(width=7, height=5)
par(oma=c(0, 0, 0, 0), mar=c(3, 2, 1, 1))
# Plot the first futures curve in blue, without the x-axis
plot(futcurves[, 1], main="VIX Futures Curves",
     type="l", lty=1, lwd=3, col="blue",
     xlab="", ylab="", xaxt="n", ylim=range(futcurves))
# Label the x-axis with the row names of the curves
axis(side=1, at=seq_len(NROW(futcurves)),
     labels=rownames(futcurves))
# Add the second futures curve in red
lines(futcurves[, 2], col="red", lty=1, lwd=3)
legend(x="topright", legend=colnames(futcurves),
       col=c("blue", "red"), lty=1, lwd=6,
       bty="n", inset=0.05, cex=1.0, y.intersp=0.1)
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/vix_curves.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Futures Prices at Constant Maturity}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A constant maturity futures price is the price of a hypothetical futures contract with an expiration date at a fixed number of days in the future.
      \vskip1ex
      Futures prices at a constant maturity can be calculated by interpolating the prices of contracts with neighboring expiration dates.
    \column{0.5\textwidth}
    \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load VIX futures data (vixenv) from binary file
load(file="/Users/jerzy/Develop/data/vix_data/vix_cboe.RData")
# Read the expiration dates, with the symbols as row names
datev <- read.csv(row.names=1,
  file="/Users/jerzy/Develop/data/vix_data/vix_futures.csv")
symbolv <- rownames(datev)
datev <- as.Date(datev[, 1])
# Target maturity date is 30 days after the pricing date
tday <- as.Date("2018-05-07")
maturd <- tday + 30
# Position of the first expiration date after the target date
indeks <- which(datev > maturd)[1]
# Front contract: the one just before that position
frontd <- datev[indeks-1]
symbolf <- symbolv[indeks-1]
pricef <- as.numeric(Cl(get(x=symbolf, envir=vixenv)[tday]))
# Back contract: the one at that position
backd <- datev[indeks]
symbolb <- symbolv[indeks]
priceb <- as.numeric(Cl(get(x=symbolb, envir=vixenv)[tday]))
# Interpolate the front and back prices linearly in time,
# to get the constant maturity 30-day futures price
ratiop <- as.numeric(maturd - frontd)/as.numeric(backd - frontd)
pricec <- ((1-ratiop)*pricef + ratiop*priceb)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{VIX} Futures Investing}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The volatility index moves in the opposite direction to the underlying asset price.
      \vskip1ex
      An increase in the \emph{VIX} index coincides with a drop in stock prices, and vice versa.
      \vskip1ex
      Taking a \emph{long} position in \emph{VIX} futures is similar to a \emph{short} position in stocks, and vice versa.
      \vskip1ex
      There are several exchange-traded funds (\emph{ETFs}) and exchange traded notes (\emph{ETNs}) which are linked to \emph{VIX} futures.
      \vskip1ex
      The 
      \href{https://ipathetn.barclays/details.app;instrumentId=341408}{VXX ETN}
      provides the total return on the long VIX futures index.
      \vskip1ex
      VXX is short market risk because it's long VIX futures, and the VIX rises when stock prices drop.
      \vskip1ex
      The 
      \href{https://www.proshares.com/our-etfs/strategic/svxy?gad=1&gclid=EAIaIQobChMI4erxxr6egQMVqqNaBR2NKwZcEAAYASAAEgLXofD_BwE}{SVXY ETF}
      provides the total return of short VIX futures contracts.
      \vskip1ex
      SVXY is long market risk because it's short VIX futures, and the VIX drops when stock prices rise.
      \vskip1ex
      Standard and Poor's explains the calculation of the
      \href{https://www.spglobal.com/spdji/en/documents/methodologies/methodology-sp-vix-futures-indices.pdf}{Total Return on VIX Futures Indices}.
      <<echo=(-(1:3)),eval=FALSE>>=
x11(width=5, height=3)  # Open x11 for plotting
# Load VIX futures data from binary file
load(file="/Users/jerzy/Develop/data/vix_data/vix_cboe.RData")
# Define a chart theme with blue lines
plot_theme <- chart_theme()
plot_theme$col$line.col <- "blue"
# Plot the VIX index close prices since 2007
chart_Series(x=Cl(vixenv[["vix_index"]]["2007/"]),
             name="VIX Index", theme=plot_theme)
# Plot the VTI ETF close prices since 2007
chart_Series(x=Cl(rutils::etfenv[["VTI"]]["2007/"]),
             name="VTI ETF", theme=plot_theme)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/vix_historical2.png}
      \includegraphics[width=0.45\paperwidth]{figure/vix_vti.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{VIX} Crash on February 5th 2018}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{SVXY} and \emph{XIV} ETFs rallied strongly after the financial crisis of 2008, so they became very popular with individual investors, and became very ``crowded trades''.
      \vskip1ex
      The \emph{SVXY} and \emph{XIV} ETFs had \$3.6 billion of assets at the beginning of 2018.
      \vskip1ex
      On February 5th 2018 the U.S. stock markets experienced a mini-crash, which was exacerbated by \emph{VIX} futures short sellers.
      \vskip1ex
      As a result, the \emph{XIV} ETF hit its termination event and its value dropped to zero:\\
      \hskip1em\href{https://www.bloomberg.com/news/articles/2018-02-07/how-two-tiny-volatility-products-helped-fuel-sudden-stock-slump}{Volatility Caused Stock Market Crash}\\
      \hskip1em\href{https://riskreversal.com/2018/02/06/volatility-etn-terminated-xiv/}{XIV ETF Termination Event}
      <<echo=TRUE,eval=FALSE>>=
# Plot the VIX index close prices in 2017 and 2018
chart_Series(x=Cl(vixenv[["vix_index"]]["2017/2018"]),
             name="VIX Index", theme=plot_theme)
# Plot the SVXY ETF close prices in 2017 and 2018
chart_Series(x=Cl(rutils::etfenv[["SVXY"]]["2017/2018"]),
             name="SVXY ETF", theme=plot_theme)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/vix_historical3.png}
      \includegraphics[width=0.45\paperwidth]{figure/vix_svxy2.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Types of Market Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Fundamental company data summarizes the financial, economic, and regulatory state of a company.
      \vskip1ex
      Fundamental company data can be divided into several different types:
      \begin{itemize}
        \item Balance Sheet (assets and liabilities),
        \item Income Statement (profits and losses),
        \item Cash Flow Statement (current operating cash flows),
        \item Financial Ratios (performance and risk measures),
      \end{itemize}
    \column{0.5\textwidth}
    Financial ratios summarize the performance and risk measures of a company, and can be used for investment decisions, like value investing.
      <<echo=(-(1:1)),eval=TRUE>>=
library(xtable)
# Read the csv file of fundamental stock data into a data frame
dframe <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/fundamental_stock_data.csv")
      @
  \end{columns}
      <<echo=FALSE,eval=TRUE,results='asis'>>=
# Print the data frame as a LaTeX table, without row names
print(x=xtable(dframe), include.rownames=FALSE,
      size="scriptsize", comment=FALSE)
      @
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Fundamental Company Data}


%%%%%%%%%%%%%%%
\subsection{Types of Fundamental Company Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Fundamental company data summarizes the financial, economic, and regulatory state of a company.
      \vskip1ex
      Fundamental company data can be divided into several different types:
      \begin{itemize}
        \item Balance Sheet (assets and liabilities),
        \item Income Statement (profits and losses),
        \item Cash Flow Statement (operating cash flows),
        \item Financial Ratios (performance and risk measures),
      \end{itemize}
    \column{0.5\textwidth}
    Financial ratios summarize the performance and risk measures of a company, and can be used for investment decisions, like value investing.
      <<echo=(-(1:1)),eval=TRUE>>=
library(xtable)
# Read the csv file of fundamental stock data into a data frame
dframe <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/fundamental_stock_data.csv")
      @
  \end{columns}
      <<echo=FALSE,eval=TRUE,results='asis'>>=
# Print the data frame as a LaTeX table, without row names
print(x=xtable(dframe), include.rownames=FALSE,
      size="scriptsize", comment=FALSE)
      @
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Sources of Fundamental Company Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Fundamental company data is obtained from financial statements provided to government agencies and from earnings statements provided to investors.
      \vskip1ex
      U.S. companies are required to provide quarterly \emph{10Q} reports and annual \emph{10K} reports to the \emph{Securities and Exchange Commission} (\href{https://www.sec.gov/}{\emph{SEC}}).
      \vskip1ex
      The \emph{10Q} and \emph{10K} reports are publicly available on the \href{https://www.sec.gov/edgar.shtml}{\emph{SEC EDGAR}} website.
      \vskip1ex
      The data provided by \href{https://www.sec.gov/edgar.shtml}{\emph{EDGAR}} only goes back to the year $2009$, because the \href{https://www.sec.gov/}{\emph{SEC}} did not mandate electronic filing before then.
      \vskip1ex
      The data from the \emph{10Q} and \emph{10K} reports is compiled by vendors such as \href{http://www.compustat.com}{\emph{Compustat}} and \href{http://www.factset.com/}{\emph{FactSet}}, into premium databases.
      \vskip1ex
      There are also free databases of fundamental company data, from providers such as \href{https://simfin.com/}{\emph{SimFin}}, \href{http://usfundamentals.com/}{\emph{usfundamentals}}, and \href{https://www.quandl.com/}{\emph{Quandl}}.
    \column{0.5\textwidth}
      \href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}} redistributes fundamental company data from \href{http://www.compustat.com}{\emph{Compustat}}, \href{http://www.capitaliq.com}{\emph{S\&P Capital IQ}}, \href{https://www.thomsonreuters.com/en.html}{\emph{Thomson Reuters}}, \href{http://www.factset.com/}{\emph{FactSet}}, \href{https://ihsmarkit.com/index.html}{\emph{Markit}}, etc.
      \vskip1ex
      There are occasional gaps in the quarterly data, because companies are not required to file quarterly reports on dates when they're also filing annual reports.
      \vskip1ex
      The report harmonization procedure fails when companies change their accounting treatment from year to year.
      \vskip1ex
      \href{https://www.quandl.com/}{\emph{Quandl}} has written \href{https://blog.quandl.com/api-for-stock-data\#SEC-Indicators-Legend}{a review} of both premium and free sources of fundamental company data.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Identifiers of Company Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Identifiers} are strings that correspond to \emph{corporate entities} and \emph{securities}.
      \vskip1ex
      \emph{Headers} are current identifiers, while \emph{historical identifiers} have date ranges when they were valid.
      \vskip1ex
      Some \emph{universal identifiers} are company names, tickers, CUSIP, SEDOL, ISIN, and CIK.
      \vskip1ex
      Most \emph{universal identifiers}, such as company names, tickers, CUSIPs, etc. change over time due to corporate events such as mergers, bankruptcies, etc.
      \vskip1ex
      \emph{Tickers} are short and recognizable strings, but are not unique, and can change over time and can be reused by a different company.
      \vskip1ex
      \emph{CUSIPs} change infrequently and are never reused, and identify securities in the U.S. and Canada.
      \vskip1ex
      \emph{SEDOL} identify securities in the UK.
      \vskip1ex
      \emph{ISIN} identify securities internationally.
      \vskip1ex
      \emph{CIK} identify corporations and individuals who have filed disclosures with the SEC.
      \vskip1ex
    \column{0.5\textwidth}
      Data vendors such as \href{http://www.compustat.com}{\emph{Compustat}}, \href{https://www.capitaliq.com/}{\emph{Capital IQ}}, \href{https://www.refinitiv.com/en/financial-data/company-data/institutional-brokers-estimate-system-ibes}{\emph{IBES}}, \href{https://ihsmarkit.com/index.html}{\emph{Markit}}, and \href{http://www.factset.com/}{\emph{FactSet}} have introduced \emph{proprietary identifiers} that are \emph{permanent}.
      \vskip1ex
      CRSP \emph{PERMCO} and \emph{PERMNO} are identifiers of companies and securities.
      \vskip1ex
      Compustat \emph{GVKEYs} and \emph{IIDs} are identifiers of companies and securities.
      \vskip1ex
      The \emph{PERMCO} and \emph{PERMNO} are \emph{permanent identifiers} which never change, even if other identifiers such as tickers do change.
      \vskip1ex
      So analysts use \emph{permanent identifiers}, such as \emph{PERMCO}, \emph{PERMNO}, and \emph{GVKEY}.
      \vskip1ex
      Capital IQ \emph{Company IDs} are identifiers of companies.
      \vskip1ex
      IBES \emph{tickers} are identifiers of securities.
      \vskip1ex
      Markit \emph{RED codes} are identifiers of companies.
      \vskip1ex
      Factset \emph{Entity IDs} and \emph{Perm Sec IDs} are identifiers of companies and securities.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Wharton Research Data Services \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Wharton Research Data Services (\href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}}) is a distributor of premium third party data for the academic and research communities.
      \vskip1ex
      \emph{WRDS} provides time series of security prices and fundamental company data, and other financial, econometric, and social datasets.
      \vskip1ex
      \emph{WRDS} provides stock prices, options and implied volatilities, stock fundamentals, financial ratios, indexes, earnings estimates, analyst ratings, etc.
      \vskip1ex
      \emph{WRDS} redistributes fundamental company data from \href{http://www.compustat.com}{\emph{Compustat}}, \href{http://www.capitaliq.com}{\emph{S\&P Capital IQ}}, \href{https://www.thomsonreuters.com/en.html}{\emph{Thomson Reuters}}, \href{http://www.factset.com/}{\emph{FactSet}}, \emph{Hedge Fund Research}, \href{https://ihsmarkit.com/index.html}{\emph{Markit}}, etc.
      \vskip1ex
      NYU students can obtain user accounts for \emph{WRDS} data.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_home.pdf}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{WRDS} Help and Documentation}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      WRDS provides extensive
      \href{https://wrds-www.wharton.upenn.edu/pages/classroom/}{\emph{Learning Resources and Documentation}}.
      \vskip1ex
      \href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}} provides online help and a guide to its datasets:\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/grid-items/getting-started/}{\emph{Getting Started}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/grid-items/what-data-wrds/}{\emph{WRDS Data}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/grid-items/finding-data-you-want/}{\emph{Finding Data}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/grid-items/understanding-identifiers/}{\emph{Understanding Identifiers}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/classroom/identifiers-tracking-companies/}{\emph{Company Identifiers}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/classroom/futures-contracts/}{\emph{Futures Contracts}}\\
      \hskip1em\href{https://wrds-www.wharton.upenn.edu/pages/grid-items/using-sas-studio/}{\emph{Using SAS Studio}}
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_classroom.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{\protect\emph{WRDS} Vendor and Database Manuals}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      WRDS provides many
      \href{https://wrds-www.wharton.upenn.edu/pages/support/manuals-and-overviews/}{\emph{Vendor and Database Manuals}}.
      \vskip1ex
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_manuals.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Data From \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The two main databases on \emph{WRDS} are the
      \href{https://wrds-web.wharton.upenn.edu/wrds/query_forms/navigation.cfm?navId=60}{\emph{Compustat}} database of fundamental company data and security prices,
      and the
      \href{https://wrds-web.wharton.upenn.edu/wrds/query_forms/navigation.cfm?navId=118}{\emph{CRSP}} database of security prices.
      \vskip1ex
      The \emph{WRDS} data can be accessed through vendor pages, which are conveniently organized by
      \href{https://wrds-www.wharton.upenn.edu/pages/browse-data-concept/}{\emph{concept}}.
      \vskip1ex
      The easiest way to download data from \emph{WRDS} into \texttt{R} is by first downloading it into a \texttt{.csv} file, and then reading it into \texttt{R}.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_browse_data.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{Compustat} Database on \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \href{http://www.compustat.com}{\emph{Compustat}} is a provider of fundamental company data, and also security prices, and other data such as credit ratings.
      \vskip1ex
      Daily North American security prices can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/secd/index.cfm?navId=83}{\emph{Compustat Daily Prices}} database, which can be reached by navigating from the \href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}} home page to
      \href{https://wrds-web.wharton.upenn.edu/wrds/query_forms/navigation.cfm?navId=83}{\emph{North America - Daily}} and then to
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/secd/index.cfm?navId=83}{\emph{Security Daily}}.
      \vskip1ex
      \href{http://www.compustat.com}{\emph{Compustat}} is part of \href{http://www.capitaliq.com}{\emph{S\&P Capital IQ}}.
      \vskip1ex
      \href{http://www.compustat.com}{\emph{Compustat}} uses the \emph{GVKEY} and \emph{IID} identifiers of companies and securities.
      \vskip1ex
      \emph{Compustat} provides only recent time series of data starting in $1983$.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_compustat.png}\\
      \includegraphics[width=0.45\paperwidth]{figure/wrds_compustat_north_america.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Package \protect\emph{rwrds} for Downloading Data From \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The package \emph{rwrds} contains functions for downloading data from \emph{WRDS}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Install package "rwrds" from its GitHub repository
devtools::install_github("davidsovich/rwrds")
# Load package "rwrds" (attach it to the search path)
library(rwrds)
# Get documentation for package "rwrds"
packageDescription("rwrds")
# Load help page
help(package="rwrds")
# List all datasets in "rwrds"
data(package="rwrds")
# List all objects in "rwrds"
ls("package:rwrds")
# Remove rwrds from search path
detach("package:rwrds")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Names Table From \protect\emph{Compustat}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Compustat} names table is a database containing various company \emph{identifiers} (company names, tickers, CUSIPs, GVKEYs).
      \vskip1ex
      The names table can be used to cross-reference the \emph{identifiers}, for example to find the \emph{GVKEYs} from the company \emph{tickers}.
      \vskip1ex
      The function \texttt{rwrds::compustat\_names()} downloads the names table from \emph{Compustat}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
library(rwrds)
library(dplyr)
# Establish connection to WRDS
# The credentials are read from the environment variables WRDS_USER
# and WRDS_PASSWORD (for example set in the .Renviron file),
# so that they are not stored in the script
connv <- rwrds::wrds_connect(username=Sys.getenv("WRDS_USER"),
  password=Sys.getenv("WRDS_PASSWORD"))
# Download Compustat names table as dplyr object
namet <- rwrds::compustat_names(wrds=connv, subset=FALSE, dl=TRUE)
dim(namet)
# Save names table as csv file
write.csv(namet, file="/Users/jerzy/Develop/lecture_slides/data/compustat_table.csv", row.names=FALSE)
# rm(namet)
# Read names table from csv file
namet <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/compustat_table.csv")
# symboln <- "VTI"
# match(symboln, namet$tic)
# Create ETF symbols (tickers)
symbolv <- c("VTI", "VEU", "EEM")
# Find the rows of the names table which match the symbols
# (match() returns NA for a symbol that is not in the table)
indeks <- match(symbolv, namet$tic)
names(indeks) <- symbolv
# Get cusips of symbolv
etf_cusips <- namet$cusip[indeks]
names(etf_cusips) <- symbolv
# Save cusips into text file
cat(etf_cusips, file="/Users/jerzy/Develop/lecture_slides/data/etf_cusips.txt", sep="\n")
# Save gvkeys into text file
etf_gvkeys <- namet$gvkey[indeks]
names(etf_gvkeys) <- symbolv
cat(etf_gvkeys, file="/Users/jerzy/Develop/lecture_slides/data/etf_gvkeys.txt", sep="\n")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Downloading Index Constituents From \protect\emph{Compustat}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The names and tickers of the constituents of a given index can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/index/constituents.cfm?navId=83}{\emph{WRDS Compustat web page}}.
      \vskip1ex
      The constituents can be downloaded for a specific date, or for a range of dates, to obtain the names and tickers of all the companies which belonged to the index during that time.
      \vskip1ex
      The \emph{S\&P500 Index} ticker is equal to \emph{i0003}, and its \emph{GVKEY} is equal to $3$.
      \vskip1ex
      \emph{WRDS} provides
      \href{http://asklib.library.hbs.edu/faq/47512}{\emph{help about how to download index constituents}}.
      The table with the index constituents can be read into \texttt{R} from the \texttt{.csv} file downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/index/constituents.cfm?navId=83}{\emph{WRDS Compustat web page}}.
      \vskip1ex
      The table of constituents is a data frame containing various company \emph{identifiers} (company names, tickers, CUSIPs, GVKEYs).
      \vskip1ex
      Some \emph{GVKEYs} are duplicates because some companies entered the index more than once.
      \vskip1ex
      The \emph{GVKEYs} and \emph{CUSIPs} can be saved into text files for use in \emph{WRDS} data queries.
      <<echo=TRUE,eval=FALSE>>=
# Read .csv file with S&P500 constituents
sp500table <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/sp500_constituents.csv")
class(sp500table)
dim(sp500table)
head(sp500table)
# Select unique sp500 tickers and save them into text file
ticksp500 <- unique(sp500table$co_tic)
cat(ticksp500, file="/Users/jerzy/Develop/lecture_slides/data/ticksp500.txt", sep="\n")
# Some gvkeys are duplicates
# Count the rows for each gvkey and keep the gvkeys with more than one row
duplicatv <- table(sp500table$gvkey)
duplicatv <- duplicatv[duplicatv > 1]
# Select the first row of sp500table for each duplicate gvkey
duplicatv <- sp500table[match(as.numeric(names(duplicatv)), sp500table$gvkey), ]
# Select unique gvkeys
keysp500 <- unique(sp500table$gvkey)
# foo <- sp500table[match(keysp500, sp500table$gvkey), ]
# Save gvkeys into text file
cat(keysp500, file="/Users/jerzy/Develop/lecture_slides/data/keysp500.txt", sep="\n")
# Select unique cusips and save into text file
cusipsp500 <- unique(sp500table$co_cusip)
# Remove empty cusips
which(cusipsp500 == "")
# Use logical subsetting instead of negative which() indices,
# because x[-which(...)] returns an empty vector if there are no matches
cusipsp500 <- cusipsp500[!(cusipsp500 %in% "")]
cat(cusipsp500, file="/Users/jerzy/Develop/lecture_slides/data/cusipsp500.txt", sep="\n")
# Find the rows corresponding to the cusipsp500
rows_cusips <- sp500table[match(cusipsp500, sp500table$co_cusip), ]
# Find the rows corresponding to duplicate gvkeys
duplicatv <- table(rows_cusips$gvkey)
duplicatv <- duplicatv[duplicatv > 1]
duplicatv <- rows_cusips[rows_cusips$gvkey %in% as.numeric(names(duplicatv)), ]
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_index_constituents.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading the \protect\emph{S\&P500} Index Constituents}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      As of July 2020, the \emph{S\&P500} stock index constituents data was removed from 
      \href{http://www.compustat.com}{\emph{Compustat}}, so it's no longer available on 
\href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}}.
      \vskip1ex
      As an alternative, users can download the \emph{SPY ETF} constituents from
      \href{https://www.ssga.com/us/en/intermediary/etfs/funds/spdr-sp-500-etf-trust-spy}{\emph{State Street}}.
      \vskip1ex
      The file \texttt{sp500\_constituents.csv} contains the symbols (tickers) for almost \texttt{800} stocks belonging to the \emph{S\&P500} stock index, either now or in the past.
      \vskip1ex
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Read .csv file with S&P500 constituents
sp500table <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/sp500_constituents.csv")
# Inspect the class, the dimensions, and the first rows of the table
class(sp500table)
dim(sp500table)
head(sp500table)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Fundamental Company Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Fundamental company data can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/fundq/index.cfm?navId=83}{\emph{Compustat Fundamentals}} database.
      \vskip1ex
      The user can download data for either a single identifier, or for many identifiers supplied in a text file.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_compustat_fundamental.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Corporate Stock and Bond Data}


%%%%%%%%%%%%%%%
\subsection{Downloading Daily Prices From \protect\emph{Compustat}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Daily security prices can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/secd/index.cfm?navId=83}{\emph{Compustat Daily Prices}} database.
      \vskip1ex
      The user can download data for either a single identifier, or for many identifiers in a text file.\\
      \includegraphics[width=0.45\paperwidth]{figure/wrds_compustat_prices.png}
    \column{0.5\textwidth}
      The query for \emph{OHLC} prices should also include the \emph{split adjustment factor} and the \emph{total return factor}.\\
      \includegraphics[width=0.45\paperwidth]{figure/wrds_ohlc_query.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Reading Daily Prices Into \texttt{R}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The time series data downloaded into a \texttt{.csv} file can then be read into \texttt{R}.
      \vskip1ex
      But the data downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/secd/index.cfm?navId=83}{\emph{Compustat Daily Prices}}
      database may contain data for several related securities, so it must be properly selected (pruned).
      \vskip1ex
      The \emph{OHLC} prices can be adjusted by dividing them by the \emph{split adjustment factor} and multiplying them by the \emph{total return factor}.
      \vskip1ex
      The \emph{OHLC} prices are also divided by the final \emph{total return factor} so that the most recent adjusted prices are equal to the most recent market prices.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:1)),eval=FALSE>>=
library(rutils)  # Load package rutils
# Read .csv file with TAP OHLC prices
ohlc <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/TAP.csv")
# The objects cusipsp500, ticksp500, and sp500table used below
# are not defined in this chunk and must already exist
# ohlc contains cusips not in cusipsp500
cusips <- unique(ohlc$cusip)
cusips %in% cusipsp500
# Select data only for cusipsp500
ohlc <- ohlc[ohlc$cusip %in% cusipsp500, ]
# ohlc contains tickers not in ticksp500
tickers <- unique(ohlc$tic)
tickers %in% ticksp500
# Select data only for ticksp500
ohlc <- ohlc[ohlc$tic %in% ticksp500, ]
# Select ticker from sp500table
# by matching the gvkey in the first row of ohlc
symboln <- sp500table$co_tic[match(ohlc$gvkey[1], sp500table$gvkey)]
# Adjustment factor and total return factor
factadj <- drop(ohlc[, "ajexdi"])
factr <- drop(ohlc[, "trfd"])
# Extract index of dates
datev <- drop(ohlc[, "datadate"])
datev <- lubridate::ymd(datev)
# Select only OHLCV data columns
ohlc <- ohlc[, c("prcod", "prchd", "prcld", "prccd", "cshtrd")]
colnames(ohlc) <- paste(symboln, c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Coerce to xts series
ohlc <- xts::xts(ohlc, datev)
# Fill the missing (NA) Open prices
# with the average of the High and Low prices
isna <- is.na(ohlc[, 1])
ohlc[isna, 1] <- (ohlc[isna, 2] + ohlc[isna, 3])/2
# Count the remaining NA values
sum(is.na(ohlc))
# Adjust all the prices
# Multiply by the total return factor, divide by the adjustment factor,
# and divide by the last total return factor
ohlc[, 1:4] <- factr*ohlc[, 1:4]/factadj/factr[NROW(factr)]
# Remove the rows with NA values
ohlc <- na.omit(ohlc)
plot(quantmod::Cl(ohlc), main="TAP Stock")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Function for Reading Daily Prices Into \texttt{R}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{format\_ohlc()} formats raw data downloaded from \emph{Compustat} into a time series of \emph{OHLC} prices.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define formatting function for OHLC prices
# Formats the raw Compustat prices of a single security into an xts
# series of adjusted OHLCV prices, and copies the series into pricenv
# under the name of its ticker.
# Arguments:
#   ohlc: data frame of raw prices, with the columns gvkey, ajexdi,
#     trfd, datadate, prcod, prchd, prcld, prccd, and cshtrd.
#   pricenv: environment into which the xts series is copied.
# Returns the ticker (symbol) of the security.
# The table sp500table is not an argument - it must already exist.
format_ohlc <- function(ohlc, pricenv) {
  # Select ticker from sp500table
  symboln <- sp500table$co_tic[match(ohlc$gvkey[1], sp500table$gvkey)]
  # Split adjustment and total return factors
  factadj <- drop(ohlc[, c("ajexdi")])
  factr <- drop(ohlc[, "trfd"])
  # Replace the missing total return factors with 1
  factr <- ifelse(is.na(factr), 1, factr)
  # Extract dates index
  datev <- drop(ohlc[, "datadate"])
  datev <- lubridate::ymd(datev)
  # Select only OHLCV data
  ohlc <- ohlc[, c("prcod", "prchd", "prcld", "prccd", "cshtrd")]
  colnames(ohlc) <- paste(symboln, c("Open", "High", "Low", "Close", "Volume"), sep=".")
  # Coerce to xts series
  ohlc <- xts::xts(ohlc, datev)
  # Fill NA Open prices with the average of the High and Low prices
  isna <- is.na(ohlc[, 1])
  ohlc[isna, 1] <- (ohlc[isna, 2] + ohlc[isna, 3])/2
  # Adjust the prices
  ohlc[, 1:4] <- factr*ohlc[, 1:4]/factadj/factr[NROW(factr)]
  # Copy the OHLCV data to pricenv
  ohlc <- na.omit(ohlc)
  assign(x=symboln, value=ohlc, envir=pricenv)
  # Return the ticker (the original returned the undefined object symbol)
  symboln
}  # end format_ohlc
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Reading Index Constituent Prices Into \texttt{R}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The prices for all the index constituents are first downloaded from \emph{Compustat} into a single \texttt{.csv} file, and then they are read into \texttt{R} and formatted into a time series of \emph{OHLC} prices.
      \vskip1ex
      But the prices downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/compd/secd/index.cfm?navId=83}{\emph{Compustat Daily Prices}}
      database often contain prices for several related securities from the same company, so they must be properly selected (pruned) to match the index constituents.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load OHLC prices from .csv file downloaded from WRDS by cusip
pricesp500 <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/sp500_prices_bycusip.csv")
# The objects cusipsp500, ticksp500, and the function format_ohlc()
# are not defined in this chunk and must already exist
# pricesp500 contains cusips not in cusipsp500
cusips <- unique(pricesp500$cusip)
NROW(cusipsp500); NROW(cusips)
# Select data only for cusipsp500
pricesp500 <- pricesp500[pricesp500$cusip %in% cusipsp500, ]
# pricesp500 contains tickers not in ticksp500
tickers <- unique(pricesp500$tic)
NROW(ticksp500); NROW(tickers)
# Select data only for ticksp500
pricesp500 <- pricesp500[pricesp500$tic %in% ticksp500, ]
# Create new data environment
sp500env <- new.env()
# Perform OHLC aggregations by cusip column
# Split the prices into a list of data frames, one for each cusip
pricesp500 <- split(pricesp500, pricesp500$cusip)
# Format each data frame and copy the OHLC series into sp500env
process_ed <- lapply(pricesp500, format_ohlc, pricenv=sp500env)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Managing Exceptions in Index Constituent Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Some securities of past index constituents may no longer be trading because of corporate events (mergers, bankruptcies, etc.), so they should be removed from the data.
      \vskip1ex
      Tickers which contain a dot in their name (like \texttt{"BRK.B"}) are not valid symbols in \texttt{R}, so they must be renamed.
      \vskip1ex
      The column names for symbol \texttt{"LOW"} (Lowe's company) must be renamed for the extractor function \texttt{quantmod::Lo()} to work properly.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Get end dates of series in sp500env
endd <- eapply(sp500env, end)
# unlist() drops the date class and returns the dates as numbers
endd <- unlist(endd)
# Supply the origin so that the coercion also works in R before version 4.3
endd <- as.Date(endd, origin="1970-01-01")
# Remove elements with short end dates
ishort <- (endd < max(endd))
# Use the names of endd, so that the names match the elements of ishort
rm(list=names(endd)[ishort], envir=sp500env)
# Rename element "BRK.B" to "BRKB"
sp500env$BRKB <- sp500env$BRK.B
rm(BRK.B, envir=sp500env)
names(sp500env$BRKB) <- paste("BRKB",
  c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Rename element "LOW" to "LOWES"
sp500env$LOWES <- sp500env$LOW
rm(LOW, envir=sp500env)
names(sp500env$LOWES) <- paste("LOWES",
  c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Rename element "BF.B" to "BFB"
sp500env$BFB <- sp500env$BF.B
rm(BF.B, envir=sp500env)
names(sp500env$BFB) <- paste("BFB",
  c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Save OHLC prices to .RData file
save(sp500env, file="/Users/jerzy/Develop/lecture_slides/data/sp500.RData")
plot(quantmod::Cl(sp500env$MSFT))
      @

  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Saving \protect\emph{WRDS} Queries}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{WRDS} queries can be saved and reused again.
      \vskip1ex
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_saved_queries.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Downloading Fundamental Company Data From \protect\emph{Quandl}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \href{https://www.sec.gov/}{\emph{SEC}} is a free database of stock fundamentals extracted from SEC filings (but not harmonized),\\
      \hskip1em\url{https://www.quandl.com/data/SEC}
      \vskip1ex
      \emph{RAYMOND} is a free database of harmonized stock fundamentals, based on the \href{https://www.sec.gov/}{\emph{SEC}} database,\\
      \hskip1em\url{https://www.quandl.com/data/RAYMOND}
      \hskip1em\url{https://www.quandl.com/data/RAYMOND?keyword=aapl}
      \vskip1ex
      Wharton Research Data Services (\href{https://wrds-www.wharton.upenn.edu}{\emph{WRDS}}) is a distributor of third party data for the academic and research communities.
      \vskip1ex
      \emph{WRDS} provides time series of stock prices and fundamental company data, and other financial, econometric, and social datasets.
      \vskip1ex
      \emph{WRDS} redistributes fundamental company data from \href{http://www.compustat.com}{\emph{Compustat}}, \href{http://www.capitaliq.com}{\emph{S\&P Capital IQ}}, \href{https://www.thomsonreuters.com/en.html}{\emph{Thomson Reuters}}, \href{http://www.factset.com/}{\emph{FactSet}}, \emph{Hedge Fund Research}, \href{https://ihsmarkit.com/index.html}{\emph{Markit}}, etc.
      \vskip1ex
      The Center for Research in Security Prices (\href{http://www.crsp.org/}{\emph{CRSP}}) is a provider of historical security prices for the academic and research communities.
      \vskip1ex
      Much of the \emph{WRDS} data is free, while premium data can be obtained under a temporary license.
      \vskip1ex
      \emph{WRDS} provides online help and a guide to its datasets:\\
      \hskip1em\url{https://wrds-www.wharton.upenn.edu}\\
      \vskip1ex
      \emph{WRDS} provides stock prices, stock fundamentals, financial ratios, indexes, options and volatility, earnings estimates, analyst ratings, etc.:\\
      \hskip1em\url{https://www.WRDS.com/blog/api-for-stock-data}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
library(Quandl)  # Load package Quandl
# Register Quandl API key
# The key is read from the environment variable QUANDL_API_KEY
# (for example set in the .Renviron file), so it's not stored in the script
Quandl.api_key(Sys.getenv("QUANDL_API_KEY"))

# Quandl stock market data
# https://blog.quandl.com/stock-market-data-ultimate-guide-part-1
# https://blog.quandl.com/stock-market-data-the-ultimate-guide-part-2

# Download RAYMOND metadata
# https://www.quandl.com/data/RAYMOND-Raymond/documentation/metadata

# Download S&P500 Index constituents
# https://s3.amazonaws.com/static.quandl.com/tickers/SP500.csv

# Download AAPL gross profits from RAYMOND
profitaapl <- Quandl("RAYMOND/AAPL_GROSS_PROFIT_Q", type="xts")
chart_Series(profitaapl, name="AAPL gross profits")

# Download multiple time series
# The start date is passed using the argument start_date
pricev <- Quandl(code=c("NSE/OIL", "WIKI/AAPL"),
         start_date="2013-01-01", type="xts")

# Download datasets for AAPL
# https://www.quandl.com/api/v3/datasets/WIKI/AAPL.json

# Download metadata for AAPL
pricev <- Quandl(code=c("NSE/OIL", "WIKI/AAPL"),
         start_date="2013-01-01", type="xts")
# https://www.quandl.com/api/v3/datasets/WIKI/AAPL/metadata.json

# scrape fundamental data from Google using quantmod - doesn't work
funda_mentals <- getFinancials("HPQ", src="google", auto.assign=FALSE)
# view quarterly fundamentals
viewFinancials(funda_mentals,  period="Q")
viewFinancials(funda_mentals)

# scrape fundamental data from Yahoo using quantmod
# table of Yahoo data fields
# http://www.financialwisdomforum.org/gummy-stuff/Yahoo-data.htm

met_rics <- yahooQF(c("Price/Sales",
                      "P/E Ratio",
                      "Price/EPS Estimate Next Year",
                      "PEG Ratio",
                      "Dividend Yield",
                      "Market Capitalization"))


symbolv <- c("AAPL", "IBM", "MSFT")
# Not all the metrics are returned by Yahoo.
funda_mentals <- getQuote(paste(symbolv, sep="", collapse=";"), src="yahoo", what=met_rics)
# Display the table of quotes
# (viewFinancials() only accepts objects returned by getFinancials())
funda_mentals

funda_mentals <- getFinancials("HPQ", src="yahoo", auto.assign=FALSE)
viewFinancials(funda_mentals)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The \protect\emph{CRSP} Database on \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Center for Research in Security Prices (\href{http://www.crsp.org/}{\emph{CRSP}}) is a provider of historical security prices, and is an affiliate of the University of Chicago Booth School of Business.
      \vskip1ex
      \emph{CRSP} data provides daily prices, but it's updated only monthly, quarterly, and annually.
      \vskip1ex
      \emph{CRSP} provides very long time series of data starting in $1926$.
      \vskip1ex
      \emph{CRSP} data is indexed using the \emph{PERMCO} company identifier and the \emph{PERMNO} security identifier.
      \vskip1ex
      Since a single company can have many securities, a single \emph{PERMCO} may be linked to multiple \emph{PERMNOs}.
      \vskip1ex
      \emph{WRDS} provides a tool to translate tickers to PERMCO/PERMNO:\\
      \hskip1em\url{https://wrds-web.wharton.upenn.edu/wrds/ds/crsp/tools_a/dse/translate/index.cfm}
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_crsp.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Daily Prices From \protect\emph{CRSP}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Daily North American security prices can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/crsp/stock_a/dsf.cfm?navId=128}{\emph{CRSP daily}} database.
      \vskip1ex
      The time series data can be downloaded into a \texttt{.csv} file, and then read into \texttt{R}.
      <<echo=(-(1:1)),eval=FALSE>>=
library(rutils)  # Load package rutils
# Specify class for column "TICKER" so that "F" doesn't become FALSE
col_class <- "character"
names(col_class) <- "TICKER"
# Read .csv file with Ford OHLC prices
ohlc <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/F_CRSP.csv",
  colClasses=col_class)
# Get the ticker from the first row
symboln <- ohlc[1, "TICKER"]
# Adjustment factor
factadj <- drop(ohlc[, "CFACPR"])
# Extract dates index
datev <- drop(ohlc[, "date"])
datev <- lubridate::ymd(datev)
# Select only OHLCV data
ohlc <- ohlc[, c("OPENPRC", "ASKHI", "BIDLO", "PRC", "VOL")]
colnames(ohlc) <- paste(symboln, c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Coerce to xts series
ohlc <- xts::xts(ohlc, datev)
# Fill missing Open NA prices
# with the average of the High and Low prices
isna <- is.na(ohlc[, 1])
ohlc[isna, 1] <- (ohlc[isna, 2] + ohlc[isna, 3])/2
# Adjust all the prices by dividing them by the adjustment factor
# (no total return factor is read from this file, so none is applied)
ohlc[, 1:4] <- ohlc[, 1:4]/factadj
plot(quantmod::Cl(ohlc), main="Ford Stock")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_crsp_prices.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: The Merged \protect\emph{CRSP/Compustat} Database}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Links:\\
      % http://crsp.org/files/ccm_data_guide_0.pdf
      http://kaichen.work/?p=138\\
      http://www.crsp.org/products/documentation/crsp-link\\
      https://mingze-gao.com/posts/merge-compustat-and-crsp/\\
      http://www.crsp.org/products/documentation/crspcompustat-merged-database-guide
      \vskip1ex

      The Center for Research in Security Prices (\href{http://www.crsp.org/}{\emph{CRSP}}) is a provider of historical security prices.
      \vskip1ex
      \emph{CRSP} data is indexed using the \emph{PERMCO} company identifier and the \emph{PERMNO} security identifier.
      \vskip1ex
      Therefore a given \emph{PERMCO} may be linked to multiple \emph{PERMNOs}.
      \vskip1ex
      Compustat provides financial statement data of a company. The micro unit on Compustat is each and every company. However, it is less known that Compustat also provides security data. In addition, because the coverage of Compustat is more extensive than that of \emph{CRSP}, Compustat contains additional security data that are unavailable on \emph{CRSP}.
      \vskip1ex
      Compustat uses \emph{IID} and \emph{GVKEY} to identify all securities.  A marker item, PRIMISS, indicates whether a security is primary or secondary.
      \vskip1ex
      Similar to \emph{PERMCO} on \emph{CRSP}, one \emph{GVKEY} may have multiple \emph{IIDs} assigned to it.
      \vskip1ex
      Links\\
      http://kaichen.work/?p=138\\
      % https://wrds-www.wharton.upenn.edupages/support/manuals-and-overviews/crsp/crspcompustat-merged-ccm/wrds-overview-crspcompustat-merged-ccm/\\
      % https://wrds-www.wharton.upenn.edupages/support/data-overview/wrds-overview-crspcompustat-merged-ccm/
      \vskip1ex
      \vskip1ex
      \vskip1ex
      Time series data for a portfolio of stocks can be downloaded by performing a loop over the function \texttt{Quandl()} from package \emph{Quandl}.
      \vskip1ex
      The \texttt{assign()} function assigns a value to an object in a specified \emph{environment}, by referencing it using a character string (name).
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_crsp_compustat.png}
      <<echo=FALSE,eval=FALSE,results='asis'>>=
# Read .csv file with the table of CRSP and Compustat indices
crsp_compustat <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/crsp_compustat_indices.csv")
# Use the first row of the table as the column names, and then remove it
colnames(crsp_compustat) <- crsp_compustat[1, ]
crsp_compustat <- crsp_compustat[-1, ]
# Print the table as LaTeX code, without the row names
# NOTE(review): xtable() is not loaded in this chunk - presumably package xtable is loaded elsewhere
print(xtable(crsp_compustat), comment=FALSE, size="tiny", include.rownames=FALSE)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Downloading Daily Prices From Merged \protect\emph{CRSP/Compustat} Database}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Daily North American security prices can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/crsp/ccm_a/secd/index.cfm?navId=120}{\emph{CRSP/Compustat daily}} database.
      \vskip1ex
      The time series data can be downloaded into a \texttt{.csv} file, and then read into \texttt{R}.
      <<echo=(-(1:1)),eval=FALSE>>=
library(rutils)  # Load package rutils
# Read .csv file with TAP OHLC prices
ohlc <- read.csv(file="/Users/jerzy/Develop/lecture_slides/data/TAP.csv")
# Get the ticker from the first row
symboln <- ohlc[1, "tic"]
# Adjustment factor and total return factor
factadj <- drop(ohlc[, "ajexdi"])
factr <- drop(ohlc[, "trfd"])
# Extract dates index
datev <- drop(ohlc[, "datadate"])
datev <- lubridate::ymd(datev)
# Select only OHLCV data
ohlc <- ohlc[, c("prcod", "prchd", "prcld", "prccd", "cshtrd")]
colnames(ohlc) <- paste(symboln, c("Open", "High", "Low", "Close", "Volume"), sep=".")
# Coerce to xts series
ohlc <- xts::xts(ohlc, datev)
# Fill missing Open NA prices
# with the average of the High and Low prices
isna <- is.na(ohlc[, 1])
ohlc[isna, 1] <- (ohlc[isna, 2] + ohlc[isna, 3])/2
# Count the remaining NA values
sum(is.na(ohlc))
# Adjust all the prices
# Multiply by the total return factor, divide by the adjustment factor,
# and divide by the last total return factor
ohlc[, 1:4] <- factr*ohlc[, 1:4]/factadj/factr[NROW(factr)]
plot(quantmod::Cl(ohlc), main="TAP Stock")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_compustat_prices.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Fama-French Factors from \protect\emph{Quandl}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Fama/French factors are constructed using six value-weight portfolios formed on size and book-to-market,
      \hskip1em\url{https://www.quandl.com/data/KFRENCH/FACTORS_D}
      \vskip1ex
      \texttt{Mkt-RF} is the excess return on the market (value-weighted NYSE, AMEX, and NASDAQ stocks minus the one-month Treasury bill rate).
      \vskip1ex
      \texttt{SMB} (Small Minus Big) is the return on the three small-cap portfolios minus the three big-cap portfolios.
      \vskip1ex
      \texttt{HML} (High Minus Low) is the return on the two value portfolios minus the two growth portfolios.
      \vskip1ex
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:1)),eval=FALSE>>=
library(rutils)  # Load package rutils
# Download Fama-French factors from KFRENCH database
# The start date is passed using the argument start_date
# Quandl() is called with its namespace because package Quandl
# is not loaded in this chunk
factors <- Quandl::Quandl(code="KFRENCH/FACTORS_D",
        start_date="2001-01-01", type="xts")
dim(factors)
head(factors)
tail(factors)
# Plot the cumulative sum of the first column
# (the values are divided by 100)
chart_Series(cumsum(factors["2001/", 1]/100),
        name="Fama-French factors")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{High Frequency and Intraday Stock Prices}


%%%%%%%%%%%%%%%
\subsection{Trade and Quote (\protect\emph{TAQ}) Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{High frequency data} is typically formatted as either Trade and Quote (\emph{TAQ}) data, or \emph{Open-High-Low-Close} (\emph{OHLC}) data.
      \vskip1ex
      Trade and Quote (\emph{TAQ}) data contains intraday \emph{trades} and \emph{quotes} on exchange-traded stocks and futures.
      \vskip1ex
      \emph{TAQ} data is often called \emph{tick data}, with a \emph{tick} being a row of data containing new \emph{trades} or \emph{quotes}.
      \vskip1ex
      The \emph{TAQ} data is spaced irregularly in time, with data recorded each time a new trade or quote arrives.
      \vskip1ex
      Each row of \emph{TAQ} data may contain the quote and trade prices, and the corresponding quote size or trade volume:
      \emph{Bid.Price, Bid.Size, Ask.Price, Ask.Size, Trade.Price, Volume}.
      \vskip1ex
      \emph{TAQ} data is often split into \emph{trade} data and \emph{quote} data.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load package HighFreq
library(HighFreq)
# Or load the high frequency data file directly:
# symbolv <- load("/Users/jerzy/Develop/R/HighFreq/data/hf_data.RData")
# Inspect the first rows of the SPY_TAQ data in package HighFreq
head(HighFreq::SPY_TAQ)
# Inspect the first and last rows of the SPY data in package HighFreq
head(HighFreq::SPY)
tail(HighFreq::SPY)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading \protect\emph{TAQ} Data From \protect\emph{WRDS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{TAQ} data can be downloaded from the
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/taq/ctm/index.cfm}{\emph{WRDS TAQ}} web page.
      \vskip1ex
      The \emph{TAQ} data are at millisecond frequency, and are \emph{consolidated} (combined) from the New York Stock Exchange \emph{NYSE} and other exchanges.
      \vskip1ex
      The
      \href{https://wrds-web.wharton.upenn.edu/wrds/ds/taq/ctm/index.cfm}{\emph{WRDS TAQ}}
      web page provides the \emph{trades} data and the \emph{quotes} data separately.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/wrds_taq_data.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Reading \protect\emph{TAQ} Data From \texttt{.csv} Files}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Trade and Quote (\emph{TAQ}) data stored in \texttt{.csv} files can be very large, so it's better to read it using the function \texttt{data.table::fread()} which is much faster than the function \texttt{read.csv()}.
      \vskip1ex
      Each \emph{trade} or \emph{quote} contributes a \emph{tick} (row) of data, and the number of ticks can be very large (hundreds of thousands per day, or more).
      \vskip1ex
      The function \texttt{strptime()} coerces \texttt{character} strings representing the date and time into \texttt{POSIXlt} \emph{date-time} objects.
      \vskip1ex
      The argument \texttt{format="\%H:\%M:\%OS"} allows the parsing of fractional seconds, for example \texttt{"15:59:59.989847074"}.
      \vskip1ex
      The function \texttt{as.POSIXct()} coerces objects into \texttt{POSIXct} \emph{date-time} objects, with a \texttt{numeric} value representing the \emph{moment of time} in seconds.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
library(HighFreq)
# Load the TAQ trade ticks from the csv file into a data.table
taq <- data.table::fread(
  file="/Users/jerzy/Develop/lecture_slides/data/xlk_tick_trades_20200316.csv")
# Explore the structure of the TAQ data.table
taq
class(taq)
colnames(taq)
sapply(taq, class)
# Ticker symbol from the first row
symboln <- taq$SYM_ROOT[1]
# Paste the date and time columns into date-time strings
datev <- paste(taq$DATE, taq$TIME_M)
# Parse the strings into POSIXlt, including the fractional seconds
datev <- strptime(datev, format="%Y%m%d %H:%M:%OS")
class(datev)
# Print more significant digits and fractional seconds
# options("digits")
options(digits=20, digits.secs=10)
last(datev)
unclass(last(datev))
as.numeric(last(datev))
# Convert the date-time index from POSIXlt to POSIXct
datev <- as.POSIXct(datev)
class(datev)
last(datev)
unclass(last(datev))
as.numeric(last(datev))
# Number of seconds between the first and last ticks
as.numeric(last(datev)) - as.numeric(first(datev))
# Average number of ticks per second over 6.5 hours
NROW(taq)/(6.5*3600)
# Keep only the trade price and size columns, under new names
taq <- taq[, .(price=PRICE, volume=SIZE)]
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Trading Volumes in High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The trading volumes represent the number of shares traded at a given price.  
      \vskip1ex
      The histogram of the trading volumes shows that the highest frequencies of trades are for \emph{round lots} - trades that are multiples of \texttt{100} shares.
      \vskip1ex
      There are also significant frequencies for \emph{odd lots}, with small volumes of less than \texttt{100} shares.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.4\paperwidth]{figure/hf_ticks_volume_histogram.png}
      <<echo=TRUE,eval=FALSE>>=
# Build an xts series of the trade ticks, indexed by the date-times
xlk <- xts::xts(taq[, .(price, volume)], order.by=datev)
colnames(xlk) <- c("price", "volume")
# Store the xts series in a binary file
save(xlk, file="/Users/jerzy/Develop/data/xlk_tick_trades_20200316.RData")
# Histogram of the number of shares per trade
hist(xlk$volume, breaks=1e5, xlim=c(1, 400),
     xlab="number of shares",
     main="Histogram of XLK Trading Volumes")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Microstructure Noise in High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      High frequency data contains \emph{microstructure noise} in the form of \emph{price spikes} and the \emph{bid-ask bounce}.
      \vskip1ex
      \emph{Price spikes} are single ticks with prices far away from the average.
      \vskip1ex
      \emph{Price spikes} are often caused by data collection errors, but sometimes they represent actual trades with very large lot (trade) sizes.
      \vskip1ex
      The \emph{bid-ask bounce} is the bouncing of traded prices between the bid and ask prices.
      \vskip1ex
      The \emph{bid-ask bounce} creates an illusion of rapidly changing prices, while in reality the mid price is unchanged.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_raw.png}
      % \vspace{-1em}
      % \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_bounce.png}
      <<echo=TRUE,eval=FALSE>>=
# Interactive dygraph of the raw trade prices
dygraphs::dygraph(xlk$price,
  main="XLK Intraday Prices for 2020-03-16") %>%
  dyOptions(strokeWidth=1, colors="blue")
# Static chart in a separate x11 window
x11(height=5, width=6)
quantmod::chart_Series(x=xlk$price,
  name="XLK Intraday Prices for 2020-03-16")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The Bid-ask Bounce of High Frequency Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{bid-ask bounce} is the bouncing of traded prices between the bid and ask prices.
      \vskip1ex
      The \emph{bid-ask bounce} is prominent at very high frequency time scales or in periods of low volatility.
      \vskip1ex
      The \emph{bid-ask bounce} creates an illusion of rapidly changing prices, while in fact the mid price is constant.
      \vskip1ex
      The \emph{bid-ask bounce} inflates the estimates of realized volatility, above the actual volatility.
      \vskip1ex
      The \emph{bid-ask bounce} creates the appearance of mean reversion (negative autocorrelation), that isn't tradeable for most traders.
      <<echo=TRUE,eval=FALSE>>=
# Read the prices from the csv file into a zoo series
pricev <- read.zoo(sep=",", header=TRUE,
  file="/Users/jerzy/Develop/lecture_slides/data/bid_ask_bounce.csv")
# Coerce the zoo series to xts
pricev <- as.xts(pricev)
# Dygraph of the close prices showing the bid-ask bounce
dygraphs::dygraph(pricev$Close,
  main="S&P500 Futures Prices Bid-Ask Bounce") %>%
  dyOptions(strokeWidth=2, colors="blue")
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/hf_futures_bounce.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Price Spikes And Trading Volumes in High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The number of the \emph{price spikes} depends on the level of trading volumes, with the number decreasing with higher trading volumes.
      \vskip1ex
      The number of price spikes is lower for trade prices with larger trading volumes.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_biglots.png}
      % \vspace{-1em}
      % \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_bounce.png}
      <<echo=TRUE,eval=FALSE>>=
# Dygraph of the prices of trades with 100 shares or more
dygraphs::dygraph(
  xlk$price[xlk$volume >= 100, ],
  main="XLK Prices for Trades of At Least 100 Shares") %>%
  dyOptions(strokeWidth=1, colors="blue")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Removing Odd Lot Trades From \protect\emph{TAQ} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The trading volumes represent the number of shares traded at a given price.  
      \vskip1ex
      The histogram of the trading volumes shows that the highest frequencies of trades are for \emph{round lots} - trades that are multiples of \texttt{100} shares.
      \vskip1ex
      There are also significant frequencies for \emph{odd lots}, with small volumes of less than \texttt{100} shares.
      \vskip1ex
      The \emph{odd lot} ticks are often removed to reduce the size of the \emph{TAQ} data.
      \vskip1ex
      Selecting only the large lot trades reduces microstructure noise (price spikes, bid-ask bounce) in high frequency data.
      <<echo=TRUE,eval=FALSE>>=
# Select the large trade lots of at least 100 shares
dim(taq)
tickb <- taq[taq$volume >= 100]
dim(tickb)
# Number of large lot ticks per second (over 6.5 hours)
NROW(tickb)/(6.5*3600)
# Plot histogram of the trading volumes
hist(tickb$volume, main="Histogram of XLK Trading Volumes", 
     breaks=100000, xlim=c(1, 400), xlab="number of shares")
# Save trade ticks with large lots
data.table::fwrite(tickb, file="/Users/jerzy/Develop/data/xlk_tick_trades_20200316_biglots.csv")
# Coerce trade prices to xts
# The data.table taq holds only the price and volume columns (no index
# column), so the time index is the matching subset of the vector datev
xlkb <- xts::xts(tickb[, .(price, volume)], datev[taq$volume >= 100])
colnames(xlkb) <- c("price", "volume")
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_biglots.png}
      <<echo=TRUE,eval=FALSE>>=
# Plot dygraph of the large lots
dygraphs::dygraph(xlkb$price, 
  main="XLK Prices for Trades of At Least 100 Shares") %>%
  dyOptions(colors="blue", strokeWidth=1)
# Plot the large lots in an x11 window
# Use the large lot series xlkb (not the full series xlk) to match the title
x11(width=6, height=5)
quantmod::chart_Series(x=xlkb$price,
  name="XLK Trade Ticks for 2020-03-16 (large lots only)")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Scrubbing Bad Market Data}


%%%%%%%%%%%%%%%
\subsection{draft: Identifying Isolated Price Spikes In Intraday Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Isolated price spikes in high frequency data can be identified using a \emph{three-point filter} (tri-filter).
      histogram of returns
      \vskip1ex
      The tri-filter compares two price ratios of three neighboring prices: 
      
      $p_0, p_1, p_2$.
      The tri-filter is a median filter that replaces the central value with the median of the three values.
      Create three-point classifier for bad prices.
Identify bad prices in-sample using 

      The \emph{z-scores} are equal to the prices minus the median of the prices, divided by the median absolute deviation (\emph{MAD}) of prices:
      \begin{displaymath}
        z_i = \frac{p_i - \operatorname{median}(\mathbf{p})}{\operatorname{MAD}}
      \end{displaymath}
      If the absolute value of the \emph{z-score} exceeds the \emph{threshold value} then it's classified as \emph{bad data}, and it can be removed or replaced.
      <<echo=TRUE,eval=FALSE>>=
# Centered Hampel filter for detecting bad prices
lookb <- 71  # Look-back interval
halfb <- lookb %/% 2  # Half of the look-back interval
pricev <- xlk$price
# Rolling median and MAD of the prices over the look-back interval
medianv <- HighFreq::roll_mean(pricev, lookb=lookb, method="nonparametric")
colnames(medianv) <- "median"
madv <- HighFreq::roll_var(pricev, lookb=lookb, method="nonparametric")
# madv <- TTR::runMAD(pricev, n=lookb)
# Shift the median and the MAD by half the interval to center them
medianv <- rutils::lagit(medianv, lagg=-halfb, pad_zeros=FALSE)
madv <- rutils::lagit(madv, lagg=-halfb, pad_zeros=FALSE)
# Z-scores are the deviations from the median in units of the MAD
# (set to zero where the MAD is not positive)
zscores <- ifelse(madv > 0, (pricev - medianv)/madv, 0)
# The z-scores have very fat tails
range(zscores)
mad(zscores)
# MAD of the non-zero z-scores sets the scale of the histogram
madz <- mad(zscores[abs(zscores) > 0])
hist(zscores, breaks=50000, xlim=c(-2*madz, 2*madz))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_scrubbed.png}
      <<echo=TRUE,eval=FALSE>>=
# Discrimination threshold as a multiple of the z-score MAD
threshv <- 6*madz
# Good prices have absolute z-scores below the threshold
isgood <- (abs(zscores) < threshv)
# Count the bad prices
sum(!isgood)
# Blank out the bad prices, then carry the last good price forward
priceg <- pricev
priceg[!isgood] <- NA
priceg <- zoo::na.locf(priceg)
# Interactive dygraph of the scrubbed prices
dygraphs::dygraph(priceg,
  main="Scrubbed XLK Intraday Prices") %>%
  dyOptions(strokeWidth=1, colors="blue")
# Static chart using chart_Series() in an x11 window
x11(height=5, width=6)
quantmod::chart_Series(x=priceg,
  name="Clean XLK Intraday Prices for 2020-03-16")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{The Hampel Filter For Filtering Price Spikes}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Price spikes in high frequency data can be identified using a \emph{Hampel filter}.
      \vskip1ex
      The \emph{z-scores} are equal to the prices minus the median of the prices, divided by the median absolute deviation (\emph{MAD}) of prices:
      \begin{displaymath}
        z_i = \frac{p_i - \operatorname{median}(\mathbf{p})}{\operatorname{MAD}}
      \end{displaymath}
      If the absolute value of the \emph{z-score} exceeds the \emph{threshold value} then it's classified as \emph{bad data}, and it can be removed or replaced.
      <<echo=TRUE,eval=FALSE>>=
# Centered Hampel filter for detecting bad prices
lookb <- 71  # Look-back interval
halfb <- lookb %/% 2  # Half of the look-back interval
pricev <- xlk$price
# Rolling median and MAD of the prices over the look-back interval
medianv <- HighFreq::roll_mean(pricev, lookb=lookb, method="nonparametric")
colnames(medianv) <- "median"
madv <- HighFreq::roll_var(pricev, lookb=lookb, method="nonparametric")
# madv <- TTR::runMAD(pricev, n=lookb)
# Shift the median and the MAD by half the interval to center them
medianv <- rutils::lagit(medianv, lagg=-halfb, pad_zeros=FALSE)
madv <- rutils::lagit(madv, lagg=-halfb, pad_zeros=FALSE)
# Z-scores are the deviations from the median in units of the MAD
# (set to zero where the MAD is not positive)
zscores <- ifelse(madv > 0, (pricev - medianv)/madv, 0)
# The z-scores have very fat tails
range(zscores)
mad(zscores)
# MAD of the non-zero z-scores sets the scale of the histogram
madz <- mad(zscores[abs(zscores) > 0])
hist(zscores, breaks=50000, xlim=c(-2*madz, 2*madz))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_scrubbed.png}
      <<echo=TRUE,eval=FALSE>>=
# Discrimination threshold as a multiple of the z-score MAD
threshv <- 6*madz
# Good prices have absolute z-scores below the threshold
isgood <- (abs(zscores) < threshv)
# Count the bad prices
sum(!isgood)
# Blank out the bad prices, then carry the last good price forward
priceg <- pricev
priceg[!isgood] <- NA
priceg <- zoo::na.locf(priceg)
# Interactive dygraph of the scrubbed prices
dygraphs::dygraph(priceg,
  main="Scrubbed XLK Intraday Prices") %>%
  dyOptions(strokeWidth=1, colors="blue")
# Static chart using chart_Series() in an x11 window
x11(height=5, width=6)
quantmod::chart_Series(x=priceg,
  name="Clean XLK Intraday Prices for 2020-03-16")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Classifying Data Outliers Using the Hampel Filter}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hampel filter is a \emph{classifier} which classifies the prices as either good or bad data points.
      \vskip1ex
      In order to measure the performance of the Hampel filter, we add price spikes to the clean prices, to see how accurately they're classified. 
      \vskip1ex
      Let the \emph{null hypothesis} be that the given price is a good data point.
      \vskip1ex
      A positive result corresponds to rejecting the \emph{null hypothesis}, while a negative result corresponds to accepting the \emph{null hypothesis}.
      \vskip1ex
      The classifications are subject to two different types of errors: \emph{type I} and \emph{type II} errors.
      \vskip1ex
      A \emph{type I} error is the incorrect rejection of a \texttt{TRUE} \emph{null hypothesis} (i.e. a ``false positive''), when good data is classified as bad.
      \vskip1ex
      A \emph{type II} error is the incorrect acceptance of a \texttt{FALSE} \emph{null hypothesis} (i.e. a ``false negative''), when bad data is classified as good.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Contaminate the clean prices with 200 random price spikes
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
nspikes <- 200
nrows <- NROW(priceg)
# Logical vector marking the randomly selected spike locations
ispike <- logical(nrows)
ispike[sample(x=nrows, size=nspikes)] <- TRUE
# Scale the selected prices up or down by a random factor
priceb <- priceg
priceb[ispike] <- priceb[ispike]*
  sample(c(0.999, 1.001), size=nspikes, replace=TRUE)
# Dygraph of the prices with spikes and their rolling medians
medianv <- HighFreq::roll_mean(priceb, lookb=lookb, method="nonparametric")
pricem <- cbind(priceb, medianv)
colnames(pricem) <- c("prices with spikes", "median")
dygraphs::dygraph(pricem,
  main="XLK Prices With Spikes") %>%
  dyOptions(colors=c("red", "blue"))
# Z-scores of the prices with spikes
# (set to zero where the MAD is not positive)
madv <- HighFreq::roll_var(priceb, lookb=lookb, method="nonparametric")
zscores <- ifelse(madv > 0, (priceb - medianv)/madv, 0)
# The z-scores have very fat tails
range(zscores)
mad(zscores)
madz <- mad(zscores[abs(zscores) > 0])
hist(zscores, breaks=10000, xlim=c(-4*madz, 4*madz))
# Good prices have absolute z-scores below the threshold
threshv <- 3*madz
isgood <- (abs(zscores) < threshv)
# Count the prices classified as bad
sum(!isgood)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Confusion Matrix of a Binary Classification Model}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A \emph{binary classification model} categorizes cases based on its forecasts whether the \emph{null hypothesis} is \texttt{TRUE} or \texttt{FALSE}.
      \vskip1ex
      The confusion matrix summarizes the performance of a classification model on a set of test data for which the actual values of the \emph{null hypothesis} are known.
      \vskip1ex
      \newcommand\MyBox[2]{
        \fbox{\lower0.75cm
          \vbox to 1.2cm{\vfil
            \hbox to 1.7cm{\parbox{\textwidth}{#1\\#2}}
            \vfil}
        }
      }
      \renewcommand\arraystretch{0.3}
      \setlength\tabcolsep{0pt}
      {\tiny
      \begin{tabular}{c >{\bfseries}r @{\hspace{0.5em}}c @{\hspace{0.4em}}c @{\hspace{0.5em}}l}
      \multirow{10}{*}{\parbox{0.5cm}{\bfseries Actual}} &
      & \multicolumn{2}{c}{\bfseries Forecast} & \\
      & & \bfseries Null is FALSE & \bfseries Null is TRUE \\
      & Null is FALSE & \MyBox{True Positive}{(sensitivity)} & \MyBox{False Negative}{(type II error)} \\[2.4em]
      & Null is TRUE & \MyBox{False Positive}{(type I error)} & \MyBox{True Negative}{(specificity)}
      \end{tabular}}
      <<echo=TRUE,eval=FALSE>>=
# Confusion matrix of the actual versus the forecast classifications
table(actual=!ispike, forecast=isgood)
# Total number of prices classified as bad
sum(!isgood)
# FALSE positives (type I errors): good prices classified as bad
sum(!(ispike | isgood))
# FALSE negatives (type II errors): spikes classified as good
sum(ispike & isgood)
      @
    \column{0.5\textwidth}
      Let the \emph{null hypothesis} be that the given price is a good data point.
      \vskip1ex
      The \emph{true positive} rate (known as the \emph{sensitivity}) is the fraction of \texttt{FALSE} \emph{null hypothesis} cases that are correctly classified as \texttt{FALSE}.
      \vskip1ex
      The \emph{false negative} rate is the fraction of \texttt{FALSE} \emph{null hypothesis} cases that are incorrectly classified as \texttt{TRUE} (\emph{type II} error).
      \vskip1ex
      The sum of the \emph{true positive} rate and the \emph{false negative} rate is equal to $1$.
      \vskip1ex
      The \emph{true negative} rate (known as the \emph{specificity}) is the fraction of \texttt{TRUE} \emph{null hypothesis} cases that are correctly classified as \texttt{TRUE}.
      \vskip1ex
      The \emph{false positive} rate is the fraction of \texttt{TRUE} \emph{null hypothesis} cases that are incorrectly classified as \texttt{FALSE} (\emph{type I} error).
      \vskip1ex
      The sum of the \emph{true negative} rate and the \emph{false positive} rate is equal to $1$.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Receiver Operating Characteristic (ROC) Curve}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{ROC curve} is the plot of the \emph{true positive} rate, as a function of the \emph{false positive} rate, and illustrates the performance of a binary classifier.
      \vskip1ex
      The area under the \emph{ROC curve} (AUC) measures the classification ability of a binary classifier.
      <<echo=TRUE,eval=FALSE>>=
# Confusion matrix error rates as function of threshold
# actualv: logical vector, TRUE if the null hypothesis is TRUE
# zscores: numeric z-scores of the data points
# threshv: threshold - the forecast is TRUE if abs(zscores) < threshv
# Returns a named vector with the typeI rate (actual TRUE forecast as
# FALSE) and the typeII rate (actual FALSE forecast as TRUE)
confun <- function(actualv, zscores, threshv) {
    # Fix the factor levels so that the table is always 2x2, even when
    # all the forecasts (or all the actual values) are the same
    levelv <- c(FALSE, TRUE)
    actualf <- factor(as.logical(actualv), levels=levelv)
    forecastf <- factor(as.logical(abs(zscores) < threshv), levels=levelv)
    confmat <- table(actualf, forecastf)
    # Convert the counts into fractions of each actual class
    confmat <- confmat / rowSums(confmat)
    c(typeI=confmat[2, 1], typeII=confmat[1, 2])
}  # end confun
# Error rates at the current threshold
confun(!ispike, zscores, threshv=threshv)
# Grid of discrimination thresholds
threshv <- madz*seq(from=0.1, to=3.0, by=0.05)/2
# Error rates for every threshold - one row per threshold
errorr <- t(sapply(threshv, confun, actualv=!ispike, zscores=zscores))
rownames(errorr) <- threshv
# Add the end points (1, 0) and (0, 1) to the error rates
errorr <- rbind(c(1, 0), errorr, c(0, 1))
# Area under the ROC curve (AUC) from the averaged true positive rates
# multiplied by the increments of the false positive rates
truepos <- (1 - errorr[, "typeII"])
truepos <- (truepos + rutils::lagit(truepos))/2
falsepos <- rutils::diffit(errorr[, "typeI"])
abs(sum(truepos*falsepos))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hampel_roc.png}
      <<echo=TRUE,eval=FALSE>>=
# ROC curve: TRUE positive rate versus FALSE positive rate
plot(x=errorr[, "typeI"], y=1-errorr[, "typeII"],
     type="l", lwd=3, col="blue",
     xlim=c(0, 1), ylim=c(0, 1),
     xlab="FALSE positive rate", ylab="TRUE positive rate",
     main="ROC Curve for Hampel Classifier")
# Diagonal reference line
abline(a=0.0, b=1.0, col="orange", lwd=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Filtering Bad Data From Daily Stock Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Daily stock prices can also contain bad data points consisting of mostly single, isolated spikes in prices.
      \vskip1ex
      The number of false positives may be too high, so the Hampel filter parameters (the look-back interval and the threshold) need adjustment.
      \vskip1ex
      For example, the \emph{VXX} has only one bad price (on \texttt{2010-11-08}), but the Hampel filter identifies many more than that (which are false positives).
      <<echo=TRUE,eval=FALSE>>=
# Load the log VXX prices from the binary file
load("/Users/jerzy/Develop/lecture_slides/data/pricevxx.RData")
nrows <- NROW(pricev)
# Centered Hampel filter for VXX
lookb <- 7  # Look-back interval
halfb <- lookb %/% 2  # Half of the look-back interval
# Rolling median and MAD over the look-back interval
medianv <- HighFreq::roll_mean(pricev, lookb=lookb, method="nonparametric")
madv <- HighFreq::roll_var(pricev, lookb=lookb, method="nonparametric")
# Shift both by half the interval to center them
medianv <- rutils::lagit(medianv, lagg=-halfb, pad_zeros=FALSE)
madv <- rutils::lagit(madv, lagg=-halfb, pad_zeros=FALSE)
# Z-scores (set to zero where the MAD is not positive)
zscores <- ifelse(madv > 0, (pricev - medianv)/madv, 0)
range(zscores)
mad(zscores)
madz <- mad(zscores[abs(zscores) > 0])
hist(zscores, breaks=100, xlim=c(-3*madz, 3*madz))
# Discrimination threshold as a multiple of the z-score MAD
threshv <- 9*madz
# Good prices have absolute z-scores below the threshold
isgood <- (abs(zscores) < threshv)
sum(!isgood)
# Dates of the prices classified as bad
zoo::index(pricev[!isgood])
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hampel_stock_falsepos.png}
      <<echo=TRUE,eval=FALSE>>=
# Calculate the false positives
# All the prices classified as bad, except for the bad price on 2010-11-08
falsep <- !isgood
falsep[which(zoo::index(pricev) == as.Date("2010-11-08"))] <- FALSE
# Plot dygraph of the prices with bad prices
datam <- cbind(pricev, zscores)
colnames(datam)[2] <- "ZScores"
colv <- colnames(datam)
# The prices are plotted on the left axis and the z-scores on the right axis
# The series color is passed with its full argument name (color), without
# relying on partial argument matching
dygraphs::dygraph(datam, main="VXX Prices With Z-Scores and False Positives") %>%
  dyAxis("y", label=colv[1], independentTicks=TRUE) %>%
  dyAxis("y2", label=colv[2], independentTicks=TRUE) %>%
  dySeries(name=colv[1], axis="y", strokeWidth=1, color="blue") %>%
  dySeries(name=colv[2], axis="y2", strokeWidth=1, color="red") %>%
  dyEvent(zoo::index(pricev[falsep]), label=rep("false", sum(falsep)), strokePattern="solid", color="red") %>%
  dyEvent(zoo::index(pricev["2010-11-08"]), label="true", strokePattern="solid", color="green")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Filtering Combined Spikes From Stock Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The narrow Hampel filter isn't very good anyway
      \vskip1ex
      The narrow Hampel filter using the median of \texttt{3} prices can only identify single isolated spikes. 
      \vskip1ex
      But sometimes several bad prices occur in a row, one after another.
      \vskip1ex
      The narrow Hampel filter cannot identify multiple bad prices in a row, and will therefore produce false negatives (bad prices identified as good).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Add single isolated spike to the prices 
priceb <- pricev
priceb["2017-11-20"] <- 1.2*priceb["2017-11-20"]
# Calculate the Z-scores
medianv <- HighFreq::roll_mean(priceb, lookb=lookb, method="nonparametric")
medianv <- rutils::lagit(medianv, lagg=(-halfb), pad_zeros=FALSE)
madv <- HighFreq::roll_var(priceb, lookb=lookb, method="nonparametric")
madv <- rutils::lagit(madv, lagg=(-halfb), pad_zeros=FALSE)
zscores <- ifelse(madv > 0, (priceb - medianv)/madv, 0)
madz <- mad(zscores[abs(zscores) > 0])
# Calculate the number of bad prices
threshv <- 9*madz
isgood <- (abs(zscores) < threshv)
sum(!isgood)
zoo::index(priceb[!isgood])
# Add two neighboring spikes to the prices
# Each price is scaled by its own value, same as for the single spike
priceb <- pricev
priceb["2017-11-20"] <- 1.2*priceb["2017-11-20"]
priceb["2017-11-21"] <- 1.2*priceb["2017-11-21"]
# Calculate the Z-scores
medianv <- HighFreq::roll_mean(priceb, lookb=lookb, method="nonparametric")
medianv <- rutils::lagit(medianv, lagg=(-halfb), pad_zeros=FALSE)
madv <- HighFreq::roll_var(priceb, lookb=lookb, method="nonparametric")
madv <- rutils::lagit(madv, lagg=(-halfb), pad_zeros=FALSE)
zscores <- ifelse(madv > 0, (priceb - medianv)/madv, 0)
madz <- mad(zscores[abs(zscores) > 0])
# Calculate the number of bad prices
# NOTE(review): threshv is reused from the single spike case above, and
# isn't recalculated from the new madz - confirm that this is intended
isgood <- (abs(zscores) < threshv)
sum(!isgood)
zoo::index(priceb[!isgood])
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Scrubbing Bad Stock Prices}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Bad stock prices can be scrubbed (replaced) with the previous good price.  
      \vskip1ex
      But it's incorrect to replace bad prices with the average of the previous good price and the next good price, since that would cause data snooping.
      <<echo=TRUE,eval=FALSE>>=
# Blank out the bad stock prices and carry the last good price forward
priceg <- pricev
priceg[!isgood] <- NA
priceg <- zoo::na.locf(priceg)
# Rolling median and MAD of the scrubbed prices
medianv <- HighFreq::roll_mean(priceg, lookb=lookb, method="nonparametric")
madv <- HighFreq::roll_var(priceg, lookb=lookb, method="nonparametric")
# Shift both by half the interval to center them
medianv <- rutils::lagit(medianv, lagg=-halfb, pad_zeros=FALSE)
madv <- rutils::lagit(madv, lagg=-halfb, pad_zeros=FALSE)
# Z-scores (set to zero where the MAD is not positive)
zscores <- ifelse(madv > 0, (priceg - medianv)/madv, 0)
madz <- mad(zscores[abs(zscores) > 0])
# Count the bad prices remaining after the scrubbing
threshv <- 9*madz
isgood <- (abs(zscores) < threshv)
sum(!isgood)
# Dates of the remaining bad prices
zoo::index(priceg[!isgood])
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hampel_stock_scrubbed.png}
      <<echo=TRUE,eval=FALSE>>=
# False positives: all the flagged prices except for 2010-11-08
falsep <- !isgood
falsep[which(zoo::index(pricev) == as.Date("2010-11-08"))] <- FALSE
# Dygraph of the scrubbed prices with the false positives marked
dygraphs::dygraph(priceg,
  main="Scrubbed VXX Prices With False Positives") %>%
  dyEvent(zoo::index(priceg[falsep]),
    label=rep("false", sum(falsep)),
    strokePattern="solid", color="red") %>%
  dyOptions(strokeWidth=1, colors="blue")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{ROC Curve for Daily Hampel Classifier}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      In order to measure the performance of the Hampel filter, we add price spikes to the clean prices, to see how accurately they're classified. 
      \vskip1ex
      The performance of the Hampel noise classification model depends on the length of the look-back time interval.
      \vskip1ex
      The optimal \emph{look-back interval} and \emph{threshold value} can be determined using \emph{cross-validation}. 
      <<echo=TRUE,eval=FALSE>>=
# Contaminate the clean prices with 200 random price spikes
set.seed(1121, "Mersenne-Twister", sample.kind="Rejection")
nspikes <- 200
# Logical vector marking the randomly selected spike locations
ispike <- logical(nrows)
ispike[sample(x=nrows, size=nspikes)] <- TRUE
# Scale the selected prices up or down by a random factor
priceb <- priceg
priceb[ispike] <- priceb[ispike]*
  sample(c(0.99, 1.01), size=nspikes, replace=TRUE)
# Rolling median and MAD of the prices with spikes
medianv <- HighFreq::roll_mean(priceb, lookb=lookb, method="nonparametric")
madv <- HighFreq::roll_var(priceb, lookb=lookb, method="nonparametric")
# Shift both by half the interval to center them
medianv <- rutils::lagit(medianv, lagg=-halfb, pad_zeros=FALSE)
madv <- rutils::lagit(madv, lagg=-halfb, pad_zeros=FALSE)
# Z-scores (set to zero where the MAD is not positive)
zscores <- ifelse(madv > 0, (priceb - medianv)/madv, 0)
madz <- mad(zscores[abs(zscores) > 0])
# Grid of discrimination thresholds
threshv <- madz*seq(from=0.1, to=3.0, by=0.05)/2
# Error rates for every threshold - one row per threshold
errorr <- t(sapply(threshv, confun, actualv=!ispike, zscores=zscores))
rownames(errorr) <- threshv
# Add the end points (1, 0) and (0, 1) to the error rates
errorr <- rbind(c(1, 0), errorr, c(0, 1))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hampel_roc_daily.png}
      <<echo=TRUE,eval=FALSE>>=
# Area under the ROC curve (AUC) from the averaged true positive rates
# multiplied by the increments of the false positive rates
truepos <- (1 - errorr[, "typeII"])
truepos <- (truepos + rutils::lagit(truepos))/2
falsepos <- rutils::diffit(errorr[, "typeI"])
abs(sum(truepos*falsepos))
# ROC curve: TRUE positive rate versus FALSE positive rate
plot(x=errorr[, "typeI"], y=1-errorr[, "typeII"],
     type="l", lwd=3, col="blue",
     xlim=c(0, 1), ylim=c(0, 1),
     xlab="FALSE positive rate", ylab="TRUE positive rate",
     main="ROC Curve for Daily Hampel Classifier")
# Diagonal reference line
abline(a=0.0, b=1.0, col="orange", lwd=3)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Modeling of Intraday Stock Prices}


%%%%%%%%%%%%%%%
\subsection{Aggregating \protect\emph{TAQ} Data to \protect\emph{OHLC}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{data table} columns can be \emph{aggregated} over categories (factors) defined by one or more columns passed to the \texttt{"by"} argument.
      \vskip1ex
      Multiple \emph{data table} columns can be referenced by passing a list of names specified by the dot \texttt{.()} operator.
      \vskip1ex
      The function \texttt{round.POSIXt()} rounds date-time objects to seconds, minutes, hours, days, months or years.
      \vskip1ex
      The function \texttt{as.POSIXct()} coerces objects to class \texttt{POSIXct}.
      <<echo=TRUE,eval=FALSE>>=
# NOTE(review): tickg is assumed to be a data.table with the columns
# index, price, and volume - it's not defined in this section
# Round time index to seconds
# The left side of := must be the bare column name (index),
# because a namespace-qualified name like zoo::index isn't a column name
tickg[, index := as.POSIXct(round.POSIXt(index, "secs"))]
# Aggregate to OHLC by seconds
ohlc <- tickg[, .(open=first(price), high=max(price), low=min(price), close=last(price), volume=sum(volume)), by=index]
# Round time index to minutes
tickg[, index := as.POSIXct(round.POSIXt(index, "mins"))]
# Aggregate to OHLC by minutes
ohlc <- tickg[, .(open=first(price), high=max(price), low=min(price), close=last(price), volume=sum(volume)), by=index]
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/hf_ticks_ohlc.png}
      <<echo=TRUE,eval=FALSE>>=
# Build an xts series from the OHLC columns, indexed by the index column
ohlc <- xts::xts(ohlc[, -"index"], order.by=ohlc$index)
# Candlestick dygraph of the OHLC prices (without the fifth column)
dygraphs::dygraph(ohlc[, -5],
  main="XLK Trade Ticks for 2020-03-16 (OHLC)") %>%
  dyCandlestick()
# Static chart of the OHLC prices with volumes in an x11 window
x11(height=5, width=6)
quantmod::chart_Series(x=ohlc,
  name="XLK Trade Ticks for 2020-03-16 (OHLC)",
  TA="add_Vo()")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Trade and Quote (\protect\emph{TAQ}) Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Trade and Quote (\emph{TAQ}) Data\\
      The NYSE TAQ set is 'consolidated' meaning that the trades and quotes come from many exchanges, including the NASDAQ system. TAQ has all trades and quotes from the Consolidated Tape / Consolidated Quote, which includes data from the regional exchanges. (See https://www.tradearca.com/marketdata/intermarket.asp for a brief discussion of CTA/CQ and OTC/UTP.)
      https://wrds-www.wharton.upenn.edu/pages/support/research-wrds/research-guides/wrds-taq-faqs/
      \vskip1ex
      High frequency data is typically formatted as either Trade and Quote (\emph{TAQ}) data, or \emph{Open-High-Low-Close} (\emph{OHLC}) data.
      \vskip1ex
      Trade and Quote (\emph{TAQ}) data contains intraday trades and quotes on exchange-traded stocks and futures.
      \vskip1ex
      The \emph{TAQ} data is spaced irregularly in time, with data recorded each time a new trade or quote arrives.
      \vskip1ex
      Each row of \emph{TAQ} data contains both the quote and trade prices, and the corresponding quote size or trade volume.
      \vskip1ex
      Each row of \emph{TAQ} data contains both the quote and trade prices, and the corresponding quote size or trade volume:
      \emph{Bid.Price, Bid.Size, Ask.Price, Ask.Size, Trade.Price, Volume}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:1)),size="tiny",tidy=FALSE,eval=TRUE>>=
options(width=200)
# Load package HighFreq
library(HighFreq)
# Or load the high frequency data file directly:
# symbolv <- load("/Users/jerzy/Develop/R/HighFreq/data/hf_data.RData")
# Show the first rows of the SPY TAQ data
head(HighFreq::SPY_TAQ)
# Show the first and last rows of the SPY OHLC data
head(HighFreq::SPY)
tail(HighFreq::SPY)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Open-High-Low-Close (\protect\emph{OHLC}) Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{Open-High-Low-Close} (\emph{OHLC}) data contains intraday trade prices and trade volumes.
      \vskip1ex
      \emph{OHLC} data is evenly spaced in time, with each row containing the \emph{Open, High, Low, Close} prices, and the trade \emph{Volume}, recorded over the past time interval (called a \emph{bar} of data).
      \vskip1ex
      The \emph{Open} and \emph{Close} prices are the first and last trade prices recorded in the time bar.
      \vskip1ex
      The \emph{High} and \emph{Low} prices are the highest and lowest trade prices recorded in the time bar.
      \vskip1ex
      The \emph{Volume} is the total trading volume recorded in the time bar.
      \vskip1ex
      The \emph{OHLC} data format provides a way of efficiently compressing \emph{TAQ} data, while preserving information about price levels, volatility (range), and trading volumes.
      \vskip1ex
      In addition, evenly spaced \emph{OHLC} data allows for easier analysis of multiple time series, since the prices for different assets are given at the same moments in time.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,size="tiny",tidy=FALSE,eval=TRUE>>=
# Load package HighFreq
library(HighFreq)
# Show the first rows of the SPY OHLC price bars
head(HighFreq::SPY)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Plotting High Frequency \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Aggregating high frequency \emph{TAQ} data into \emph{OHLC} format with lower periodicity allows for data compression while maintaining some information about volatility.
      <<earl_ohlc_chart,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# Load package HighFreq
library(HighFreq)
# Define symbol
symboln <- "SPY"
# Load OHLC data
dirout <- "/Users/jerzy/Develop/data/hfreq/scrub/"
# load() returns the names of the loaded objects,
# so store them separately from the symbol
symbolv <- load(file.path(dirout, paste0(symboln, ".RData")))
# Plot the first hour of trading on 2013-11-11
interval <- "2013-11-11 09:30:00/2013-11-11 10:30:00"
chart_Series(get(symboln)[interval], name=symboln)
      @
      The package \emph{HighFreq} contains both \emph{TAQ} data and \emph{Open-High-Low-Close} (\emph{OHLC}) data.
      \vskip1ex
      If you are not able to install package \emph{HighFreq} then download the file \texttt{hf\_data.RData} from the NYU share drive and load it.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_ohlc_chart-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Package \protect\emph{HighFreq} for Managing High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The package \emph{HighFreq} contains functions for managing high frequency time series data, such as:
      \begin{itemize}
        \item converting \emph{TAQ} data to \emph{OHLC} format,
        \item chaining and joining time series,
        \item scrubbing bad data,
        \item managing time zones and aligning time indices,
        \item aggregating data to lower frequency (periodicity),
        \item calculating rolling aggregations (VWAP, Hurst exponent, etc.),
        \item calculating seasonality aggregations,
        \item estimating volatility, skewness, and higher moments,
      \end{itemize}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Install package HighFreq from github
devtools::install_github(repo="algoquant/HighFreq")
# Load package HighFreq
library(HighFreq)
# Get documentation for package HighFreq
# Get short description (package name must be a string)
packageDescription("HighFreq")
# Load help page
help(package="HighFreq")
# List all datasets in HighFreq (package name must be a string)
data(package="HighFreq")
# List all objects in HighFreq
ls("package:HighFreq")
# Remove HighFreq from search path
detach("package:HighFreq")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Package \texttt{HighFreq} for Managing High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Package \texttt{HighFreq} contains functions for managing high frequency \emph{TAQ} and \emph{OHLC} market data:
      \begin{itemize}
        \item reading and writing data from files,
        \item managing time zones and aligning indices,
        \item chaining and joining time series,
        \item scrubbing bad data points,
        \item converting \emph{TAQ} data to \emph{OHLC} format,
        \item aggregating data to lower frequency,
      \end{itemize}
      \texttt{HighFreq} is inspired by the package \texttt{highfrequency}, and follows many of its conventions.
      \vskip1ex
      \texttt{HighFreq} depends on packages \texttt{xts}, \texttt{quantmod}, \texttt{lubridate}, and \texttt{caTools}.
      \vskip1ex
      The function \texttt{scrub\_agg()} scrubs a single day of \emph{TAQ} data, aggregates it, and converts it to \emph{OHLC} format.
      \vskip1ex
      The function \texttt{save\_scrub\_agg()} loads, scrubs, aggregates, and binds multiple days of \emph{TAQ} data for a single symbol, and saves the \emph{OHLC} time series to a single \texttt{*.RData} file.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Install package HighFreq from github
install.packages("devtools")
library(devtools)
install_github(repo="algoquant/HighFreq")
# Load package HighFreq
library(HighFreq)
# Set data directories
dirin <- "/Users/jerzy/Develop/data/hfreq/src/"
dirout <- "/Users/jerzy/Develop/data/hfreq/scrub/"
# Define symbol
symboln <- "SPY"
# Load a single day of TAQ data
# load() returns the names of the loaded objects,
# so store them separately from the symbol
symbolv <- load(file.path(dirin, paste0(symboln, "/2014.05.02.", symboln, ".RData")))
# Scrub, aggregate single day of TAQ data to OHLC
ohlc_data <- scrub_agg(taq_data=get(symbolv))
# Aggregate TAQ data for the symbol, and save to file
HighFreq::save_scrub_agg(symboln,
               dirin=dirin,
               dirout=dirout,
               period="minutes")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Datasets in Package \protect\emph{HighFreq}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The package \emph{HighFreq} contains several high frequency time series, in \emph{xts} format, stored in a file called \texttt{hf\_data.RData}:
      \begin{itemize}
        \item a time series called \texttt{SPY\_TAQ}, containing a single day of \emph{TAQ} data for the \emph{SPY} ETF.
        \item three time series called \texttt{SPY}, \texttt{TLT}, and \texttt{VXX}, containing intraday \texttt{1}-minute \emph{OHLC} price bars for the \emph{SPY}, \emph{TLT}, and \emph{VXX} ETFs.
      \end{itemize}
      Even after the \emph{HighFreq} package is loaded, its datasets aren't loaded into the workspace, so they aren't listed in the workspace.
      \vskip1ex
      That's because the datasets in package \emph{HighFreq} are set up for \emph{lazy loading}, which means they can be called as if they were loaded, even though they're not loaded into the workspace.
      \vskip1ex
      The datasets in package \emph{HighFreq} can be loaded into the workspace using the function \texttt{data()}.
      \vskip1ex
      The data is set up for \emph{lazy loading}, so it doesn't require calling \texttt{data(hf\_data)} to load it into the workspace before calling it.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Load package HighFreq
library(HighFreq)
# You can see SPY when listing objects in HighFreq
ls("package:HighFreq")
# You can see SPY when listing datasets in HighFreq
# (the package name must be passed as a string)
data(package="HighFreq")
# But the SPY dataset isn't listed in the workspace
ls()
# HighFreq datasets are lazy loaded and available when needed
head(HighFreq::SPY)
# Load all the datasets in package HighFreq
data(hf_data)
# HighFreq datasets are now loaded and in the workspace
head(SPY)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Distribution of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      High frequency returns exhibit \emph{large negative skewness} and \emph{very large kurtosis} (leptokurtosis), or fat tails.
      \vskip1ex
      Student's \emph{t-distribution} has fat tails, so it fits high frequency returns much better than the normal distribution.
      \vskip1ex
      The function \texttt{fitdistr()} from package \emph{MASS} fits a univariate distribution to a sample of data, by performing \emph{maximum likelihood} optimization.
      \vskip1ex
      The function \texttt{hist()} calculates and plots a histogram, and returns its data \emph{invisibly}.
      <<echo=(-(1:1)),eval=FALSE>>=
library(rutils)  # Load package rutils
# Calculate SPY percentage returns as differences of log close prices
ohlc <- HighFreq::SPY
nrows <- NROW(ohlc)
closep <- log(quantmod::Cl(ohlc))
retp <- rutils::diffit(closep)
colnames(retp) <- "SPY"
# Standardize the returns (zero mean, unit sd) for later comparisons
retp <- (retp - mean(retp))/sd(retp)
# Calculate the moments of the standardized returns
sapply(c(var=2, skew=3, kurt=4), function(x) sum(retp^x)/nrows)
# Perform the Jarque-Bera test of normality
tseries::jarque.bera.test(retp)
# Fit t-distribution with 2 degrees of freedom using MASS::fitdistr()
optiml <- MASS::fitdistr(retp, densfun="t", df=2)
# Extract the fitted location and scale parameters
loc <- optiml$estimate[1]
scalev <- optiml$estimate[2]
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_dist.png}
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 1), oma=c(1, 1, 1, 1))
# Plot histogram of SPY returns
histp <- hist(retp, col="lightgrey", mgp=c(2, 1, 0),
  xlab="returns (standardized)", ylab="frequency", xlim=c(-3, 3),
  breaks=1e3, freq=FALSE, main="Distribution of High Frequency SPY Returns")
# lines(density(retp, bw=0.2), lwd=3, col="blue")
# Plot the fitted t-distribution density
curve(expr=dt((x-loc)/scalev, df=2)/scalev,
      type="l", lwd=3, col="red", add=TRUE)
# Plot the Normal probability distribution
curve(expr=dnorm(x, mean=mean(retp),
  sd=sd(retp)), add=TRUE, lwd=3, col="blue")
# Add legend
legend("topright", inset=0.05, bty="n",
  legend=c("t-distr", "normal"), y.intersp=0.1,
  lwd=6, lty=1, col=c("red", "blue"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Distribution of Aggregated High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The distribution of returns depends on the sampling frequency.
      \vskip1ex
      High frequency returns aggregated to a lower periodicity become less negatively skewed and less fat tailed, and closer to the normal distribution.
      \vskip1ex
      The function \texttt{xts::to.period()} converts a time series to a lower periodicity (for example from hourly to daily periodicity).
      <<echo=TRUE,eval=FALSE>>=
# Standardized hourly SPY percentage returns
closep <- log(Cl(xts::to.period(x=ohlc, period="hours")))
retsh <- rutils::diffit(closep)
retsh <- (retsh - mean(retsh))/sd(retsh)
# Standardized daily SPY percentage returns
closep <- log(Cl(xts::to.period(x=ohlc, period="days")))
retd <- rutils::diffit(closep)
retd <- (retd - mean(retd))/sd(retd)
# Calculate moments of minutely, hourly, and daily returns
sapply(list(minutely=retp, hourly=retsh, daily=retd),
       function(rets) {sapply(c(var=2, skew=3, kurt=4),
                function(x) mean(rets^x))
})  # end sapply
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_dist_agg.png}
      <<echo=(-(1:2)),eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 1), oma=c(1, 1, 1, 1))
# Plot densities of SPY returns with the same bandwidth
plot(density(retp, bw=0.4), xlim=c(-3, 3),
     lwd=3, mgp=c(2, 1, 0), col="blue",
     xlab="returns (standardized)", ylab="frequency",
     main="Density of High Frequency SPY Returns")
lines(density(retsh, bw=0.4), lwd=3, col="green")
lines(density(retd, bw=0.4), lwd=3, col="red")
# Add legend
legend("topright", inset=0.05, bty="n",
  legend=c("minutely", "hourly", "daily"), y.intersp=0.1,
  lwd=6, lty=1, col=c("blue", "green", "red"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Estimating Rolling Volatility of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The volatility of high frequency returns can be inflated by large overnight returns.
      \vskip1ex
      The large overnight returns can be scaled down by dividing them by the overnight time interval.
        <<echo=TRUE,eval=FALSE>>=
# Select a single week of SPY returns
ret2013 <- retp["2013-11-11/2013-11-15"]
# Calculate rolling volatility
lookb <- 11 # Look-back interval
# Define end points, and start points lagged by lookb
endd <- seq_along(ret2013)
startp <- c(rep_len(1, lookb),
  endd[1:(NROW(endd)-lookb)])
# Set the warmup end points to lookb
endd[endd < lookb] <- lookb
vol_rolling <- sapply(seq_along(endd),
  function(it) sd(ret2013[startp[it]:endd[it]]))
vol_rolling <- xts::xts(vol_rolling, zoo::index(ret2013))
# Extract time intervals of SPY returns
indeks <- c(60, diff(xts::.index(ret2013)))
head(indeks)
table(indeks)
# Scale SPY returns by time intervals
ret2013 <- 60*ret2013/indeks
# Calculate scaled rolling volatility
vol_scaled <- sapply(seq_along(endd),
  function(it) sd(ret2013[startp[it]:endd[it]]))
vol_rolling <- cbind(vol_rolling, vol_scaled)
# Remove rows with NA values and verify that none remain
vol_rolling <- na.omit(vol_rolling)
sum(is.na(vol_rolling))
# Calculate the ranges of the volatilities
sapply(vol_rolling, range)
      @
    \column{0.5\textwidth}
      % \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_vol_rolling.png}
        <<echo=TRUE,eval=FALSE>>=
# Plot rolling volatility
x11(width=6, height=5)
# Define plot theme with blue and red line colors
plot_theme <- chart_theme()
plot_theme$col$line.col <- c("blue", "red")
chart_Series(vol_rolling, theme=plot_theme,
             name="Rolling Volatility with Overnight Spikes")
# Add legend with the column names
legend("topright", legend=colnames(vol_rolling),
  inset=0.1, bg="white", lty=1, lwd=6, y.intersp=0.1,
  col=plot_theme$col$line.col, bty="n")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Daily Volume and Volatility of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Trading volumes typically rise together with market price volatility.
      \vskip1ex
      The function \texttt{apply.daily()} from package \texttt{xts} applies functions to time series over daily periods.
      \vskip1ex
      The function \texttt{calcvar\_ohlc()} from package \texttt{HighFreq} calculates the variance of an \emph{OHLC} time series using range estimators.
      <<echo=TRUE,eval=FALSE>>=
# Volatility of SPY over the whole OHLC time series
sqrt(HighFreq::calcvar_ohlc(ohlc))
# Daily SPY volatility and volume
volatd <- sqrt(xts::apply.daily(ohlc, FUN=HighFreq::calcvar_ohlc))
colnames(volatd) <- "SPY_volatility"
volumv <- quantmod::Vo(ohlc)
volumd <- xts::apply.daily(volumv, FUN=sum)
colnames(volumd) <- "SPY_volume"
# Plot SPY volatility and volume with two y-axes
datav <- cbind(volatd, volumd)["2008/2009"]
colv <- colnames(datav)
dygraphs::dygraph(datav,
  main="SPY Daily Volatility and Trading Volume") %>%
  dyAxis("y", label=colv[1], independentTicks=TRUE) %>%
  dyAxis("y2", label=colv[2], independentTicks=TRUE) %>%
  dySeries(name=colv[1], axis="y", col="red", strokeWidth=3) %>%
  dySeries(name=colv[2], axis="y2", col="blue", strokeWidth=3)
      @
    \column{0.5\textwidth}
      % \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_daily_volat_volume.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Beta of Volume vs Volatility of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      As a general empirical rule, the \emph{trading volume} $\upsilon$ in a given time period is roughly proportional to the \emph{volatility} of the returns $\sigma$: $\upsilon \propto \sigma$.
      \vskip1ex
      The regression of the \emph{log trading volume} versus the \emph{log volatility} fails the \emph{Durbin-Watson test} for the autocorrelation of residuals.
      \vskip1ex
      But the regression of the \emph{differences} passes the \emph{Durbin-Watson test}.
      <<echo=TRUE,eval=FALSE>>=
# Regress log of daily volume vs log of daily volatility
datav <- log(cbind(volumd, volatd))
colv <- colnames(datav)
dframe <- as.data.frame(datav)
# Build the regression formula from the column names
formulav <- as.formula(paste(colv, collapse="~"))
regmod <- lm(formulav, data=dframe)
# Durbin-Watson test for autocorrelation of residuals
lmtest::dwtest(regmod)
# Regress the differences of log volume vs log volatility
dframe <- as.data.frame(rutils::diffit(datav))
regmod <- lm(formulav, data=dframe)
lmtest::dwtest(regmod)
summary(regmod)
# Plot scatterplot with the regression line and its beta
plot(formulav, data=dframe, main="SPY Daily Trading Volume vs Volatility (log scale)")
abline(regmod, lwd=3, col="red")
mtext(paste("beta =", round(coef(regmod)[2], 3)), cex=1.2, lwd=3, side=2, las=2, adj=(-0.5), padj=(-7))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_daily_volume_volat_reg.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Beta of Hourly Trading Volume vs Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Hourly aggregations of high frequency data also support the rule that the \emph{trading volume} is roughly proportional to the \emph{volatility} of the returns: $\upsilon \propto \sigma$.
      <<echo=TRUE,eval=FALSE>>=
# 60 minutes of data in lookb interval
lookb <- 60 # Look-back interval
vol2013 <- volumv["2013"]
ret2013 <- retp["2013"]
# Define end points with beginning stub
nrows <- NROW(ret2013)
nagg <- nrows %/% lookb
endd <- nrows-lookb*nagg + (0:nagg)*lookb
# Drop the zero end point (if nrows is a multiple of lookb)
endd <- endd[endd > 0]
# Define start points right after the previous end points,
# so that no bar is counted in two intervals
startp <- c(1, endd[-NROW(endd)] + 1)
# Calculate SPY volatility and volume
datav <- sapply(seq_along(endd), function(it) {
  rangev <- startp[it]:endd[it]
  c(volume=sum(vol2013[rangev]),
    volatility=sd(ret2013[rangev]))
})  # end sapply
datav <- t(datav)
# Calculate the differences of the logs
datav <- rutils::diffit(log(datav))
dframe <- as.data.frame(datav)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_hourly_volume_volat_reg.png}
      <<echo=TRUE,eval=FALSE>>=
# Build the regression formula from the column names
formulav <- as.formula(paste(colnames(datav), collapse="~"))
regmod <- lm(formulav, data=dframe)
# Durbin-Watson test for autocorrelation of residuals
lmtest::dwtest(regmod)
summary(regmod)
# Plot scatterplot with the regression line and its beta
plot(formulav, data=dframe,
     main="SPY Hourly Trading Volume vs Volatility (log scale)")
abline(regmod, lwd=3, col="red")
mtext(paste("beta =", round(coef(regmod)[2], 3)), cex=1.2, lwd=3, side=2, las=2, adj=(-0.5), padj=(-7))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{High Frequency Returns in Trading Time}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{trading time} (volume clock) is the time measured by the level of \emph{trading volume}, with the \emph{volume clock} running faster in periods of higher \emph{trading volume}.
      \vskip1ex
      The time-dependent volatility of high frequency returns (\emph{heteroskedasticity}) produces their \emph{leptokurtosis} (large kurtosis, or fat tails).
      \vskip1ex
      The returns can be divided by the \emph{square root of the trading volumes} to obtain scaled returns over equal trading volumes.
      \vskip1ex
      But the returns should not be divided by very small volumes below a certain threshold.
      \vskip1ex
      The scaled returns have a smaller \emph{skewness} and \emph{kurtosis}, and they also have even higher autocorrelations than unscaled returns.
      <<echo=TRUE,eval=FALSE>>=
# Scale returns using volume (volume clock)
# Set to zero the returns with volumes not exceeding 1e4
retsc <- ifelse(volumv > 1e4, retp/sqrt(volumv), 0)
# Scale to unit standard deviation
retsc <- retsc/sd(retsc)
# Calculate moments of raw and scaled returns
nrows <- NROW(retp)
sapply(list(retp=retp, retsc=retsc),
  function(rets) {sapply(c(skew=3, kurt=4),
           function(x) sum((rets/sd(rets))^x)/nrows)
})  # end sapply
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_scaled.png}
      <<echo=TRUE,eval=FALSE>>=
x11(width=6, height=5)
par(mar=c(3, 3, 2, 1), oma=c(1, 1, 1, 1))
# Plot densities of SPY returns
plot(density(retp), xlim=c(-3, 3),
     lwd=3, mgp=c(2, 1, 0), col="blue",
     xlab="returns (standardized)", ylab="frequency",
     main="Density of Volume-scaled High Frequency SPY Returns")
lines(density(retsc, bw=0.4), lwd=3, col="red")
# Plot the standard Normal density
curve(expr=dnorm, add=TRUE, lwd=3, col="green")
# Add legend
legend("topright", inset=0.05, bty="n", y.intersp=0.1,
  legend=c("minutely", "scaled", "normal"),
  lwd=6, lty=1, col=c("blue", "red", "green"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Autocorrelations of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{Ljung-Box} test tests if the autocorrelations of a time series are \emph{statistically significant}.
      \vskip1ex
      The \emph{null hypothesis} of the \emph{Ljung-Box} test is that the autocorrelations are equal to zero.
      \vskip1ex
      The \emph{Ljung-Box} statistic is small for time series that have \emph{statistically insignificant} autocorrelations.
      \vskip1ex
      The function \texttt{Box.test()} calculates the \emph{Ljung-Box} test and returns the test statistic and its p-value.
      \vskip1ex
      For \emph{minutely SPY} returns, the \emph{Ljung-Box} statistic is large and its \emph{p}-value is very small, so we can conclude that \emph{minutely SPY} returns have statistically significant autocorrelations.
      \vskip1ex
      For \emph{scaled minutely SPY} returns, the \emph{Ljung-Box} statistic is even larger, so its autocorrelations are even more statistically significant.
      \vskip1ex
      \emph{SPY} returns aggregated to longer time intervals are less autocorrelated.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Ljung-Box test for minutely SPY returns
Box.test(retp, lag=10, type="Ljung")
# Ljung-Box test for daily SPY returns
Box.test(retd, lag=10, type="Ljung")
# Ljung-Box test statistics for raw and scaled SPY returns
sapply(list(retp=retp, retsc=retsc),
  function(rets) {
    Box.test(rets, lag=10, type="Ljung")$statistic
})  # end sapply
# Ljung-Box test statistics for aggregated SPY returns
sapply(list(minutely=retp, hourly=retsh, daily=retd),
  function(rets) {
    Box.test(rets, lag=10, type="Ljung")$statistic
})  # end sapply
      @
      The level of the autocorrelations depends on the sampling frequency, with higher frequency returns having more significant negative autocorrelations.
      \vskip1ex
      As the returns are aggregated to a lower periodicity, they become less autocorrelated, with daily returns having almost insignificant autocorrelations.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Partial Autocorrelations of High Frequency Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      High frequency minutely \emph{SPY} returns have statistically significant negative autocorrelations.
      \vskip1ex
      \emph{SPY} returns \emph{scaled} by the trading volumes have even more significant negative autocorrelations.
      <<echo=TRUE,eval=FALSE>>=
# Set plot parameters
x11(width=6, height=8)
par(mar=c(4, 4, 2, 1), oma=c(0, 0, 0, 0))
# Define layout with two panels in a single column
layout(matrix(c(1, 2), ncol=1), widths=6, heights=c(4, 4))
# Plot the partial autocorrelations of minutely SPY returns
pacfl <- pacf(as.numeric(retp), lag.max=10,
     xlab="lag", ylab="partial autocorrelation", main="")
title("Partial Autocorrelations of Minutely SPY Returns", line=1)
# Plot the partial autocorrelations of scaled SPY returns
pacfs <- pacf(as.numeric(retsc), lag.max=10,
     xlab="lag", ylab="partial autocorrelation", main="")
title("Partial Autocorrelations of Scaled SPY Returns", line=1)
# Calculate the sums of partial autocorrelations
sum(pacfl$acf)
sum(pacfs$acf)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_pacf.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Market Liquidity, Trading Volume and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Market illiquidity is defined as the market price impact resulting from supply-demand imbalance.
      \vskip1ex
      Market liquidity $\mathcal{L}$ is proportional to the square root of the \emph{trading volume} $\upsilon$ divided by the price volatility $\sigma$:
      \begin{displaymath}
        \mathcal{L} \sim \frac{\sqrt{\upsilon}}{\sigma}
      \end{displaymath}
      Market illiquidity spiked during the May 6, 2010 \emph{flash crash}.
      \vskip1ex
      Research suggests that market crashes are caused by declining market liquidity:\\
      \href{http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2583743}{\emph{Donier et al., Why Do Markets Crash?}}
      <<echo=TRUE,eval=FALSE>>=
# Calculate market liquidity
liquidv <- sqrt(volumd)/volatd
# Plot market liquidity and volatility in two panels
x11(width=6, height=7) ; par(mfrow=c(2, 1))
plot_theme <- chart_theme()
plot_theme$col$line.col <- c("blue")
chart_Series(liquidv["2010"], theme=plot_theme,
  name="SPY Liquidity in 2010", plot=FALSE)
plot_theme$col$line.col <- c("red")
chart_Series(volatd["2010"],
  theme=plot_theme, name="SPY Volatility in 2010")
@
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_liquidity.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Intraday Seasonality of Volume and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The volatility and trading volumes are typically higher at the beginning and end of the trading sessions.
      <<echo=TRUE,eval=FALSE>>=
# Calculate intraday time index with hours and minutes
datev <- format(zoo::index(retp), "%H:%M")
# Aggregate the mean volume for each minute of the day
volumagg <- tapply(X=volumv, INDEX=datev, FUN=mean)
volumagg <- drop(volumagg)
# Aggregate the volatility as the root of the mean squared returns
volagg <- tapply(X=retp^2, INDEX=datev, FUN=mean)
volagg <- sqrt(drop(volagg))
# Coerce to xts, with today's date combined with the intraday times
datev <- as.POSIXct(paste(Sys.Date(), names(volumagg)))
volumagg <- xts::xts(volumagg, datev)
volagg <- xts::xts(volagg, datev)
# Plot seasonality of volume and volatility (without first and last bars)
x11(width=6, height=7) ; par(mfrow=c(2, 1))
plot_theme <- chart_theme()
plot_theme$col$line.col <- c("blue")
chart_Series(volumagg[c(-1, -NROW(volumagg))], theme=plot_theme,
  name="Intraday Seasonality of SPY Volume", plot=FALSE)
plot_theme$col$line.col <- c("red")
chart_Series(volagg[c(-1, -NROW(volagg))], theme=plot_theme,
  name="Intraday Seasonality of SPY Volatility")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_intraday_volume_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Intraday Seasonality of Liquidity and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Market liquidity is typically the highest at the end of the trading session, and the lowest at the beginning.
      \vskip1ex
      The end of day spike in trading volumes and liquidity is caused by computer-driven investors liquidating their positions.
      <<echo=TRUE,eval=FALSE>>=
# Calculate intraday market liquidity from the aggregated volume and volatility
liquidv <- sqrt(volumagg)/volagg
# Plot intraday seasonality of market liquidity (without first and last bars)
x11(width=6, height=7) ; par(mfrow=c(2, 1))
plot_theme <- chart_theme()
plot_theme$col$line.col <- c("blue")
chart_Series(liquidv[c(-1, -NROW(liquidv))], theme=plot_theme,
  name="Intraday Seasonality of SPY Liquidity", plot=FALSE)
plot_theme$col$line.col <- c("red")
chart_Series(volagg[c(-1, -NROW(volagg))], theme=plot_theme,
  name="Intraday Seasonality of SPY Volatility")
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_intraday_liquid_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Daily Volatility and Hurst Exponent}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent typically moves higher with higher market price volatility, and is above 0.5 with high volatility.
      <<daily_volat_hurst,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(rutils)  # Load package rutils
# Plot the 10-period rolling sums of volatd divided by 10 (rolling averages)
chart_Series(roll_sum(volatd, 10)[-(1:10)]/10, name=paste(symboln, "variance"))
# Plot the same for the Hurst exponent
# NOTE(review): hurstd isn't defined in this part of the file - confirm its source
chart_Series(roll_sum(hurstd, 10)[-(1:10)]/10, name=paste(symboln, "Hurst"))
# Add horizontal line at 0.5
abline(h=0.5, col="blue", lwd=2)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/daily_volat_hurst.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Intraday Seasonality of Hurst Exponent and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent typically moves higher with higher market price volatility, and is above 0.5 with high volatility.
      <<daily_season_hurst_volat,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(rutils)  # Load package rutils
# Intraday seasonality of Hurst exponent
interval <- "2013"
season_hurst <- season_ality(hurst_ohlc(ohlc=SPY[interval, 1:4]))
# Remove the last row
season_hurst <- season_hurst[-(nrow(season_hurst))]
# Name the column after the symbol
colnames(season_hurst) <- paste0(symboln, ".season_hurst")
# Define plot theme with hours:minutes axis labels
plot_theme <- chart_theme()
plot_theme$format.labels <- "%H:%M"
# Create the chart object without plotting it
chobj <- chart_Series(x=season_hurst,
  name=paste(colnames(season_hurst),
  "intraday seasonality"), theme=plot_theme,
  plot=FALSE)
# Fix the y-axis range of the chart object
ylim <- chobj$get_ylim()
ylim[[2]] <- structure(c(ylim[[2]][1],
              ylim[[2]][2]), fixed=TRUE)
chobj$set_ylim(ylim)
plot(chobj)
abline(h=0.5, col="blue", lwd=2)
# Intraday seasonality of volatility
season_var <- season_ality(vol_ohlc(ohlc=SPY))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/intraday_season_hurst_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Estimating Skew From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{skew\_ohlc()} from package \texttt{HighFreq} calculates a skew-like indicator:\\
\vspace{-2em}
      \begin{multline*}
        \hskip-1em {s}^3=\frac{1}{n} \sum_{i=1}^{n} \bigl( (H_{i}-O_{i})(H_{i}-C_{i})(H_{i}-0.5(O_{i}+C_{i})) +\\ (L_{i}-O_{i})(L_{i}-C_{i})(L_{i}-0.5(O_{i}+C_{i})) \bigr)
      \end{multline*}
      The function \texttt{roll\_agg\_ohlc()} aggregates rolling, volume weighted moment estimators.
      <<earl_highfreq_runskew,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(rutils)  # Load package rutils
# Rolling variance
varv <- roll_agg_ohlc(ohlc=SPY, agg_fun="vol_ohlc")
# Rolling skew
skewn <- roll_agg_ohlc(ohlc=SPY, agg_fun="skew_ohlc")
# Normalize the skew by the variance to the power 1.5
skewn <- skewn/(varv)^(1.5)
# Set the first row to zero and carry forward the last non-NA values
skewn[1, ] <- 0
skewn <- zoo::na.locf(skewn)
# Plot the variance and skew over a single week
interval <- "2013-11-11/2013-11-15"
chart_Series(varv[interval],
            name=paste(symboln, "variance"))
chart_Series(skewn[interval],
            name=paste(symboln, "Skew"),
            ylim=c(-1, 1))
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_highfreq_runskew-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Daily Volatility and Skew From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{agg\_ohlc()} calculates a statistical estimator over an \emph{OHLC} time series.
      <<earl_dailyskew,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(rutils)  # Load package rutils
# Daily variance and skew
# Apply agg_ohlc() to each day of SPY bars, with agg_fun="vol_ohlc"
volatd <- xts::apply.daily(x=HighFreq::SPY, FUN=agg_ohlc,
                        agg_fun="vol_ohlc")
colnames(volatd) <- paste0(symboln, ".var")
# Same daily aggregation, with agg_fun="skew_ohlc"
daily_skew <- xts::apply.daily(x=HighFreq::SPY, FUN=agg_ohlc,
                        agg_fun="skew_ohlc")
# Scale the daily skew by the daily variance raised to the power 1.5
daily_skew <- daily_skew/(volatd)^(1.5)
colnames(daily_skew) <- paste0(symboln, ".skew")
# Plot both series starting from 2013-06-01
interval <- "2013-06-01/"
chart_Series(volatd[interval],
             name=paste(symboln, "variance"))
chart_Series(daily_skew[interval],
             name=paste(symboln, "skew"))
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_dailyskew-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Regression of Skews Versus Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A regression of lagged skews versus returns appears to be statistically significant, especially in periods of high volatility during the financial crisis of \texttt{2008-09}.
      <<echo=(-(1:1))>>=
# Skew scatterplot
retp <- calc_rets(xts_data=SPY)
# Skew indicator from the log of SPY without its fifth column
skewn <- skew_ohlc(log_ohlc=log(SPY[, -5]))
colnames(skewn) <- paste0(symboln, ".skew")
# Lag the skew and set its first row to zero
skewl <- rutils::lag_it(skewn)
skewl[1, ] <- 0
# Regression data: first column of returns and the sign of the lagged skew
datav <- cbind(retp[, 1], sign(skewl))
# Formula: first column versus the remaining columns, without intercept ("- 1")
formulav <- as.formula(paste(colnames(datav)[1],
          paste(paste(colnames(datav)[-1],
            collapse=" + "), "- 1"), sep="~"))
formulav
# Regression over the whole sample
regmod <- lm(formulav, data=datav)
summary(regmod)$coef
# Regressions over the sub-samples up to and starting from 2011-01-01
summary(lm(formulav, data=datav["/2011-01-01"]))$coef
summary(lm(formulav, data=datav["2011-01-01/"]))$coef
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_skew_scatter-1}
      <<earl_skew_scatter,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# Scatterplot of returns versus the lagged skew.
# Plot the raw lagged skew (not its sign stored in datav), because
# the sign values -1 and 1 fall outside the narrow xlim range.
interval <- "2013-12-01/"
plot(formulav, data=cbind(retp[, 1], skewl)[interval],
     xlim=c(-2e-09, 2e-09),
     cex=0.6, xlab="skew", ylab="rets")
# Add the regression line of the fitted model regmod
abline(regmod, col="blue", lwd=2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Contrarian Strategy Using Skew Oscillator}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The contrarian skew trading strategy involves long or short positions in a single unit of stock, that is opposite to the sign of the skew.
      \vskip1ex
      Skew is calculated over one-minute bars, and trades are executed in the following period.
      \vskip1ex
      The contrarian strategy shows good hypothetical performance before transaction costs, and since it's a liquidity providing strategy, should have very low transaction costs.
      \vskip1ex
      The contrarian strategy is hyperactive, trading almost 46\% of the time in each period.
      <<echo=(-(1:1))>>=
# Contrarian skew trading strategy
# Lag the skew to get positions
posit <- -sign(skewl)
# No position in the first period
posit[1, ] <- 0
# Cumulative PnL
pnl <- cumsum(posit*retp[, 1])
# Calculate frequency of trades
# A sign flip adds 2 to the sum, so the factor 50 (not 100) gives percent
50*sum(abs(sign(skewn)-sign(skewl)))/nrow(skewn)
# Calculate transaction costs
bidask <- 0.001  # 10 bps for liquid ETFs
# NOTE(review): charges bidask per unit of position change - confirm the cost convention
bidask*sum(abs(sign(skewn)-sign(skewl)))
@
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_skew_strat-1}
      \vspace{-1em}
      <<earl_skew_strat,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# Chart the cumulative PnL sampled at the hourly end points
chart_Series(
  x=pnl[endpoints(x=pnl, on="hours"), ],
  name=paste(symboln, "Contrarian Skew Strategy PnL"))
@
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Volume-Weighted Average Price Indicator}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Volume-Weighted Average Price (\emph{VWAP}) is an indicator used for trend following strategies.
      \vskip1ex
      The fast-moving \emph{VWAP} is calculated over a short look-back interval, while the slow-moving \emph{VWAP} is calculated over a longer interval.
      \vskip1ex
      The trend following strategy reverses direction when the fast-moving \emph{VWAP} crosses the slow-moving one.
      <<earl_vwap_plot,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# vwap plot
vwapf <- vwapv(xtsv=SPY, lookb=70)
# Second VWAP with the longer look-back of 225
vwaps <- vwapv(xtsv=SPY, lookb=225)
# VWAP indicator: fast VWAP minus slow VWAP
vwapd <- vwapf - vwaps
colnames(vwapd) <- paste0(symboln, ".vwap")
# Plot the close prices for 2010-05-05 to 2010-05-07 with both VWAPs overlaid
interval <- "2010-05-05/2010-05-07"
invisible(chart_Series(x=Cl(SPY[interval]), name=paste(symboln, "plus VWAP")))
invisible(add_TA(vwapf[interval], on=1, col="red", lwd=2))
invisible(add_TA(vwaps[interval], on=1, col="blue", lwd=2))
# Shade the periods where the indicator is positive (green) or negative (grey)
invisible(add_TA(vwapd[interval] > 0, on=-1, col="lightgreen", border="lightgreen"))
# The last call is not wrapped in invisible(), so its result is printed
add_TA(vwapd[interval] < 0, on=-1, col="lightgrey", border="lightgrey")
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_vwap_plot-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Regression of \protect\emph{VWAP} Versus Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A regression of the \emph{VWAP} indicator versus returns appears to be statistically significant.
      <<echo=(-(1:6))>>=
# vwap scatterplot
# retp <- calc_rets(xts_data=SPY)
vwapf <- vwapv(xtsv=SPY, lookb=70)
vwaps <- vwapv(xtsv=SPY, lookb=225)
vwapd <- vwapf - vwaps
colnames(vwapd) <- paste0(symboln, ".vwap")
lag_vwap <- rutils::lag_it(vwapd)
lag_vwap[1, ] <- 0
# Regression data: first column of returns and the sign of the lagged indicator
# NOTE(review): retp is not defined in this chunk - it must already exist in the session
datav <- cbind(retp[, 1], sign(lag_vwap))
# Formula: first column versus the remaining columns, without intercept ("- 1")
formulav <- as.formula(paste(colnames(datav)[1],
          paste(paste(colnames(datav)[-1],
            collapse=" + "), "- 1"), sep="~"))
formulav
# Regression over the whole sample
regmod <- lm(formulav, data=datav)
summary(regmod)$coef
# Regressions over the sub-samples up to and starting from 2011-01-01
summary(lm(formulav, data=datav["/2011-01-01"]))$coef
summary(lm(formulav, data=datav["2011-01-01/"]))$coef
      @
    \column{0.5\textwidth}
    \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/earl_vwap_scatter-1}
      <<earl_vwap_scatter,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# Scatterplot of returns versus the raw lagged VWAP indicator
interval <- "2013-12-01/"
# The x-axis shows the VWAP indicator, so label it "VWAP" (not "skew")
plot(formulav, data=cbind(retp[, 1], lag_vwap)[interval],
     cex=0.6, xlab="VWAP", ylab="rets")
# Add the regression line of the fitted model regmod
abline(regmod, col="blue", lwd=2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Trend Following Strategy Using \protect\emph{VWAP}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The trend following trading strategy involves long or short positions in a single unit of stock, that is equal to the sign of the \emph{VWAP} indicator.
      \vskip1ex
      The \emph{VWAP} indicator is calculated over one-minute bars, and trades are executed in the following period.
      \vskip1ex
      The trend following strategy shows good hypothetical performance before transaction costs.
      \vskip1ex
      The trend following strategy is infrequent, trading only 0.56\% of the time in each period.
      <<echo=(-(1:1))>>=
# Trend following trading strategy
# Cumulative PnL
pnl <- cumsum(sign(lag_vwap)*retp[, 1])
# Calculate frequency of trades
# A sign flip adds 2 to the sum, so the factor 50 (not 100) gives percent
50*sum(abs(sign(vwapd)-sign(lag_vwap)))/nrow(vwapd)
# Calculate transaction costs
bidask <- 0.001  # 10 bps for liquid ETFs
# NOTE(review): charges bidask per unit of position change - confirm the cost convention
bidask*sum(abs(sign(vwapd)-sign(lag_vwap)))
@
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/earl_vwap_strat-1}
      \vspace{-1em}
      <<earl_vwap_strat,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# Chart the cumulative PnL sampled at the hourly end points
chart_Series(x=pnl[endpoints(x=pnl, on="hours"), ],
  name=paste(symboln, "VWAP Trend Following Strategy PnL"))
@
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Estimating Hurst Exponent From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent is a measure of long-term memory of a time series, and is related to its autocorrelation:
      \begin{displaymath}
        {\mathbb{E}}[\frac{(max(p)-min(p))}{\hat\sigma}]=t^H
      \end{displaymath}
      $H=0.5$ for Brownian motion (no autocorrelations), \\
      $H>0.5$ for positive autocorrelations, \\
      $H<0.5$ for negative autocorrelations.
      \vskip1ex
      The function \texttt{hurst\_ohlc()} from package \texttt{HighFreq} calculates a Hurst-like indicator:
      \begin{displaymath}
        H=\frac{1}{n} \sum_{i=1}^{n} log(\frac{H_{i}-L_{i}}{abs(C_{i}-O_{i})})
      \end{displaymath}
      The function \texttt{agg\_ohlc()} calculates a statistical estimator over an \emph{OHLC} time series.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/hf_daily_volat_volume.png}\\
      <<hurstd,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
library(rutils)  # Load package rutils
# Daily Hurst exponents
hurstd <- xts::apply.daily(x=HighFreq::SPY, FUN=agg_ohlc, agg_fun="hurst_ohlc")
# Use paste0() so the column name has no space before ".Hurst"
colnames(hurstd) <- paste0(colname(get(symboln)), ".Hurst")
# Plot the rolling average over 10 rows, without the first 10 rows
chart_Series(roll_sum(hurstd, 10)[-(1:10)]/10, name=paste(symboln, "Hurst"))
# Horizontal reference line at 0.5
abline(h=0.5, col="blue", lwd=2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Conclusion}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
      Open questions:
      \begin{itemize}
        \item is there an interaction between volatility, volume and skew?
        \item does kurtosis also have predictive value for market direction?
        \item how can higher moments (skew, kurtosis) help predict market crashes?
        \item what is the persistence of market anomalies over time?
        \item what is the relationship between returns and the cross-section of skew?
        \item This presentation is available on GitHub: \url{https://github.com/algoquant/presentations}
      \end{itemize}
      \vskip1ex
      Acknowledgements:
      \begin{itemize}
        \item \emph{Snowfall Systems} provides the \emph{PortfolioEffect} system for:
        \includegraphics[scale=0.1]{figure/portfolioeffect-logo-full-200-950.png}
        \begin{itemize}
          \item real-time high frequency market data aggregations and risk metrics,
          \item real-time portfolio analytics and optimization,
          \item portfolio hosting,
          \item \url{https://www.portfolioeffect.com/}
        \end{itemize}
        \item Brian Peterson for Thomson Reuters tick data,
        \item Brian Peterson, Joshua Ulrich, and Jeffrey Ryan for packages \emph{xts}, \emph{quantmod}, \emph{PerformanceAnalytics}, and \emph{TTR},
      \end{itemize}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Package \protect\emph{IBrokers} for Using Interactive Brokers}


%%%%%%%%%%%%%%%
\subsection{Package \protect\emph{IBrokers} for Using Interactive Brokers}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Interactive Brokers (IB) is a brokerage company which provides an API for computer-driven trading, and it also provides extensive documentation:\\
      \hskip1em\href{https://www.interactivebrokers.com/en/index.php?f=29205}{Interactive Brokers Main Page}\\
      \hskip1em\href{https://www.interactivebrokers.com/en/index.php?f=29205}{Interactive Brokers Documentation}\\
      \hskip1em\href{https://gdcdyn.interactivebrokers.com/en/index.php?f=25228&course=5}{Interactive Brokers Course}
      \vskip1ex
      \fcolorbox{red}{yellow}{
      \begin{minipage}{18em}
        \textbf{\color{red}{Disclosure:}} I do have a personal account with Interactive Brokers.\\
        But I do not have any other relationship with Interactive Brokers, and I do not endorse or recommend them.\\
         \textbf{\color{red}{Warning:}} Active trading is extremely risky, and most people lose money.\\
        \textbf{\color{red}{I advise not to trade with your own capital!}}
      \end{minipage}}
      \vskip1ex
      The package \emph{IBrokers} contains \texttt{R} functions for executing IB commands using the Interactive Brokers API.
      \vskip1ex
      The package \emph{IBrokers} has extensive
      \href{https://cran.r-project.org/web/packages/IBrokers/index.html}{documentation}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Install the package IBrokers from CRAN
install.packages(pkgs="IBrokers")
# Attach the package IBrokers to the search path
library("IBrokers")
# Documentation for the package IBrokers:
# Short description of the package
packageDescription(pkg="IBrokers")
# Help page of the package
help(package="IBrokers")
# All the datasets in "IBrokers"
data(package="IBrokers")
# All the objects in "IBrokers"
ls(name="package:IBrokers")
# Detach IBrokers from the search path
detach(name="package:IBrokers")
# Install the package IBrokers2 from GitHub
devtools::install_github(repo="algoquant/IBrokers2")
      @
      \vspace{-1em}
      The package \emph{IBrokers2} is derived from package \emph{IBrokers}, and contains additional functions for executing real time trading strategies via the Interactive Brokers API.
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Configuring the IB Trader Workstation (\protect\emph{TWS})}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Connecting to Interactive Brokers via the API requires being logged into the IB Trader Workstation (\emph{TWS}) or the IB Gateway (\emph{IBG}).
      \vskip1ex
      You should \href{https://www.interactivebrokers.com/en/index.php?f=16040}{Download the TWS} and install it.
      \vskip1ex
      Read about the \emph{TWS}
      \href{https://interactivebrokers.github.io/tws-api/initial_setup.html}{initial setup}.
      \vskip1ex
      The \emph{TWS} settings must be configured to enable the API: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ API}
      \vskip1ex
      The \emph{TWS} Java heap size should be increased to \texttt{1.5} GB: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ General}
      \vskip1ex
      Read more about the
      \href{https://ibkr.info/article/2170}{required TWS memory allocation}
      and about
      \href{https://www.interactivebrokers.com/en/software/tws/usersguidebook/priceriskanalytics/custommemory.htm}{how to change the TWS heap size}.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibapi_settings.png}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibtws_memory.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Connecting to Interactive Brokers via the API}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Connecting to Interactive Brokers via the API requires being logged into the IB Trader Workstation (\emph{TWS}) or the IB Gateway (\emph{IBG}).
      \vskip1ex
      Interactive Brokers provides extensive
      \href{http://interactivebrokers.github.io/tws-api/}{API Documentation}.
      \vskip1ex
      The functions \texttt{twsConnect()} and \texttt{ibgConnect()} open a connection to the Interactive Brokers API, via either the \emph{TWS} or the \emph{IBG}.
      \vskip1ex
      The parameter \texttt{port} should be assigned to the value of the \emph{socket port} displayed in \emph{TWS} or \emph{IBG}.
      \vskip1ex
      The function \texttt{twsDisconnect()} closes the Interactive Brokers API connection.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Connect to Interactive Brokers TWS
# The port should equal the socket port displayed in TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Or connect to IB Gateway
# ib_connect <- IBrokers::ibgConnect(port=4002)
# Check connection
IBrokers::isConnected(ib_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Account Information from Interactive Brokers}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqAccountUpdates()} returns a list with the account information (requires an account code).
      \vskip1ex
      The first element of the list contains the account dollar balances, while the remaining elements contain contract information.
      \vskip1ex
      The function \texttt{twsPortfolioValue()} returns a data frame with commonly used account fields, such as the contract names, net positions, and realized and unrealized profits and losses (pnl's).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Or connect to IB Gateway
# ib_connect <- IBrokers::ibgConnect(port=4002)
# Download account information from IB
# The account code is hard-coded - replace it with your own account code
ac_count <- "DU1215081"
ib_account <- IBrokers::reqAccountUpdates(conn=ib_connect,
                                          acctCode=ac_count)
# Extract account balances
# The balances are the first element of the list
balance_s <- ib_account[[1]]
balance_s$AvailableFunds
# Extract contract names, net positions, and profits and losses
IBrokers::twsPortfolioValue(ib_account)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Defining Contracts Using Package \protect\emph{IBrokers}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{twsEquity()} defines a stock contract (IB contract object).
      \vskip1ex
      The functions \texttt{twsFuture()} and \texttt{twsCurrency()} define futures and currency contracts.
      \vskip1ex
      The function \texttt{reqContractDetails()} returns a list with information on the IB instrument.
      \vskip1ex
      The package \emph{twsInstrument} contains utility functions for enhancing the package \emph{IBrokers}.
      \vskip1ex
      To define an IB contract, first look it up by keyword in the online
      \href{https://www.interactivebrokers.com/en/index.php?f=463}{IB Contract and Symbol Database}, and find the \emph{Conid} for that instrument.
      \vskip1ex
      Enter the \emph{Conid} into the function \texttt{twsInstrument::getContract()}, which will return the IB contract object for that instrument.
      \vskip1ex
      Interactive Brokers provides more information about financial contracts here:
      \href{https://www.interactivebrokers.com/en/index.php?f=1563&p=fut}{IB Traded Products}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Each definition below overwrites contractobj,
# so only the last one is used by the calls that follow.
# Define AAPL stock contract (object)
contractobj <- IBrokers::twsEquity("AAPL", primary="SMART")
# Define CHF currency contract
contractobj <- IBrokers::twsCurrency("CHF", currency="USD")
# Define S&P Emini future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ES",
  exch="GLOBEX", expiry="201906")
# Define 10yr Treasury future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ZN",
  exch="ECBOT", expiry="201906")
# Define euro currency future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="EUR",
  exch="GLOBEX", expiry="201906")
# Define Gold future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="GC",
  exch="NYMEX", expiry="201906")
# Define Oil future January 2019 contract
contractobj <- IBrokers::twsFuture(symbol="QM",
  exch="NYMEX", expiry="201901")
# Test if contract object is correct
IBrokers::is.twsContract(contractobj)
# Get list with instrument information
# NOTE(review): ib_connect is not created in this chunk - an open connection is required
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
# Install the package twsInstrument
install.packages("twsInstrument", repos="http://r-forge.r-project.org")
# Define euro future using getContract() and Conid
contractobj <- twsInstrument::getContract("317631411")
# Get list with instrument information
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Defining \protect\emph{VIX} Futures Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \emph{VIX} futures have both monthly and weekly contracts, with the monthly contracts being the most liquid.
      \vskip1ex
      In order to uniquely specify a \emph{VIX} futures contract, the parameter \texttt{"local"} should be passed into the function \texttt{twsFuture()}, with the local security name.
      \vskip1ex
      For example, \emph{VXV8} is the local security name (symbol) for the monthly \emph{VIX} futures contract expiring on October 17th, 2018.
      \vskip1ex
      \emph{VX40V8} is the local security name (symbol) for the weekly \emph{VIX} futures contract expiring on October 3rd, 2018.
      \vskip1ex
      The function \texttt{reqContractDetails()} returns a list with information on the IB instrument.
      \vskip1ex
      \emph{VIX} futures are traded on the \emph{CFE} (CBOE Futures Exchange): \url{http://cfe.cboe.com/}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define VIX monthly and weekly futures June 2019 contract
# (no local security name is passed in this definition)
symboln <- "VIX"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  exch="CFE", expiry="201906")
# Define VIX monthly futures October 17th 2018 contract
# The expiry must agree with the local name VXV8 (October 2018)
contractobj <- IBrokers::twsFuture(symbol=symboln,
  local="VXV8", exch="CFE", expiry="201810")
# Define VIX weekly futures October 3rd 2018 contract
# The expiry must agree with the local name VX40V8 (October 2018)
contractobj <- IBrokers::twsFuture(symbol=symboln,
  local="VX40V8", exch="CFE", expiry="201810")
# Get list with instrument information
# NOTE(review): ib_connect is not created in this chunk - an open connection is required
IBrokers::reqContractDetails(conn=ib_connect,
  Contract=contractobj)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Historical Daily Data from Interactive Brokers}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqHistoricalData()} downloads historical data to a \texttt{.csv} file.
      \vskip1ex
      The historical \emph{daily} bar data fields are \texttt{"Open", "High", "Low", "Close", "Volume", "WAP", "Count"}. (\texttt{"WAP"} is the weighted average price.)
      \vskip1ex
      Interactive Brokers provides more information about historical market data:\\
      \hskip1em\href{http://interactivebrokers.github.io/tws-api/historical_data.html}{IB Historical Market Data}\\
      \hskip1em\href{http://interactivebrokers.github.io/tws-api/historical_bars.html}{IB Historical Bar Data Fields}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define S&P Emini futures June 2019 contract
symboln <- "ES"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  exch="GLOBEX", expiry="201906")
# Open file for data download
dirn <- "/Users/jerzy/Develop/data/ib_data"
# Create the directory with its parents, without a warning if it exists
dir.create(dirn, recursive=TRUE, showWarnings=FALSE)
filen <- file.path(dirn, paste0(symboln, "201906.csv"))
file_connect <- file(filen, open="w")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Write header to file
# sep="" avoids a trailing space after the last column name
colv <- c("Index", "Open", "High", "Low", "Close", "Volume", "WAP", "Count")
cat(paste(paste(symboln, colv, sep="."), collapse=","), "\n",
  sep="", file=file_connect)
# Download historical data to file
IBrokers::reqHistoricalData(conn=ib_connect,
  Contract=contractobj,
  barSize="1 day", duration="6 M",
  file=file_connect)
# Close data file
close(file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Historical Intraday Data for a Portfolio}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Historical data for a portfolio of symbols can be downloaded in a loop.
      \vskip1ex
      The historical \emph{intraday} bar data fields are \texttt{"Open", "High", "Low", "Close", "Volume", "WAP", "XTRA", "Count"}. (\texttt{"WAP"} is the weighted average price.)
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define IB contract objects for stock symbols
symbolv <- c("AAPL", "F", "MSFT")
contractv <- lapply(symbolv, IBrokers::twsEquity, primary="SMART")
names(contractv) <- symbolv
# Open file connections for data download
# The file names contain the symbol and the current time stamp
dirn <- "/Users/jerzy/Develop/data/ib_data"
filens <- file.path(dirn, paste0(symbolv, format(Sys.time(), format="_%m_%d_%Y_%H_%M"), ".csv"))
file_connects <- lapply(filens, function(filen) file(filen, open="w"))
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Names of the intraday bar data fields
colv <- c("Index", "Open", "High", "Low", "Close", "Volume", "WAP", "XTRA", "Count")
# Download historical 1-minute bar data to files
# seq_along() also works if symbolv is empty (1:NROW() would not)
for (it in seq_along(symbolv)) {
  symboln <- symbolv[it]
  file_connect <- file_connects[[it]]
  contractobj <- contractv[[it]]
  cat("Downloading data for: ", symboln, "\n")
  # Write header to file
  # sep="" avoids a trailing space after the last column name
  cat(paste(paste(symboln, colv, sep="."), collapse=","), "\n",
      sep="", file=file_connect)
  IBrokers::reqHistoricalData(conn=ib_connect,
                               Contract=contractobj,
                               barSize="1 min", duration="2 D",
                               file=file_connect)
  Sys.sleep(10) # 10s pause to avoid IB pacing violation
}  # end for
# Close data files
for (file_connect in file_connects) close(file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Historical Data for Expired Futures Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Historical data for expired futures contracts can be downloaded using \texttt{reqHistoricalData()}, by defining the contract with the appropriate \texttt{expiry} date and with the parameter \texttt{include\_expired="1"}.
      \vskip1ex
      For example, \emph{ESM8} is the symbol for the \emph{S\&P500} emini futures expiring in June 2018.
      <<echo=TRUE,eval=FALSE>>=
# Define S&P Emini futures June 2018 contract
# include_expired="1" is passed because the contract has already expired
symboln <- "ES"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  include_expired="1",
  exch="GLOBEX", expiry="201806")
# Open file connection for ESM8 data download
# NOTE(review): dirn is not defined in this chunk - it must already exist in the session
filen <- file.path(dirn, paste0(symboln, "M8.csv"))
file_connect <- file(filen, open="w")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Download historical data to file
# Requests daily bars over a duration of two years
IBrokers::reqHistoricalData(conn=ib_connect,
  Contract=contractobj,
  barSize="1 day", duration="2 Y",
  file=file_connect)
# Close data file
close(file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/sp500_ESM8_futures.png}
      <<echo=TRUE,eval=FALSE>>=
# Load OHLC data and coerce it into xts series
pricev <- data.table::fread(filen)
data.table::setDF(pricev)
# Column 1 is converted to dates (presumably seconds since 1970-01-01),
# and columns 2 to 6 are the OHLC prices and the volume.
# Call the generic as.POSIXct() instead of the S3 method directly.
pricev <- xts::xts(pricev[, 2:6],
  order.by=as.Date(as.POSIXct(pricev[, 1],
    tz="America/New_York", origin="1970-01-01")))
colnames(pricev) <- c("Open", "High", "Low", "Close", "Volume")
# Plot OHLC data with the trading volumes
chart_Series(x=pricev, TA="add_Vo()",
  name="S&P500 ESM8 futures")
# Plot dygraph
# Fully qualified nested calls don't need the pipe operator
# or the package dygraphs to be attached
dygraphs::dyCandlestick(
  dygraphs::dygraph(pricev[, 1:4], main="S&P500 ESM8 futures"))
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Downloading Continuous Contract Futures Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Note:
      Continuous futures require passing the parameter: contract.secType = "CONTFUT", which isn't currently included in package IBrokers.\\
      \hskip1em\url{http://interactivebrokers.github.io/tws-api/basic_contracts.html}
      \vskip1ex
      A continuous futures contract is a synthetic time series of prices, created by splicing together prices from several futures contracts with different expiration dates.
      \vskip1ex
      At any point in time, the price of the continuous contract is equal to the most liquid contract times a normalization factor.
      \vskip1ex
      When the next consecutive contract becomes more liquid, then the continuous contract price is rolled over to that contract.
      \vskip1ex
      The continuous price is multiplied by a normalization factor when the contract is rolled, to remove jumps caused by the shape of the futures curve.
      \vskip1ex
      So the continuous contract prices are not equal to the past futures prices.
      \vskip1ex
      Futures contracts trade at different prices (because of the futures convenience yield).
      \vskip1ex
      This causes price jumps between the currently expiring futures contract and the next futures contract.
      A continuous futures contract adjusts the prices to remove these jumps and time differences to create an artificial price series.
      \vskip1ex
      The function \texttt{reqHistoricalData()} downloads historical data to a \texttt{.csv} file.
      \vskip1ex
      The historical bar data fields are \texttt{"Open", "High", "Low", "Close", "Volume", "WAP", "Count"}.
      \vskip1ex
      Interactive Brokers provides more information about historical Continuous Contract Futures market data:\\
      \hskip1em\href{https://www.interactivebrokers.com/en/software/tws/usersguidebook/technicalanalytics/continuous.htm}{Continuous Contract Futures Data}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Define S&P Emini futures June 2018 contract
# NOTE(review): this defines a single expired contract with twsFuture(),
# not a continuous contract - confirm this is the intended draft code
symboln <- "ES"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  include_expired="1",
  exch="GLOBEX", expiry="201806")
# Open file connection for data download
dirn <- "/Users/jerzy/Develop/data/ib_data"
dir.create(dirn)
filen <- file.path(dirn, paste0(symboln, ".csv"))
file_connect <- file(filen, open="w")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Download historical data to file
# Requests daily bars over a duration of six months
IBrokers::reqHistoricalData(conn=ib_connect,
  Contract=contractobj,
  barSize="1 day", duration="6 M",
  file=file_connect)
# Close data file
close(file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Live \protect\emph{TAQ} Data from Interactive Brokers}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqMktData()} downloads live (real-time) trades and quotes (\emph{TAQ}) data from Interactive Brokers.
      \vskip1ex
      The function \texttt{eWrapper()} formats the real-time market events (trades and quotes), so they can be displayed or saved to a file.
      \vskip1ex
      The method \texttt{eWrapper.MktData.CSV()} formats the real-time \emph{TAQ} data so it can be saved to a \texttt{.csv} file.
      \vskip1ex
      The real-time \emph{TAQ} data fields are \emph{BidSize, BidPrice, AskPrice, AskSize, Last, LastSize, Volume}.
      \vskip1ex
      \emph{BidPrice} is the quoted bid price, \emph{AskPrice} is the quoted offer price, and \emph{Last} is the most recent traded price.
      \vskip1ex
      The \emph{TAQ} data is spaced irregularly in time, with data recorded each time a new trade or quote arrives.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Stream live trades and quotes (TAQ) for the ES futures contract
# from Interactive Brokers and write them to a .csv file.
# Define S&P Emini futures June 2019 contract
symboln <- "ES"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  exch="GLOBEX", expiry="201906")
# Open file connection for data download
dirn <- "/Users/jerzy/Develop/data/ib_data"
# dir.create(dirn)
filen <- file.path(dirn, paste0(symboln, "_taq_live.csv"))
file_connect <- file(filen, open="w")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Download live data to file
# The eWrapper is called with the IBrokers:: prefix, like all the
# other calls, so the chunk doesn't depend on library(IBrokers)
IBrokers::reqMktData(conn=ib_connect,
           Contract=contractobj,
           eventWrapper=IBrokers::eWrapper.MktData.CSV(1),
           file=file_connect)
# Close data file
close(file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Live \protect\emph{OHLC} Data from Interactive Brokers}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqRealTimeBars()} downloads live (real-time) \emph{OHLC} market data from Interactive Brokers.
      \vskip1ex
      The method \texttt{eWrapper.RealTimeBars.CSV()} formats the real-time \emph{OHLC} data so it can be saved to a \texttt{.csv} file.
      \vskip1ex
      Interactive Brokers by default only provides \texttt{5}-second bars of real-time prices (but it also provides historical data at other frequencies).
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Stream live OHLC bars for the ES futures contract to a .csv file,
# then load the file into an xts series and plot it.
# Define S&P Emini futures June 2019 contract
symboln <- "ES"
contractobj <- IBrokers::twsFuture(symbol=symboln,
  exch="GLOBEX", expiry="201906")
# Open file connection for data download
dirn <- "/Users/jerzy/Develop/data/ib_data"
# dir.create(dirn)
filen <- file.path(dirn, paste0(symboln, "_ohlc_live.csv"))
file_connect <- file(filen, open="w")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Download live data to file
# The eWrapper is called with the IBrokers:: prefix, like all the
# other calls, so the chunk doesn't depend on library(IBrokers)
IBrokers::reqRealTimeBars(conn=ib_connect,
           Contract=contractobj, barSize="1",
           eventWrapper=IBrokers::eWrapper.RealTimeBars.CSV(1),
           file=file_connect)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
# Close data file
close(file_connect)
# Load OHLC data and coerce it into xts series
library(data.table)
pricev <- data.table::fread(filen)
# Column V1 is used as the time index: seconds since 1970-01-01
pricev <- xts::xts(pricev[, paste0("V", 2:6)],
  as.POSIXct(as.numeric(pricev[, V1]), tz="America/New_York", origin="1970-01-01"))
colnames(pricev) <- c("Open", "High", "Low", "Close", "Volume")
# Plot OHLC data in x11 window
x11()
chart_Series(x=pricev, TA="add_Vo()",
             name="S&P500 ESM9 futures")
# Plot dygraph
library(dygraphs)
dygraphs::dygraph(pricev[, 1:4], main="S&P500 ESM9 futures") %>%
  dyCandlestick()
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Downloading Live \protect\emph{OHLC} Data For Multiple Contracts}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Live \protect\emph{OHLC} data can be downloaded for multiple instruments simultaneously.
      \vskip1ex
      This requires passing a \emph{list} of contracts and file connections to \texttt{reqRealTimeBars()}, and also passing the number of contracts to \texttt{eWrapper.RealTimeBars.CSV()}.
      \vskip1ex
      The bar prices for each contract are written into a separate file.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Stream live OHLC bars for two futures contracts simultaneously,
# each into its own .csv file, then load and plot each file.
library(IBrokers)
# Define list of S&P futures and 10yr Treasury contracts
contractv <- list(ES=IBrokers::twsFuture(symbol="ES", exch="GLOBEX", expiry="201906"),
                   ZN=IBrokers::twsFuture(symbol="ZN", exch="ECBOT", expiry="201906"))
# Open the file connection for storing the bar data
dirn <- "/Users/jerzy/Develop/data/ib_data"
# Both prefixes end with an underscore so the symbol is separated from the time stamp
filens <- file.path(dirn, paste0(c("ES_", "ZN_"), format(Sys.time(), format="%m_%d_%Y_%H_%M"), ".csv"))
file_connects <- lapply(filens, function(filen) file(filen, open="w"))
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
# Download live data to file
# The eWrapper is created with the number of contracts
IBrokers::reqRealTimeBars(conn=ib_connect,
                          Contract=contractv,
                          barSize="1", useRTH=FALSE,
                          eventWrapper=eWrapper.RealTimeBars.CSV(NROW(contractv)),
                          file=file_connects)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
# Close data files
for (file_connect in file_connects)
  close(file_connect)
library(data.table)
# Load ES futures June 2019 contract and coerce it into xts series
pricev <- data.table::fread(filens[1])
# Column V1 is used as the time index: seconds since 1970-01-01
pricev <- xts::xts(pricev[, paste0("V", 2:6)],
  as.POSIXct(as.numeric(pricev[, V1]), tz="America/New_York", origin="1970-01-01"))
colnames(pricev) <- c("Open", "High", "Low", "Close", "Volume")
# Plot dygraph
library(dygraphs)
dygraphs::dygraph(pricev[, 1:4], main="S&P500 ESM9 futures") %>%
  dyCandlestick()
# Load ZN 10yr Treasury futures June 2019 contract
pricev <- data.table::fread(filens[2])
pricev <- xts::xts(pricev[, paste0("V", 2:6)],
  as.POSIXct(as.numeric(pricev[, V1]), tz="America/New_York", origin="1970-01-01"))
colnames(pricev) <- c("Open", "High", "Low", "Close", "Volume")
# Plot dygraph
dygraphs::dygraph(pricev[, 1:4], main="ZN 10yr Treasury futures") %>%
  dyCandlestick()
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Event Processing Using \texttt{eWrapper()} and \texttt{twsCALLBACK()}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Functions which process real-time market events (like \texttt{reqMktData()} and \texttt{reqRealTimeBars()}) rely on the functions \texttt{eWrapper()} and \texttt{twsCALLBACK()}.
      \vskip1ex
      The function \texttt{eWrapper()} creates an \emph{eWrapper} object, consisting of a data environment and handlers (methods) for formatting and adding new data to it.
      \vskip1ex
      The function \texttt{reqMktData()} accepts an \emph{eWrapper} object and passes it into \texttt{twsCALLBACK()}.
      \vskip1ex
      The function \texttt{twsCALLBACK()} processes market events by calling functions \texttt{readBin()} and \texttt{processMsg()} in a \texttt{while()} loop.
      \vskip1ex
      The \emph{TWS} broadcasts real-time market events in the form of \texttt{character} strings, which are captured by \texttt{readBin()}.
      \vskip1ex
      The \texttt{character} strings are then parsed by \texttt{processMsg()}, and copied to the data environment of the \emph{eWrapper} object.
    \column{0.5\textwidth}
      \includegraphics[width=0.45\paperwidth]{figure/market_event_processing.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Placing Market Trade Orders on \protect\emph{TWS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqIds()} requests a trade order ID from Interactive Brokers \emph{TWS}.
      \vskip1ex
      The function \texttt{twsOrder()} creates a trade order object.
      \vskip1ex
      The parameter \texttt{orderType} specifies the type of trade order: market order (\texttt{MKT}), limit order (\texttt{LMT}), etc.
      \vskip1ex
      Each trade order requires its own ID generated by \texttt{reqIds()}.
      \vskip1ex
      The function \texttt{placeOrder()} places a trade order.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=(-(1:10)),eval=FALSE>>=
# Define S&P Emini future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ES", exch="GLOBEX", expiry="201906")
# Define euro currency contract EUR.USD
contractobj <- IBrokers::twsCurrency("EUR", currency="USD")
# Define euro currency E-mini futures June 2019 contract E7M9
contractobj <- IBrokers::twsFuture(symbol="E7", exch="GLOBEX", expiry="201906")
# Define Japanese yen currency contract JPY.USD
contractobj <- IBrokers::twsCurrency("JPY", currency="USD")
# Define Japanese yen currency E-mini futures June 2019 contract J7M9
contractobj <- IBrokers::twsFuture(symbol="J7", exch="GLOBEX", expiry="201906")
# Define Japanese yen currency futures June 2019 contract 6JM9
contractobj <- IBrokers::twsFuture(symbol="JPY", exch="GLOBEX", expiry="201906")
# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
# Request trade order ID
order_id <- IBrokers::reqIds(ib_connect)
# Create buy market order object
ib_order <- IBrokers::twsOrder(order_id,
  orderType="MKT", action="BUY", totalQuantity=1)
# Place trade order
IBrokers::placeOrder(ib_connect, contractobj, ib_order)
# Execute sell market order - with a new order ID
order_id <- IBrokers::reqIds(ib_connect)
ib_order <- IBrokers::twsOrder(order_id,
  orderType="MKT", action="SELL", totalQuantity=1)
IBrokers::placeOrder(ib_connect, contractobj, ib_order)
# Execute buy market order - with a new order ID
order_id <- IBrokers::reqIds(ib_connect)
ib_order <- IBrokers::twsOrder(order_id,
  orderType="MKT", action="BUY", totalQuantity=1)
IBrokers::placeOrder(ib_connect, contractobj, ib_order)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Placing and Cancelling Limit Trade Orders on \protect\emph{TWS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{twsOrder()} with \texttt{orderType=LMT} creates a limit trade order object.
      \vskip1ex
      The function \texttt{cancelOrder()} with the parameter \texttt{orderId=order\_id} cancels a trade order with the ID equal to \texttt{order\_id}.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Place a buy and a sell limit order and cancel each one.
# This chunk uses the connection ib_connect and the contract contractobj,
# but it doesn't define them - they must exist before running it.
# Request trade order ID
order_id <- IBrokers::reqIds(ib_connect)
# Create buy limit order object - the limit price is passed as a string
ib_order <- IBrokers::twsOrder(order_id, orderType="LMT",
  lmtPrice="1.1511", action="BUY", totalQuantity=1)
# Place trade order
IBrokers::placeOrder(ib_connect, contractobj, ib_order)
# Cancel trade order - using the same ID it was placed with
IBrokers::cancelOrder(ib_connect, order_id)
# Execute sell limit order - with a new order ID
order_id <- IBrokers::reqIds(ib_connect)
ib_order <- IBrokers::twsOrder(order_id, orderType="LMT",
  lmtPrice="1.1512", action="SELL", totalQuantity=1)
IBrokers::placeOrder(ib_connect, contractobj, ib_order)
# Cancel trade order
IBrokers::cancelOrder(ib_connect, order_id)
# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Placing Limit Orders in a Programmatic Loop}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{reqRealTimeBars()} creates data objects and then calls \texttt{twsCALLBACK()} and passes the objects to \texttt{twsCALLBACK()}.
      \vskip1ex
      \texttt{twsCALLBACK()} calls \texttt{processMsg()} in a loop, and passes the \texttt{eWrapper()} to it.
      \vskip1ex
      \texttt{processMsg()} performs if-else statements and calls eWrapper methods.
      \vskip1ex
      \texttt{eWrapper()} creates an environment for data, and methods (handlers) for formatting the data.
      \vskip1ex
      \texttt{eWrapper.RealTimeBars.CSV()} calls \texttt{eWrapper()} to create a closure (function) and returns it.
      \vskip1ex
      Closures allow creating mutable states for persistent data objects.
      \vskip1ex
      Limit trade orders can be placed inside a modified \texttt{eWrapper()} function.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create an eWrapper for real-time bars that also places limit orders.
# For every new bar, the handler realtimeBars() writes the bar to the
# file and to the console, cancels the previous pair of limit orders,
# and then places a buy limit order below the bar Low price and
# a sell limit order above the bar High price.
# Argument n: the number of contracts (one data element per contract).
# Returns: the modified eWrapper object.
# The handler uses the global objects ib_connect and contractobj.
# eWrapper_new() is defined elsewhere - presumably it initializes the
# data fields countn, buy_id, and sell_id (TODO confirm).
eWrapper_realtimebars <- function(n = 1) {
  eW <- eWrapper_new(NULL)
  # eW <- IBrokers::eWrapper(NULL)
  eW$assign.Data("data", rep(list(structure(.xts(matrix(rep(NA_real_, 7), ncol = 7), 0), .Dimnames = list(NULL, c("Open", "High", "Low", "Close", "Volume", "WAP", "Count")))), n))
  eW$realtimeBars <- function(curMsg, msg, timestamp, file, ...) {
    # msg[2] is the contract index, used to select the file and the data element
    id <- as.numeric(msg[2])
    file <- file[[id]]
    data <- eW$get.Data("data")
    attr(data[[id]], "index") <- as.numeric(msg[3])
    nr.data <- NROW(data[[id]])
    # Write to file
    cat(paste(msg[3], msg[4], msg[5], msg[6], msg[7], msg[8], msg[9], msg[10], sep = ","), "\n", file = file, append = TRUE)
    # Write to console
    # eW$countn <- eW$countn + 1
    eW$assign.Data("countn", eW$get.Data("countn") + 1)
    cat(paste0("countn=", eW$get.Data("countn"), "\tOpen=", msg[4], "\tHigh=", msg[5], "\tLow=", msg[6], "\tClose=", msg[7], "\tVolume=", msg[8]), "\n")
    # cat(paste0("Open=", msg[4], "\tHigh=", msg[5], "\tLow=", msg[6], "\tClose=", msg[7], "\tVolume=", msg[8]), "\n")
    ### Trade
    # Cancel previous trade orders
    buy_id <- eW$get.Data("buy_id")
    sell_id <- eW$get.Data("sell_id")
    if (buy_id>0) IBrokers::cancelOrder(ib_connect, buy_id)
    if (sell_id>0) IBrokers::cancelOrder(ib_connect, sell_id)
    # Execute buy limit order below the bar Low price
    # The bar fields are coerced to numeric before the arithmetic
    buy_id <- IBrokers::reqIds(ib_connect)
    buy_order <- IBrokers::twsOrder(buy_id, orderType="LMT",
                                    lmtPrice=as.numeric(msg[6])-0.25, action="BUY", totalQuantity=1)
    IBrokers::placeOrder(ib_connect, contractobj, buy_order)
    # Execute sell limit order above the bar High price
    sell_id <- IBrokers::reqIds(ib_connect)
    sell_order <- IBrokers::twsOrder(sell_id, orderType="LMT",
                                     lmtPrice=as.numeric(msg[5])+0.25, action="SELL", totalQuantity=1)
    IBrokers::placeOrder(ib_connect, contractobj, sell_order)
    # Copy new trade orders - so they can be cancelled at the next bar
    eW$assign.Data("buy_id", buy_id)
    eW$assign.Data("sell_id", sell_id)
    ### Trade finished
    # Copy the new bar into the data element of the contract
    data[[id]][nr.data, 1:7] <- as.numeric(msg[4:10])
    eW$assign.Data("data", data)
    c(curMsg, msg)
  }  # end eW$realtimeBars
  return(eW)
}  # end eWrapper_realtimebars
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Advanced Order Types and Execution Algos}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Interactive Brokers provides many different
      \hskip1em\href{https://www.interactivebrokers.com/en/index.php?f=4985}{Advanced Order Types and Execution Algos}.
      \vskip1ex
      The function \texttt{reqRealTimeBars()} creates data objects and then calls \texttt{twsCALLBACK()} and passes the objects to \texttt{twsCALLBACK()}.
      \vskip1ex
      \texttt{twsCALLBACK()} calls \texttt{processMsg()} in a loop, and passes the \texttt{eWrapper()} to it.
      \vskip1ex
      \texttt{processMsg()} performs if-else statements and calls eWrapper methods.
      \vskip1ex
      \texttt{eWrapper()} creates an environment for data, and methods (functions) for handling (formatting) the data.
      \vskip1ex
      \texttt{eWrapper.RealTimeBars.CSV()} calls \texttt{eWrapper()} to create a closure (function) and returns it.
      \vskip1ex
      Closures allow creating mutable states for persistent data objects.
      \vskip1ex
      Limit trade orders can be placed inside a modified \texttt{eWrapper()} function.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Create an eWrapper for real-time bars that also places limit orders.
# For every new bar, the handler realtimeBars() writes the bar to the
# file and to the console, cancels the previous pair of limit orders,
# and then places a buy limit order below the bar Low price and
# a sell limit order above the bar High price.
# Argument n: the number of contracts (one data element per contract).
# Returns: the modified eWrapper object.
# The handler uses the global objects ib_connect and contractobj.
# eWrapper_new() is defined elsewhere - presumably it initializes the
# data fields countn, buy_id, and sell_id (TODO confirm).
eWrapper_realtimebars <- function(n = 1) {
  eW <- eWrapper_new(NULL)
  # eW <- IBrokers::eWrapper(NULL)
  eW$assign.Data("data", rep(list(structure(.xts(matrix(rep(NA_real_, 7), ncol = 7), 0), .Dimnames = list(NULL, c("Open", "High", "Low", "Close", "Volume", "WAP", "Count")))), n))
  eW$realtimeBars <- function(curMsg, msg, timestamp, file, ...) {
    # msg[2] is the contract index, used to select the file and the data element
    id <- as.numeric(msg[2])
    file <- file[[id]]
    data <- eW$get.Data("data")
    attr(data[[id]], "index") <- as.numeric(msg[3])
    nr.data <- NROW(data[[id]])
    # Write to file
    cat(paste(msg[3], msg[4], msg[5], msg[6], msg[7], msg[8], msg[9], msg[10], sep = ","), "\n", file = file, append = TRUE)
    # Write to console
    # eW$countn <- eW$countn + 1
    eW$assign.Data("countn", eW$get.Data("countn")+1)
    cat(paste0("countn=", eW$get.Data("countn"), "\tOpen=", msg[4], "\tHigh=", msg[5], "\tLow=", msg[6], "\tClose=", msg[7], "\tVolume=", msg[8]), "\n")
    # cat(paste0("Open=", msg[4], "\tHigh=", msg[5], "\tLow=", msg[6], "\tClose=", msg[7], "\tVolume=", msg[8]), "\n")
    ### Trade
    # Cancel previous trade orders
    buy_id <- eW$get.Data("buy_id")
    sell_id <- eW$get.Data("sell_id")
    if (buy_id>0) IBrokers::cancelOrder(ib_connect, buy_id)
    if (sell_id>0) IBrokers::cancelOrder(ib_connect, sell_id)
    # Execute buy limit order below the bar Low price
    # The bar fields are coerced to numeric before the arithmetic
    buy_id <- IBrokers::reqIds(ib_connect)
    buy_order <- IBrokers::twsOrder(buy_id, orderType="LMT",
                                    lmtPrice=as.numeric(msg[6])-0.25, action="BUY", totalQuantity=1)
    IBrokers::placeOrder(ib_connect, contractobj, buy_order)
    # Execute sell limit order above the bar High price
    sell_id <- IBrokers::reqIds(ib_connect)
    sell_order <- IBrokers::twsOrder(sell_id, orderType="LMT",
                                     lmtPrice=as.numeric(msg[5])+0.25, action="SELL", totalQuantity=1)
    IBrokers::placeOrder(ib_connect, contractobj, sell_order)
    # Copy new trade orders - so they can be cancelled at the next bar
    eW$assign.Data("buy_id", buy_id)
    eW$assign.Data("sell_id", sell_id)
    ### Trade finished
    # Copy the new bar into the data element of the contract
    data[[id]][nr.data, 1:7] <- as.numeric(msg[4:10])
    eW$assign.Data("data", data)
    c(curMsg, msg)
  }  # end eW$realtimeBars
  return(eW)
}  # end eWrapper_realtimebars
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Downloading Live \protect\emph{TAQ} Data and Replaying It}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Note: The script downloads the raw data, but doesn't replay the bar data properly.
      See: \url{https://offerm.wordpress.com/2015/05/21/market-data-recording-and-playback-with-ibrokers-and-r-2/}
      \vskip1ex
      The function \texttt{reqMktData()} downloads live (real-time) trades and quotes (\emph{TAQ}) data from Interactive Brokers.
      \vskip1ex
      The function \texttt{eWrapper()} formats the real-time market events (trades and quotes), so they can be displayed or saved to a file.
      \vskip1ex
      The method \texttt{eWrapper.MktData.CSV()} formats the real-time \emph{TAQ} data so it can be saved to a \texttt{.csv} file.
      \vskip1ex
      The real-time \emph{TAQ} data fields are \emph{BidSize, BidPrice, AskPrice, AskSize, Last, LastSize, Volume}.
      \vskip1ex
      \emph{BidPrice} is the quoted bid price, \emph{AskPrice} is the quoted offer price, and \emph{Last} is the most recent traded price.
      \vskip1ex
      The \emph{TAQ} data is spaced irregularly in time, with data recorded each time a new trade or quote arrives.
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Record raw market data messages for several futures contracts
# to a file, and then replay the recorded messages from the file.
# Define S&P Emini futures June 2019 contract
snp_contract <- IBrokers::twsFuture(symbol="ES",
  exch="GLOBEX", expiry="201906")
# Define VIX futures June 2019 contract
# NOTE(review): the local symbol "VXZ8" looks like a December 2018
# contract, which disagrees with expiry="201906" - confirm
vix_contract <- IBrokers::twsFuture(symbol="VIX",
  local="VXZ8", exch="CFE", expiry="201906")
# Define 10yr Treasury futures June 2019 contract
trs_contract <- IBrokers::twsFuture(symbol="ZN",
  exch="ECBOT", expiry="201906")
# Define Emini gold futures June 2019 contract
gold_contract <- IBrokers::twsFuture(symbol="YG",
  exch="NYSELIFFE", expiry="201906")
# Define euro currency future June 2019 contract
euro_contract <- IBrokers::twsFuture(symbol="EUR",
  exch="GLOBEX", expiry="201906")

# Define data directory
dirn <- "/Users/jerzy/Develop/data/ib_data"
# dir.create(dirn)

# Open file for error messages
file_root <- "replay"
filen <- file.path(dirn, paste0(file_root, "_error.csv"))
error_connect <- file(filen, open="w")

# Open file for raw data
filen <- file.path(dirn, paste0(file_root, "_raw.csv"))
raw_connect <- file(filen, open="w")

# Create empty eWrapper to redirect error messages to error file
error_ewrapper <- IBrokers::eWrapper(debug=NULL, errfile=error_connect)

# Create eWrapper for raw data
raw_ewrapper <- IBrokers::eWrapper(debug=TRUE)

# Redirect error messages to error eWrapper (error_ewrapper),
# by replacing handler function errorMessage() in raw_ewrapper
raw_ewrapper$errorMessage <- error_ewrapper$errorMessage

# Connect to Interactive Brokers TWS
ib_connect <- IBrokers::twsConnect(port=7497)

# Get the contract details - only after the connection is open
IBrokers::reqContractDetails(conn=ib_connect, Contract=euro_contract)

# Download raw data for multiple contracts for replay
IBrokers::reqMktData(ib_connect,
  list(snp_contract, vix_contract, trs_contract, gold_contract, euro_contract),
  eventWrapper=raw_ewrapper, file=raw_connect)

# Close the Interactive Brokers API connection
IBrokers::twsDisconnect(ib_connect)

# Close data files
close(raw_connect)
close(error_connect)

## Replay the raw data

# Open file with raw data
filen <- file.path(dirn, paste0(file_root, "_raw.csv"))
# The file path is passed to twsConnect() instead of a client ID
raw_connect <- IBrokers::twsConnect(filen)
class(raw_connect) <- c("twsPlayback", class(raw_connect))
# Replay the raw data
IBrokers::reqMktData(raw_connect, list(snp_contract, vix_contract))

# Open file for data
file_connect <- file(file.path(dirn, "temp.csv"), open="w")
# Download TAQ data to file
IBrokers::reqMktData(conn=raw_connect,
           Contract=snp_contract,
           eventWrapper=IBrokers::eWrapper.MktData.CSV(1),
           file=file_connect)

# Close file for TAQ data
close(file_connect)
# Close file with raw data
IBrokers::twsDisconnect(raw_connect)


      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Defining Instrument Pairs in \protect\emph{TWS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Users can \href{https://www.interactivebrokers.com/en/index.php?f=744}{Define Pairs of Instruments} in the \emph{TWS}, using the \emph{Combo Selection} window.
      \vskip1ex
      To open the \emph{Combo Selection} window, right-click on a blank contract field and select \emph{Generic Combo}.
      \vskip1ex
      Or enter the symbol for one of the instruments and select \emph{Combinations} followed by \emph{Option Combos} from the drop down menu, and then click the tab \emph{Pair or Leg-by-leg}.
      \vskip1ex
      The pairs can be traded as a single instrument, but the execution is not guaranteed, and the bid-ask spread may be very large.
      \vskip1ex
      The tab \emph{Multiple} allows selecting a group of combination quotes on the same underlying for comparison.
      \vskip1ex
      First access the \emph{Combo Selection} window, enter the symbol for one of the instruments and select \emph{Combinations} followed by \emph{Option Combos} from the drop down menu, and then click the tab \emph{Pair or Leg-by-leg}.
      \vskip1ex
      Interactive Brokers provides more information about financial contracts here:
      \href{https://www.interactivebrokers.com/en/index.php?f=1563&p=fut}{IB Traded Products}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Examples of defining different types of contract objects.
# Each definition below overwrites contractobj, so only the
# last definition that was run is in effect.
# The calls to reqContractDetails() use the connection ib_connect,
# which isn't created in this chunk - it must already exist.
# Define AAPL stock contract (object)
contractobj <- IBrokers::twsEquity("AAPL", primary="SMART")
# Define CHF currency contract
contractobj <- IBrokers::twsCurrency("CHF", currency="USD")
# Define S&P Emini future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ES",
  exch="GLOBEX", expiry="201906")
# Define 10yr Treasury future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ZN",
  exch="ECBOT", expiry="201906")
# Define euro currency future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="EUR",
  exch="GLOBEX", expiry="201906")
# Define Gold future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="GC",
  exch="NYMEX", expiry="201906")
# Test if contract object is correct
IBrokers::is.twsContract(contractobj)
# Get list with instrument information
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
# Install the package twsInstrument
install.packages("twsInstrument", repos="http://r-forge.r-project.org")
# Define euro future using getContract() and Conid
contractobj <- twsInstrument::getContract("317631411")
# Get list with instrument information
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Defining Investment Strategies in \protect\emph{TWS}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Users can \href{https://www.interactivebrokers.com/en/index.php?f=15064}{Define Investment Strategies} in the \emph{TWS}, using the \emph{Portfolio Builder} window.
      \vskip1ex
      To open the \emph{Portfolio Builder} window, left-click on the New Window dropdown on the upper-left of \emph{TWS}, and select \emph{Portfolio Builder}.
      \vskip1ex
      Click ``Create New Strategy'' to open and edit Investment Rules in the sidecar, which is populated with sample data based on the last sorting option you selected. The main Portfolio Builder reflects changes made in the sidecar in real time.
      \vskip1ex
      Or enter the symbol for one of the instruments and select \emph{Combinations} followed by \emph{Option Combos} from the drop down menu, and then click the tab \emph{Pair or Leg-by-leg}.
      \vskip1ex
      The pairs can be traded as a single instrument, but the execution is not guaranteed, and the bid-ask spread may be very large.
      \vskip1ex
      The tab \emph{Multiple} allows selecting a group of combination quotes on the same underlying for comparison.
      \vskip1ex
      First access the \emph{Combo Selection} window, enter the symbol for one of the instruments and select \emph{Combinations} followed by \emph{Option Combos} from the drop down menu, and then click the tab \emph{Pair or Leg-by-leg}.
      \vskip1ex
      Interactive Brokers provides more information about financial contracts here:
      \href{https://www.interactivebrokers.com/en/index.php?f=1563&p=fut}{IB Traded Products}
    \column{0.5\textwidth}
      \vspace{-1em}
      <<echo=TRUE,eval=FALSE>>=
# Examples of defining different types of contract objects.
# Each definition below overwrites contractobj, so only the
# last definition that was run is in effect.
# The calls to reqContractDetails() use the connection ib_connect,
# which isn't created in this chunk - it must already exist.
# Define AAPL stock contract (object)
contractobj <- IBrokers::twsEquity("AAPL", primary="SMART")
# Define CHF currency contract
contractobj <- IBrokers::twsCurrency("CHF", currency="USD")
# Define S&P Emini future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ES",
  exch="GLOBEX", expiry="201906")
# Define 10yr Treasury future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="ZN",
  exch="ECBOT", expiry="201906")
# Define euro currency future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="EUR",
  exch="GLOBEX", expiry="201906")
# Define Gold future June 2019 contract
contractobj <- IBrokers::twsFuture(symbol="GC",
  exch="NYMEX", expiry="201906")
# Test if contract object is correct
IBrokers::is.twsContract(contractobj)
# Get list with instrument information
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
# Install the package twsInstrument
install.packages("twsInstrument", repos="http://r-forge.r-project.org")
# Define euro future using getContract() and Conid
contractobj <- twsInstrument::getContract("317631411")
# Get list with instrument information
IBrokers::reqContractDetails(conn=ib_connect, Contract=contractobj)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Google Cloud Projects}


%%%%%%%%%%%%%%%
\subsection{draft: Linux}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Advanced Package Tool, or apt, is a free software user interface that works with core libraries to handle the installation and removal of software on Debian, Ubuntu and other Linux distributions.
Debian uses the dpkg packaging system
apt is a more friendly way to handle packaging than dpkg.
apt-get is similar to apt.
apt consists of some of the most widely used features of apt-get and apt-cache, leaving aside obscure and seldom used features.
apt provides the most commonly used commands from apt-get and apt-cache.
https://itsfoss.com/apt-command-guide/
https://itsfoss.com/apt-vs-apt-get-difference/


      \vskip1ex
Get Debian version and release
cat /etc/issue
cat /etc/os-release

Install GNOME Debian Desktop Environment
https://www.gnome.org/
https://wiki.debian.org/Gnome
https://wiki.debian.org/DesktopEnvironment
Install VNC server
https://medium.com/google-cloud/graphical-user-interface-gui-for-google-compute-engine-instance-78fccda09e5c
sudo apt-get install vnc4server
vncserver


Install Windows RDP to Debian remote desktop
http://blog.technotesdesk.com/how-to-use-rdp-from-windows-to-connect-to-debian-or-ubuntu-machine
apt-get install xrdp
Run Windows RDP to Static IP Address
https://cloud.google.com/compute/docs/ip-addresses/reserve-static-external-ip-address


      \vskip1ex
Update the database of available packages using the apt tool.
apt update only updates the database of the packages.
sudo apt update
Running apt upgrade after apt update upgrades (or updates, whichever term you prefer) the installed packages to the newer version.
sudo apt upgrade

https://cran.rstudio.com/bin/linux/debian/
sudo apt install libopenblas-base
sudo apt install libssl-dev
sudo apt install libgit2-dev
sudo apt install libssh2-1-dev
sudo apt install libcurl4-openssl-dev


Install R just like a package
sudo apt install r-base
Install the development R
sudo apt install r-base-dev

Change R lib permission
cd /usr/local/lib/R
sudo chmod o+w site-library

Type "R" and inside R run:
install.packages("openssl")
install.packages("git2r")
install.packages("curl")
install.packages("httr")
install.packages("usethis")
install.packages("devtools")

Upgrade R
% remove the old version of R
sudo apt-get remove r-base
% list apt file
cat /etc/apt/sources.list

% invoke editor
sudo nano /etc/apt/sources.list
% add this line at end:
deb http://cran.rstudio.com/bin/linux/debian stretch-cran35/

% add key to apt using crypto keyserver - copied the key FCAE2A0E115C3D8A from GPG error message
sudo apt install dirmngr
\url{http://irtfweb.ifa.hawaii.edu/~lockhart/gpg/}
https://wiki.debian.org/SecureApt
https://www.gnupg.org/gph/en/manual/x56.html
gpg --list-keys
gpg --keyserver keyserver.ubuntu.com --recv-key FCAE2A0E115C3D8A
gpg -a --export FCAE2A0E115C3D8A | sudo apt-key add -


% install RStudio Server
https://www.rstudio.com/products/rstudio/download-server/
      \vskip1ex
      You should \href{https://www.interactivebrokers.com/en/index.php?f=16040}{Download the TWS} and install it.
      \vskip1ex
      Read about the \emph{TWS}
      \href{https://interactivebrokers.github.io/tws-api/initial_setup.html}{initial setup}.
      \vskip1ex
      The \emph{TWS} settings must be configured to enable the API: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ API}
      \vskip1ex
      The \emph{TWS} Java heap size should be increased to \texttt{1.5} GB: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ General}
      \vskip1ex
      Read more about the
      \href{https://ibkr.info/article/2170}{required TWS memory allocation}
      and about
      \href{https://www.interactivebrokers.com/en/software/tws/usersguidebook/priceriskanalytics/custommemory.htm}{how to change the TWS heap size}.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibapi_settings.png}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibtws_memory.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{draft: Creating Google Cloud Project}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      \vskip1ex
Install R
      https://www.rstudio.com/products/rstudio/download-server/
      https://cran.rstudio.com/bin/linux/debian/
sudo apt-get update
sudo apt-get install r-base r-base-dev
sudo apt-get install libopenblas-base
sudo apt-get install libssl-dev
sudo apt-get install libgit2-dev
sudo apt-get install libssh2-1-dev
sudo apt-get install libcurl4-openssl-dev


Change R lib permission
cd /usr/local/lib/R
sudo chmod o+w site-library

Type ``R'' to start R, then inside R run:
install.packages("openssl")
install.packages("git2r")
install.packages("curl")
install.packages("httr")
install.packages("usethis")
install.packages("devtools")


Upgrade R
% remove the old version of R
sudo apt-get remove r-base
% list file
cat /etc/apt/sources.list
% invoke editor
sudo nano /etc/apt/sources.list
% add line
deb http://cran.rstudio.com/bin/linux/debian stretch-cran35/
% add crypto keyserver - copy key from GPG error message
% https://wiki.debian.org/SecureApt
sudo apt-get install dirmngr
\verb+gpg --keyserver keyserver.ubuntu.com --recv-key FCAE2A0E115C3D8A+
\verb+gpg -a --export FCAE2A0E115C3D8A | sudo apt-key add -+
% refresh the apt package index (so that the new CRAN repository is picked up)
sudo apt-get update
% install R
sudo apt-get install r-base
% install the development R
sudo apt-get install r-base-dev

% refresh the apt package index and upgrade all the installed packages
sudo apt update
sudo apt upgrade

% install RStudio Server
https://www.rstudio.com/products/rstudio/download-server/

      \vskip1ex
      Connecting to Interactive Brokers via the API requires being logged into the IB Trader Workstation (\emph{TWS}) or the IB Gateway (\emph{IBG}).

      \vskip1ex
      You should \href{https://www.interactivebrokers.com/en/index.php?f=16040}{download the TWS} and install it.
      \vskip1ex
      Read about the \emph{TWS}
      \href{https://interactivebrokers.github.io/tws-api/initial_setup.html}{initial setup}.
      \vskip1ex
      The \emph{TWS} settings must be configured to enable the API: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ API}
      \vskip1ex
      The \emph{TWS} Java heap size should be increased to \texttt{1.5} GB: \emph{File $\rightarrow$ Global Configuration $\rightarrow$ General}
      \vskip1ex
      Read more about the
      \href{https://ibkr.info/article/2170}{required TWS memory allocation}
      and about
      \href{https://www.interactivebrokers.com/en/software/tws/usersguidebook/priceriskanalytics/custommemory.htm}{how to change the TWS heap size}.
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibapi_settings.png}
      \vspace{-1em}
      \includegraphics[width=0.45\paperwidth]{figure/ibtws_memory.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\section{Pursuing a Career in Portfolio Management}


%%%%%%%%%%%%%%%
\subsection{draft: Pursuing a Career in Portfolio Management}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.6\textwidth}
      Becoming a portfolio manager (PM) is very difficult:
      \setlength{\leftmargini}{1.0em}
      \begin{itemize}
        \item Portfolio management jobs are more difficult than they appear because of survivorship bias among portfolio managers.  There are very few successful portfolio managers, and most of the others get fired, but you never hear about them.
        \item Becoming a portfolio manager (PM) is very difficult because it requires experience and a track record.
        \item Funds only want to hire experienced PMs with a track record.  But how can you gain experience and a track record without first working as a PM?
        \item The best way to start is by getting a job as a quant supporting portfolio managers.
        \item Strong programming skills will drastically improve your chances.
        \item The best way to get a good job is through networking (personal contacts).  Personal contacts are the most valuable resource in pursuing a career in portfolio management.
        \item Networking techniques: create a GitHub account showcasing your projects (it's your calling card), and collaborate on open-source projects.
      \end{itemize}
    \column{0.4\textwidth}
  \end{columns}
\end{block}

\end{frame}


\end{document}