RFinance_2016.Rnw

% Define knitr options
% !Rnw weave=knitr
% Set global chunk options
<<knitr_setup,include=FALSE,cache=FALSE>>=
# Load knitr, which provides opts_chunk and knit_theme used below
library(knitr)
# Global chunk defaults: show the R prompt, do not evaluate chunks (eval=FALSE),
# keep the source formatting as written, and omit messages and warnings
opts_chunk$set(prompt=TRUE, eval=FALSE, tidy=FALSE, strip.white=FALSE, comment=NA, highlight=FALSE, message=FALSE, warning=FALSE, size='scriptsize', fig.width=4, fig.height=4)
# R session options; NOTE(review): dev is set here through options(),
# not through opts_chunk$set() - confirm that this is intended
options(width=60, dev='pdf')
# presumably disables the xts time zone check - TODO confirm
options(xts_check_TZ=FALSE)
# Fetch the "acid" theme and set it as the active knitr theme
thm <- knit_theme$get("acid")
knit_theme$set(thm)
@


% Define document options
\documentclass[10pt]{beamer}
\mode<presentation>
\usetheme{AnnArbor}
% \usecolortheme{whale}
% Uncover everything in a step-wise fashion
% \beamerdefaultoverlayspecification{<+->}
% mathtools package for math symbols
% \usepackage{mathtools}
% bbold package for unitary vector or matrix symbol
\usepackage{bbold}
\usepackage[latin1]{inputenc}
\usepackage{hyperref}
\usepackage{fancybox}
\usepackage{url}
\usepackage[backend=bibtex,style=alphabetic]{biblatex} % bibstyle=numeric
% \bibliographystyle{amsalpha} % doesn't work
\addbibresource{FRE_lectures.bib}
% \addbibresource[location=remote]{http://www.citeulike.org/user/jerzyp}
\renewcommand\bibfont{\footnotesize}
\renewcommand{\pgfuseimage}[1]{\scalebox{0.75}{\includegraphics{#1}}} % scale bib icons
\setbeamertemplate{bibliography item}[text] % set bib icons
% \setbeamertemplate{bibliography item}{} % remove bib icons

% \usepackage{enumerate}
% \let\emph\textbf
% \let\alert\textbf
% Define colors for hyperlinks
\definecolor{links}{HTML}{2A1B81}
\hypersetup{colorlinks=true,linkcolor=,urlcolor=links}
% Make url text footnotesize
\renewcommand\UrlFont{\footnotesize}
% Make institute text italic and small
\setbeamerfont{institute}{size=\small,shape=\itshape,bg=red,fg=red}
\setbeamerfont{date}{size=\small}
\setbeamerfont{block title}{size=\normalsize} % shape=\itshape
\setbeamerfont{block body}{size=\footnotesize}


% Title page setup
\title[Exploring Higher Order Risk Premia Using High Frequency Data]{Exploring Higher Order Risk Premia Using High Frequency Data}
\subtitle{R/Finance Chicago 2016}
\institute[NYU Tandon]{NYU Tandon School of Engineering}
\titlegraphic{\includegraphics[scale=0.2]{/Users/jerzy/Develop/R/FRE6871/image/tandon_long_color}}
\author[Jerzy Pawlowski]{Jerzy Pawlowski \emph{\href{mailto:jp3900@nyu.edu}{jp3900@nyu.edu}}}
\date{May 20, 2016}
% \pgfdeclareimage[height=0.5cm]{university-logo}{engineering_long_white}
% \logo{\pgfuseimage{engineering_long_white}}


%%%%%%%%%%%%%%%
\begin{document}


%%%%%%%%%%%%%%%
\maketitle


%%%%%%%%%%%%%%%
\section{Introduction}


%%%%%%%%%%%%%%%
\subsection{Investor \protect\emph{Utility}, \protect\emph{Risk Aversion}, \protect\emph{Prudence} and \protect\emph{Temperance}}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The \emph{expected utility} hypothesis states that investor risk and return preferences are based on the expected value of the \emph{utility} of their wealth, instead of on the level of wealth, 
      \vskip1ex
      Investor risk and return preferences depend on the signs of the derivatives of the \emph{utility} function, 
      \vskip1ex
      A \emph{utility} function with a positive first derivative implies a preference for higher \emph{returns} (first moment), while a negative second derivative implies risk aversion and a preference for lower \emph{volatility} (second moment), 
      \vskip1ex
      A positive third derivative implies \emph{prudence}, or a preference for higher \emph{skewness} (third moment), while a negative fourth derivative implies \emph{temperance}, or a preference for lower \emph{kurtosis} (fourth moment), 
      \vskip1ex
      Investors with a logarithmic \emph{utility} of wealth base their preferences on the percentage change of wealth, instead of the absolute change, and have a preference for larger odd moments and smaller even moments, 
    \column{0.5\textwidth}
      \begin{multline*}
        u(w) = u(w_0) + \mathrm{d}w \frac{\mathrm{d}u}{\mathrm{d}w} + {\mathrm{d}w}^2 \frac{1}{2} \frac{\mathrm{d}^2u}{\mathrm{d}w^2} + \\
        {\mathrm{d}w}^3 \frac{1}{3!} \frac{\mathrm{d}^3u}{\mathrm{d}w^3} + {\mathrm{d}w}^4 \frac{1}{4!} \frac{\mathrm{d}^4u}{\mathrm{d}w^4} + \ldots
      \end{multline*}
      \begin{multline*}
        \mathrm{d}E[u] = \alpha_1 mean + \alpha_2 variance + \\
        \alpha_3 skew + \alpha_4 kurtosis + \ldots
      \end{multline*}
      mean: $\bar{w}=\frac{1}{k} \sum_{i=1}^{k} w_i$
      \vskip1ex
      variance: $\hat{\sigma}^2=\frac{1}{k-1} \sum_{i=1}^{k} (w_i-\bar{w})^2$
      \vskip1ex
      skew: $\hat{s}=\frac{1}{k-1} \sum_{i=1}^{k} (\frac{w_i-\bar{w}}{\hat{\sigma}})^3$
      \vskip1ex
      kurtosis: $\hat{k}=\frac{1}{k-1} \sum_{i=1}^{k} (\frac{w_i-\bar{w}}{\hat{\sigma}})^4$
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Investor Preferences and Empirical Return Distributions}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Investor preference for higher \emph{returns} and for lower \emph{volatility} is expressed by maximizing the \emph{Sharpe} ratio, 
      \vskip1ex
      Stock price \emph{momentum} refers to the fact that stocks with high past returns tend to have high future returns, and vice versa, 
      \vskip1ex
      Stock price \emph{momentum} driven by approximately one year of past returns has been observed:\\
Eugene Fama and Kenneth French, \href{http://papers.ssrn.com/sol3/papers.cfm?abstract_id=911960}{\emph{Dissecting Anomalies}}\\
      Asness et al., \href{http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2435323}{\emph{Fact, Fiction and Momentum Investing}}
      \vskip1ex
      The question then is can momentum also work on shorter, intraday time scales?
      \vskip1ex
      Also, do higher moments (skew, kurtosis) have predictive power as well?\\
      Amaya et al., \href{http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1898735}{\emph{Does Realized Skewness Predict the Cross-Section of Equity Returns?}}\\
      \vskip1ex
      Stocks typically have negative skew and excess kurtosis, the opposite of what investors prefer, 
      \vskip1ex
      Higher moments are hard to estimate from low frequency (daily) returns, 
    \column{0.5\textwidth}
      \vspace{-3em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_ret_hist-1}
      \vspace{-3em}
      <<echo=(-(1:6))>>=
# load package "HighFreq"
library(HighFreq)
symbol <- "SPY"  # define symbol
# apply calc_rets() to SPY aggregated to daily bars
returnv <- calc_rets(xtes=to.daily(SPY))
nrows <- nrow(returnv)  # number of observations
mean_rets <- mean(returnv[, 1])  # calculate mean
sd_rets <- sd(returnv[, 1])  # calculate standard deviation
# skew and kurtosis: standardized moments over nrows
(sum(((returnv[, 1] - mean_rets)/sd_rets)^3))/nrows
(sum(((returnv[, 1] - mean_rets)/sd_rets)^4))/nrows
      @
      \vspace{-3em}
      <<earl_ret_hist,echo=TRUE,fig.width=7,fig.height=6,fig.show='hide'>>=
library(PerformanceAnalytics)
# histogram of the first column of returnv, with
# density and normal curves overlaid
chart.Histogram(returnv[, 1], main="", 
  xlim=c(-6e-5, 6e-5), 
  methods = c("add.density", "add.normal"))
# add title (main was left blank above)
title(main=paste(symbol, "density"), line=-1)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Package \texttt{HighFreq} for Managing High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Package \texttt{HighFreq} contains functions for managing high frequency \emph{TAQ} and \emph{OHLC} market data:
      \begin{itemize}
        \item reading and writing data from files, 
        \item managing time zones and aligning indices, 
        \item chaining and joining time series, 
        \item scrubbing bad data points, 
        \item converting \emph{TAQ} data to \emph{OHLC} format, 
        \item aggregating data to lower frequency,
      \end{itemize}
      \texttt{HighFreq} is inspired by the package \texttt{highfrequency}, and follows many of its conventions,
      \vskip1ex
      \texttt{HighFreq} depends on packages \texttt{xts}, \texttt{quantmod}, \texttt{lubridate}, and \texttt{caTools},
      \vskip1ex
      The function \texttt{scrub\_agg()} scrubs a single day of \emph{TAQ} data, aggregates it, and converts it to \emph{OHLC} format,
      \vskip1ex
      The function \texttt{save\_scrub\_agg()} loads, scrubs, aggregates, and binds multiple days of \emph{TAQ} data for a single symbol, and saves the \emph{OHLC} time series to a single \texttt{*.RData} file,
    \column{0.5\textwidth}
      <<echo=TRUE,eval=FALSE>>=
# install package "HighFreq" from github
install.packages("devtools")
library(devtools)
install_github(repo="algoquant/HighFreq")
# load package "HighFreq"
library(HighFreq)
# set input and output data directories
data_dir <- "/Users/jerzy/Develop/data/hfreq/src/"
output_dir <- "/Users/jerzy/Develop/data/hfreq/scrub/"
# define symbol
symbol <- "SPY"
# load a single day of TAQ data
# load() returns the names of the loaded objects
symbol <- load(
  file.path(data_dir, 
            paste0(symbol, "/2014.05.02.", 
                   symbol, ".RData")))
# scrub, aggregate single day of TAQ data to OHLC
ohlc_data <- scrub_agg(taq_data=get(symbol))
# aggregate TAQ data for symbol, save to file
save_scrub_agg(symbol, 
               data_dir=data_dir, 
               output_dir=output_dir, 
               period="minutes")
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{High Frequency \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Aggregating high frequency \emph{TAQ} data into \emph{OHLC} format with lower periodicity allows for data compression while maintaining some information about volatility,
      <<earl_ohlc_chart,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# load package "HighFreq"
library(HighFreq)
# define symbol
symbol <- "SPY"
# load OHLC data
# load() returns the names of the loaded objects
output_dir <- "/Users/jerzy/Develop/data/hfreq/scrub/"
symbol <- load(
  file.path(output_dir, 
            paste0(symbol, ".RData")))
# chart one hour of bars: 09:30 to 10:30
interval <- 
  "2013-11-11 09:30:00/2013-11-11 10:30:00"
chart_Series(SPY[interval], 
            name=symbol)
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_ohlc_chart-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Estimating Volatility From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Package \texttt{TTR} contains statistical estimators and technical indicators implemented in fast \texttt{C} code,
      \vskip1ex
      The function \texttt{volatility()} from package \texttt{TTR} estimates the volatility from \protect\emph{OHLC} data,
      \vskip1ex
      \texttt{volatility()} includes the \emph{Garman-Klass} estimator:\\
\vspace{-2em}
      \begin{displaymath}
        {\sigma}^2=\frac{1}{n} \sum_{i=1}^{n} (0.5(H_{i}-L_{i})^2 - (2\log2-1)(C_{i}-O_{i})^2)
      \end{displaymath}
\vspace{-1em}
      \vskip1ex
      and the \emph{Rogers-Satchell} estimator:\\
\vspace{-2em}
      \begin{displaymath}
        {\sigma}^2=\frac{1}{n} \sum_{i=1}^{n} ((H_{i}-O_{i})(H_{i}-C_{i}) + (L_{i}-O_{i})(L_{i}-C_{i}))
      \end{displaymath}
      \vspace{-1em}
      <<earl_ttr_runvol,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(quantmod)
library(TTR)
# date range to chart
interval <- "2013-11-11/2013-11-15"
# Yang-Zhang estimator with n=20
var_iance <- volatility(OHLC=SPY, 
                     calc="yang.zhang", n=20)
chart_Series(var_iance[interval], 
            name=paste(symbol, "vol w/ ON spikes"))
# Rogers-Satchell estimator with n=20
var_iance <- volatility(OHLC=SPY, 
                     calc="rogers.satchell", n=20)
chart_Series(var_iance[interval], 
            name=paste(symbol, "vol w/o ON spikes"))
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_ttr_runvol-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Daily Volume and Volatility From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Trading volumes typically rise in sympathy with market price volatility, 
      \vskip1ex
      The function \texttt{vol\_ohlc()} from package \texttt{HighFreq} calculates volatility estimators over an \texttt{OHLC} time series,
      \vskip1ex
      The function \texttt{agg\_ohlc()} calculates a statistical estimator over an \texttt{OHLC} time series, 
      <<daily_volume_volat,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# daily variance and volume
# apply aggfun with estimator "vol_ohlc" to each day
dailyvar <- apply.daily(x=SPY, FUN=aggfun, 
                        esti_mator="vol_ohlc")
colnames(dailyvar) <- paste0(symbol, ".var")
# sum the volume column within each day
daily_volume <- apply.daily(x=Vo(SPY), FUN=sum)
# chart the daily variance and the daily volume
chart_Series(dailyvar, 
             name=paste(symbol, "variance"))
chart_Series(daily_volume, 
             name=paste(symbol, "volume"))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/daily_volume_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Measuring Market Liquidity Using High Frequency Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Market illiquidity is defined as the market price impact resulting from supply-demand imbalance, 
      \vskip1ex
      Market illiquidity is proportional to the price volatility divided by the square root of the trading volume: 
      \begin{displaymath}
        \mathcal{L}^{-1} \sim \frac{\sigma}{\sqrt{v}}
      \end{displaymath}
      Recent research suggests that market crashes are conditioned by declining market liquidity:\\
      Donier et al., \href{http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2583743}{\emph{Why Do Markets Crash?  Bitcoin Data Offers Unprecedented Insights}}
      <<earl_crash,echo=(-(1:2)),fig.width=7,fig.height=9,fig.show='hide'>>=
par(mfrow=c(3,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# daily volume
daily_volu <- apply.daily(x=SPY, 
              FUN=function(datav) sum(Vo(datav)))
colnames(daily_volu) <- paste0(symbol, ".volume")
# chart the year 2010
# dailyvar must already exist from another chunk
interval <- "2010"
chart_Series(sqrt(dailyvar[interval]), 
             name=paste(symbol, "std dev"))
chart_Series(daily_volu[interval],
             name=paste(symbol, "volume"))
# illiquidity: sqrt of variance divided by volume
chart_Series(
  sqrt(dailyvar[interval]/daily_volu[interval]),
  name=paste(symbol, "illiquidity"))
@
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_crash-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Estimating Hurst Exponent From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent is a measure of long-term memory of a time series, and is related to its autocorrelation:
      \begin{displaymath}
        {\mathbb{E}}[\frac{(max(p)-min(p))}{\hat\sigma}]=t^H
      \end{displaymath}
      $H=0.5$ for Brownian motion (no autocorrelations), \\
      $H>0.5$ for positive autocorrelations, \\
      $H<0.5$ for negative autocorrelations, 
      \vskip1ex
      The function \texttt{hurst\_ohlc()} from package \texttt{HighFreq} calculates a Hurst-like indicator:
      \begin{displaymath}
        H=\frac{1}{n} \sum_{i=1}^{n} log(\frac{H_{i}-L_{i}}{abs(C_{i}-O_{i})})
      \end{displaymath}
      The function \texttt{agg\_ohlc()} calculates a statistical estimator over an \texttt{OHLC} time series,
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/daily_hurst.png}\\
      \vspace{-1em}
      <<daily_hurst,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
library(HighFreq)  # load package "HighFreq"
# daily Hurst exponents
# apply aggfun with estimator "hurst_ohlc" to each day
daily_hurst <- apply.daily(x=SPY, 
                           FUN=aggfun, 
                           esti_mator="hurst_ohlc")
# paste0() keeps the column name free of spaces
colnames(daily_hurst) <- 
  paste0(na_me(get(symbol)), ".Hurst")
# roll_sum()/10: presumably a 10-row rolling mean;
# the first 10 rows are dropped before charting
chart_Series(roll_sum(daily_hurst, 10)[-(1:10)]/10, 
             name=paste(symbol, "Hurst"))
# horizontal reference line at 0.5
abline(h=0.5, col="blue", lwd=2)
      @
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Daily Volatility and Hurst Exponent}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent is typically greater when market volatility is higher, and is usually above 0.5 in periods of high volatility, 
      <<daily_volat_hurst,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# roll_sum()/10: presumably a 10-row rolling mean;
# the first 10 rows are dropped before charting
chart_Series(roll_sum(dailyvar, 10)[-(1:10)]/10, 
             name=paste(symbol, "variance"))
# same transformation for the daily Hurst series
chart_Series(roll_sum(daily_hurst, 10)[-(1:10)]/10, 
             name=paste(symbol, "Hurst"))
# horizontal reference line at 0.5
abline(h=0.5, col="blue", lwd=2)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/daily_volat_hurst.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Intraday Seasonality of Volume and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Market price volatility and trading volumes are typically higher at the beginning and end of the trading sessions, 
      \vskip1ex
      The function \texttt{season\_ality()} calculates the seasonality aggregations of a statistical estimator over an \texttt{OHLC} time series,
      <<intradayveason_volume_volat,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# intraday seasonality of volume
season_volume <- season_ality(Vo(SPY))
# drop the last row
season_volume <- season_volume[-(nrow(season_volume))]
colnames(season_volume) <- 
  paste0(na_me(get(symbol)), ".season_volume")
# chart theme with hour:minute axis labels
plot_theme <- chart_theme()
plot_theme$format.labels <- "%H:%M"
# build the chart object without drawing it
chobj <- chart_Series(x=season_volume, 
  name=paste(colnames(season_volume), 
  "intraday seasonality"), theme=plot_theme, 
  plot=FALSE)
# halve the upper y-axis limit and mark it as fixed
ylim <- chobj$get_ylim()
ylim[[2]] <- structure(c(ylim[[2]][1], 
              ylim[[2]][2]/2), fixed=TRUE)
chobj$set_ylim(ylim)
plot(chobj)
# intraday seasonality of volatility
season_var <- season_ality(vol_ohlc(ohlc=SPY))
# drop the last row
season_var <- season_var[-(nrow(season_var))]
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/intradayveason_volume_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Intraday Seasonality of Liquidity and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      Market illiquidity rises in sympathy with market price volatility, 
      \vskip1ex
      Market liquidity is typically the highest at the end of the trading session, and the lowest at the beginning, 
      <<intradayveason_illiquid_volat,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# intraday seasonality of liquidity
# sqrt of vol_ohlc over volume, scaled by 10^6;
# set to 0 where the volume is not positive
season_illiquid <- season_ality(10^6*sqrt(
  ifelse(Vo(SPY)>0, vol_ohlc(ohlc=SPY)/Vo(SPY), 0)))
colnames(season_illiquid) <- 
  paste0(na_me(get(symbol)), ".season_illiquid")
# build the chart object without drawing it
chobj <- chart_Series(x=season_illiquid,
  name=paste(colnames(season_illiquid), "intraday seasonality"), 
  plot=FALSE)
# fix the y-axis limits at 0.2 and 0.6
ylim <- chobj$get_ylim()
ylim[[2]] <- structure(c(0.2, 0.6), fixed=TRUE)
chobj$set_ylim(ylim)
plot(chobj)
# intraday seasonality of volatility
season_var <- season_ality(vol_ohlc(ohlc=SPY))
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/intradayveason_illiquid_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Intraday Seasonality of Hurst Exponent and Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent typically moves higher with higher market price volatility, and is above 0.5 with high volatility, 
      <<intradayveason_hurst_volat,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# intraday seasonality of Hurst exponent
# interval must already be defined by an earlier chunk
season_hurst <- season_ality(hurst_ohlc(ohlc=SPY[interval, 1:4]))
# drop the last row
season_hurst <- season_hurst[-(nrow(season_hurst))]
colnames(season_hurst) <- paste0(na_me(get(symbol)), ".season_hurst")
# chart theme with hour:minute axis labels
plot_theme <- chart_theme()
plot_theme$format.labels <- "%H:%M"
# build the chart object without drawing it
chobj <- chart_Series(x=season_hurst, 
  name=paste(colnames(season_hurst), 
  "intraday seasonality"), theme=plot_theme, 
  plot=FALSE)
# keep the current y-axis limits but mark them as fixed
ylim <- chobj$get_ylim()
ylim[[2]] <- structure(c(ylim[[2]][1], 
              ylim[[2]][2]), fixed=TRUE)
chobj$set_ylim(ylim)
plot(chobj)
# horizontal reference line at 0.5
abline(h=0.5, col="blue", lwd=2)
# intraday seasonality of volatility
season_var <- season_ality(vol_ohlc(ohlc=SPY))
# drop the last row, as done for season_hurst, so that
# both series can be plotted against each other
season_var <- season_var[-(nrow(season_var))]
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/intradayveason_hurst_volat.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Dependence of Hurst Exponent on Volatility}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The Hurst exponent typically moves higher with higher market price volatility, and is above 0.5 with high volatility, 
      <<daily_season_hurst_volat_scatter,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# scatterplot of daily Hurst versus daily variance
rangev <- range(dailyvar)
# x-axis is cut off at a quarter of the maximum
plot(x=as.vector(dailyvar), y=as.vector(daily_hurst), 
     xlab=colnames(dailyvar), ylab=colnames(daily_hurst),  
     main="Daily Hurst and variance", 
     xlim=c(rangev[1], rangev[2]/4))
abline(h=0.5, col="blue", lwd=2)
# scatterplot of seasonal Hurst versus seasonal variance
rangev <- range(season_var)
# x-axis is cut off at half of the maximum
plot(x=as.vector(season_var), y=as.vector(season_hurst), 
     xlab=colnames(season_var), ylab=colnames(season_hurst),  
     main="Intraday seasonal Hurst and variance", 
     xlim=c(rangev[1], rangev[2]/2), ylim=c(0.35, 0.7))
abline(h=0.5, col="blue", lwd=2)
      @
    \column{0.5\textwidth}
      \vspace{-1em}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/daily_season_hurst_volat_scatter.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Estimating Skew From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{skew\_ohlc()} from package \texttt{HighFreq} calculates a skew-like indicator:\\
\vspace{-2em}
      \begin{multline*}
        \hskip-1em {s}^3=\frac{1}{n} \sum_{i=1}^{n} ((H_{i}-O_{i})(H_{i}-C_{i})(H_{i}-0.5(O_{i}+C_{i})) +\\ (L_{i}-O_{i})(L_{i}-C_{i})(L_{i}-0.5(O_{i}+C_{i})))
      \end{multline*}
      The function \texttt{roll\_agg\_ohlc()} aggregates rolling, volume weighted moment estimators,
      <<earl_highfreq_runskew,echo=(-(1:1)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# rolling variance
var_iance <- 
  roll_agg(ohlc=SPY, esti_mator="vol_ohlc")
# rolling skew
skew <- 
  roll_agg(ohlc=SPY, esti_mator="skew_ohlc")
# normalize skew by the variance to the power 1.5
skew <- skew/(var_iance)^(1.5)
# zero the first row, then carry values forward over NAs
skew[1, ] <- 0
skew <- na.locf(skew)
# date range to chart
interval <- "2013-11-11/2013-11-15"
chart_Series(var_iance[interval], 
            name=paste(symbol, "variance"))
# chart skew with the y-axis limited to [-1, 1]
chart_Series(skew[interval],
            name=paste(symbol, "Skew"), 
            ylim=c(-1, 1))
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_highfreq_runskew-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Daily Volatility and Skew From \protect\emph{OHLC} Data}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The function \texttt{agg\_ohlc()} calculates a statistical estimator over an \texttt{OHLC} time series,
      <<earl_dailyskew,echo=(-(1:2)),eval=FALSE,fig.width=7,fig.height=8,fig.show='hide'>>=
par(mfrow=c(2,1))  # set plot panels
library(HighFreq)  # load package "HighFreq"
# daily variance and skew
# apply aggfun with each estimator to each day
dailyvar <- apply.daily(x=SPY, FUN=aggfun, 
                        esti_mator="vol_ohlc")
colnames(dailyvar) <- paste0(symbol, ".var")
daily_skew <- apply.daily(x=SPY, FUN=aggfun, 
                        esti_mator="skew_ohlc")
# normalize skew by the variance to the power 1.5
daily_skew <- daily_skew/(dailyvar)^(1.5)
colnames(daily_skew) <- paste0(symbol, ".skew")
# chart from 2013-06-01 onward
interval <- "2013-06-01/"
chart_Series(dailyvar[interval], 
             name=paste(symbol, "variance"))
chart_Series(daily_skew[interval],
             name=paste(symbol, "skew"))
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_dailyskew-1}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Regression of Skews Versus Returns}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      A regression of lagged skews versus returns appears to be statistically significant, especially in periods of high volatility during the financial crisis of 2008-09, 
      \vspace{-1em}
      <<echo=(-(1:1))>>=
# scatterplot of returns versus lagged skew
returnv <- calc_rets(xtes=SPY)
# skew estimator on SPY without its fifth column
skew <- skew_ohlc(ohlc=SPY[, -5])
colnames(skew) <- paste0(symbol, ".skew")
# lag the skew and set its first row to zero
lag_skew <- lag(skew)
lag_skew[1, ] <- 0
datav <- cbind(returnv[, 1], lag_skew)
# formula: first column versus the remaining columns
formulav <- reformulate(colnames(datav)[-1], 
            response=colnames(datav)[1])
# regressions over 2010, 2011 and 2012
regmod <- lm(formulav, data=datav["2010"])
summary(regmod)$coef
summary(lm(formulav, data=datav["2011"]))$coef
summary(lm(formulav, data=datav["2012"]))$coef
      @
      <<earl_skewscatter,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# scatterplot of formulav over the 2010 rows of datav
plot(formulav, data=datav["2010"], 
     main="Returns versus lagged skew", 
     xlim=c(-2e-09, 2e-09), ylim=c(-0.01, 0.01), 
     cex=0.6, xlab="skew", ylab="rets")
# add the line of the fitted model regmod
abline(regmod, col="blue", lwd=2)
      @
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/skewscatter.png}
  \end{columns}
\end{block}

\end{frame}


%%%%%%%%%%%%%%%
\subsection{Contrarian Strategy Using Skew Oscillator}
\begin{frame}[fragile,t]{\subsecname}
\vspace{-1em}
\begin{block}{}
  \begin{columns}[T]
    \column{0.5\textwidth}
      The contrarian skew trading strategy involves long or short positions in a single unit of stock, that is opposite to the sign of the skew, 
      \vskip1ex
      Skew is calculated over one-minute bars, and trades are executed in the following period, 
      \vskip1ex
      The contrarian strategy shows good hypothetical performance before transaction costs, and since it's a liquidity providing strategy, should have very low transaction costs, 
      \vskip1ex
      The contrarian strategy is hyperactive, trading almost 46\% of the time in each period, 
      <<echo=(-(1:1))>>=
# contrarian skew trading strategy
# lag the skew to get positions
# position is the opposite of the sign of lagged skew
posv <- -sign(lag_skew)
posv[1, ] <- 0
# cumulative PnL
cumu_pnl <- cumsum(posv*returnv[, 1])
# calculate frequency of trades
# a full sign flip has absolute difference 2, so
# the factor 50 expresses flips in percent
50*sum(abs(sign(skew)-sign(lag_skew)))/nrow(skew)
# calculate transaction costs
bid_offer <- 0.001  # 10 bps for liquid ETFs
bid_offer*sum(abs(sign(skew)-sign(lag_skew)))
@
    \column{0.5\textwidth}
      \includegraphics[width=0.5\paperwidth,valign=t]{figure/earl_skewstrat-1}
      \vspace{-1em}
      <<earl_skewstrat,echo=TRUE,eval=FALSE,fig.width=7,fig.height=6,fig.show='hide'>>=
# chart cumulative PnL at the hourly end points
chart_Series(
  cumu_pnl[endpoints(cumu_pnl, on="hours"), ], 
  name=paste(symbol, "contrarian skew strategy pnl"))
@
  \end{columns}
\end{block}

\end{frame}


\end{document}