Skip to content

Commit

Permalink
Add Data Methods
Browse files Browse the repository at this point in the history
First methods for adding data to distance approach class
  • Loading branch information
pranavwalia committed Mar 29, 2022
1 parent 2dfa281 commit 4eb9bb7
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 0 deletions.
52 changes: 52 additions & 0 deletions PairsTrading/DistanceApproach.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pandas as pd
from utilities import isSequenceOf
from pandas.api.types import is_numeric_dtype

class DistanceApproach():
    """Implement the distance approach to a pairs trading signal in 3 steps.

    - Orders and ranks pairs according to some distance metric
    - Generates a signal according to a threshold
    - Outputs a backtest according to a brokerage fee template

    This class currently provides the methods for loading and validating
    the input data frames.
    """

    def __init__(self) -> None:
        # Frame stored by setPairsData; None until a frame passes validation.
        self.pairsData = None
        # Frame stored by setTradeData; None until a frame passes validation.
        self.tradeData = None

    def setPairsData(self, data: pd.DataFrame) -> bool:
        """Validate and store the data set for generating possible pairs.

        Make sure your data frame is formatted according to the following
        structure:

            DateTime | Price Series A | Price Series B | Price Series C | ...

        Raises ValueError (from isDataWellFormed) if:
        - The data has fewer than two columns
        - The left-most column is not of a datetime64 dtype
        - Any column to the right of the date column is non-numerical

        Returns True if the pairs data was successfully set.
        """
        # isDataWellFormed either returns True or raises, so reaching the
        # assignment means the frame passed validation.
        self.isDataWellFormed(data)
        self.pairsData = data
        return True

    def setTradeData(self, data: pd.DataFrame) -> bool:
        """Validate and store the trade data set.

        The frame must satisfy the same layout rules as the pairs data
        (see isDataWellFormed).

        Raises ValueError (from isDataWellFormed) if the frame is malformed.
        Returns True if the trade data was successfully set.
        """
        self.isDataWellFormed(data)
        self.tradeData = data
        return True

    @staticmethod
    def isDataWellFormed(data: pd.DataFrame) -> bool:
        """Check whether a dataframe conforms to the DistanceApproach layout.

        Requirements: at least two columns, a datetime64 left-most column,
        and only numerical columns to its right.

        Raises ValueError describing the first requirement that is violated.
        Returns True when every requirement is met.
        """
        if len(data.columns) <= 1:
            raise ValueError('Dataframe is missing columns. Check that you have both securities and date columns')

        column_dtypes = data.dtypes
        # Comparing the dtype with the string 'datetime64' never matches a
        # concrete dtype such as datetime64[ns]; use the pandas predicate.
        if not pd.api.types.is_datetime64_any_dtype(column_dtypes.iloc[0]):
            raise ValueError('Left-Most Column is not of type datetime64')

        # Check if there is a non-numerical column to the right of date column
        if not all(is_numeric_dtype(dtype) for dtype in column_dtypes.iloc[1:]):
            raise ValueError('Detected non-numerical data-types to the right of date column')

        return True


180 changes: 180 additions & 0 deletions PairsTrading/DistanceApproachExample.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"id": "cb4eb849",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pandas.api.types import is_numeric_dtype"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "30502d1a",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'team': ['A', 'A', 'A', 'B', 'B', 'B'],\n",
" 'points': [11, 7, 8, 10, 13, 13],\n",
" 'assists': [5, 7, 7, 9, 12, 9],\n",
" 'rebounds': [11, 8, 10, 6, 6, 5]})"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "31629c65",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"is_numeric_dtype(df.iloc[0:,1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10024d10",
"metadata": {},
"outputs": [],
"source": [
"f = lambda x: is_numeric_dtype(x)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f3acf638",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>points</th>\n",
" <th>assists</th>\n",
" <th>rebounds</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>11</td>\n",
" <td>5</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13</td>\n",
" <td>12</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13</td>\n",
" <td>9</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" points assists rebounds\n",
"0 11 5 11\n",
"1 7 7 8\n",
"2 8 7 10\n",
"3 10 9 6\n",
"4 13 12 6\n",
"5 13 9 5"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[0:,1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0542b1b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Empty file added PairsTrading/README.md
Empty file.
Empty file.
10 changes: 10 additions & 0 deletions utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from sympy import false, true

def isSequenceOf(sequence: list, elements: list) -> bool:
    """Check if a sequence is restricted to the domain of elements.

    Returns True when every item of `sequence` is contained in `elements`
    (trivially True for an empty `sequence`), otherwise False.
    """
    return all(item in elements for item in sequence)

0 comments on commit 4eb9bb7

Please sign in to comment.