diff --git a/examples/get_pse_data_multiple_demo.ipynb b/examples/get_pse_data_multiple_demo.ipynb new file mode 100644 index 00000000..c3477d5b --- /dev/null +++ b/examples/get_pse_data_multiple_demo.ipynb @@ -0,0 +1,378 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "colab": { + "name": "get_pse_data_multiple_demo.ipynb", + "provenance": [], + "toc_visible": true + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "h-tZ3KhfPQW6" + }, + "source": [ + "*This notebook was run with Google Colab.*" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "FcVD1bJrL_RG" + }, + "source": [ + "from fastquant import get_pse_data, get_pse_data_multiple\r\n", + "from datetime import datetime" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "obCQra9qL_RL" + }, + "source": [ + "# Define variables" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vqnwvVoAL_RL" + }, + "source": [ + "# Define a list of stock symbols\n", + "psei = [\n", + " \"AEV\",\n", + " \"AP\",\n", + " \"AGI\",\n", + " \"AC\",\n", + " \"ALI\",\n", + " \"BPI\",\n", + " \"BDO\",\n", + " \"BLOOM\",\n", + " \"DMC\",\n", + " \"EMP\",\n", + " \"FGEN\",\n", + " \"GLO\",\n", + " \"GTCAP\",\n", + " \"ICT\",\n", + " \"JGS\",\n", + " \"JFC\",\n", + " \"LTG\",\n", + " \"MER\",\n", + " \"MEG\",\n", + " \"MPI\",\n", + " \"MBT\",\n", + " \"TEL\",\n", + " \"PGOLD\",\n", + " \"RLC\",\n", + " \"RRHI\",\n", + " \"SMC\",\n", + " \"SECB\",\n", + " \"SM\",\n", + " \"SMPH\",\n", + " \"URC\",\n", + "]\n", + "\n", + "# Define start and stop dates\n", + "start_date = 
\"2021-01-01\"\n", + "end_date = datetime.utcnow().strftime(\"%Y-%m-%d\")" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nqLGsSenL_RM" + }, + "source": [ + "# Method 1: Without parallelization" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AOLREYbCL_RM", + "outputId": "c27f80a7-2ff2-4bd4-89f9-c5925bb3898b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "%%time\n", + "\n", + "for symbol in psei:\n", + " get_pse_data(symbol, start_date, end_date)" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "193it [00:24, 7.72it/s]\n", + "193it [00:24, 8.54it/s]\n", + "193it [00:25, 8.02it/s]\n", + "193it [00:26, 8.21it/s]\n", + "193it [00:25, 8.83it/s]\n", + "193it [00:25, 8.30it/s]\n", + "193it [00:24, 8.73it/s]\n", + "193it [00:26, 8.22it/s]\n", + "193it [00:24, 5.93it/s]\n", + "193it [00:25, 6.94it/s]\n", + "193it [00:29, 9.29it/s]\n", + "193it [00:26, 8.26it/s]\n", + "193it [00:26, 8.53it/s]\n", + "193it [00:26, 7.34it/s]\n", + "193it [00:26, 7.40it/s]\n", + "193it [00:26, 6.06it/s]\n", + "193it [00:27, 9.16it/s]\n", + "193it [00:23, 8.38it/s]\n", + "193it [00:24, 9.32it/s]\n", + "193it [00:24, 7.80it/s]\n", + "193it [00:25, 7.72it/s]\n", + "193it [00:31, 6.21it/s]\n", + "193it [00:25, 9.26it/s]\n", + "193it [00:24, 7.66it/s]\n", + "193it [00:25, 6.85it/s]\n", + "193it [00:25, 7.69it/s]\n", + "193it [00:26, 6.93it/s]\n", + "193it [00:23, 9.43it/s]\n", + "193it [00:26, 8.46it/s]\n", + "193it [00:27, 8.13it/s]" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "CPU times: user 49 s, sys: 4.33 s, total: 53.3 s\n", + "Wall time: 13min 14s\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3_nL-bnRL_RN" + }, + "source": [ + "# Method 2: With parallelization" + ] + }, + { + "cell_type": 
"code", + "metadata": { + "id": "WtCjazDsL_RN", + "outputId": "2dfbca92-ec17-411c-a15d-c64ddc5b45a4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "source": [ + "%%time\n", + "\n", + "data = get_pse_data_multiple(\n", + " psei, n_jobs=-1, start_date=start_date, end_date=end_date\n", + ")" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.\n" + ], + "name": "stderr" + }, + { + "output_type": "stream", + "text": [ + "CPU times: user 110 ms, sys: 44.4 ms, total: 155 ms\n", + "Wall time: 8min 34s\n" + ], + "name": "stdout" + }, + { + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 8.6min finished\n" + ], + "name": "stderr" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iDkkY5qSL_RO" + }, + "source": [ + "## Accessing individual stock data" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ciFsQ4J1L_RO", + "outputId": "20f4b574-697a-47c7-af2a-f89322941d3a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 235 + } + }, + "source": [ + "data[\"JFC\"].head()" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
openhighlowclosevaluevolume
dt
2021-01-04NaNNaNNaN194.4NaN625440.0
2021-01-05NaNNaNNaN194.9NaN513300.0
2021-01-06NaNNaNNaN194.4NaN565340.0
2021-01-07NaNNaNNaN192.6NaN802590.0
2021-01-08NaNNaNNaN192.5NaN720470.0
\n", + "
" + ], + "text/plain": [ + " open high low close value volume\n", + "dt \n", + "2021-01-04 NaN NaN NaN 194.4 NaN 625440.0\n", + "2021-01-05 NaN NaN NaN 194.9 NaN 513300.0\n", + "2021-01-06 NaN NaN NaN 194.4 NaN 565340.0\n", + "2021-01-07 NaN NaN NaN 192.6 NaN 802590.0\n", + "2021-01-08 NaN NaN NaN 192.5 NaN 720470.0" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9LT0bBAJPEVN" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/python/fastquant/data/__init__.py b/python/fastquant/data/__init__.py index 12cb1ab6..b6d7d3f3 100644 --- a/python/fastquant/data/__init__.py +++ b/python/fastquant/data/__init__.py @@ -9,6 +9,7 @@ get_stock_table, # Combines get_phisix_data and get_pse_data_cache get_pse_data, + get_pse_data_multiple, # Gets data from PHISIX get_phisix_data, # Gets data from PSE Data Cache diff --git a/python/fastquant/data/stocks/pse.py b/python/fastquant/data/stocks/pse.py index 32df32eb..8367d57e 100644 --- a/python/fastquant/data/stocks/pse.py +++ b/python/fastquant/data/stocks/pse.py @@ -15,6 +15,7 @@ import pandas as pd from pandas.io.json import json_normalize import numpy as np +from joblib import Parallel, delayed import lxml.html as LH from tqdm import tqdm @@ -324,6 +325,42 @@ def get_pse_data( return pse_data_df.set_index("dt") +def get_pse_data_multiple(symbols, n_jobs=None, verbose=1, **kwargs): + """Return a dictionary of pricing data for the given PHISIX stock symbols. + + This is a utility function for `get_pse_data` to be able to query multiple stock data with parallelization using joblib. The return format is a dictionary whose key-value pairs are the stock symbols and their respective pricing dataframes. + + Parameters + ---------- + symbols : list of str + List of symbols of the stock in the PSE. You can refer to this link: https://www.pesobility.com/stock.
+ n_jobs : int + The maximum number of concurrently running jobs. Refer to joblib.Parallel docs for more information: https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html. + verbose : int + The verbosity level: if non-zero, progress messages are printed. Above 50, the output is sent to stdout. The frequency of the messages increases with the verbosity level. If it is more than 10, all iterations are reported. + + Returns + ------- + data : dict + Dictionary of symbols and their pricing dataframe + + Examples + -------- + # TODO: write sample usage + """ + try: + symbols = list(symbols) + except TypeError: + raise TypeError("symbols should be a list or list-like.") + + lst = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(get_pse_data)(symbol, **kwargs) for symbol in symbols + ) + data = dict(zip(symbols, lst)) + + return data + + def datestring_to_datetime(date, sep="-"): ymd = date.split(sep) errmsg = "date format must be YYYY-MM-DD"