From 71d426d8f0a4f5afe84680e865ba6af3331fff3f Mon Sep 17 00:00:00 2001 From: Kwan Yuet Stephen Ho <3810067+stephenhky@users.noreply.github.com> Date: Sat, 1 Feb 2025 18:49:56 -0500 Subject: [PATCH 1/4] removed unused imports --- test/test_stock_simulations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_stock_simulations.py b/test/test_stock_simulations.py index 69d344f..a69aa63 100644 --- a/test/test_stock_simulations.py +++ b/test/test_stock_simulations.py @@ -1,6 +1,6 @@ import unittest -from math import exp, sqrt +from math import exp from datetime import datetime, timedelta import numpy as np From 5485ac439ccee2e0a7db991613b6da2e7e56d7de Mon Sep 17 00:00:00 2001 From: Kwan Yuet Stephen Ho <3810067+stephenhky@users.noreply.github.com> Date: Sat, 1 Feb 2025 18:50:08 -0500 Subject: [PATCH 2/4] initiate development of release 1.0.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ba644bf..872ddc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "finsim" -version = "1.0.3" +version = "1.0.4a1" authors = [ {name = "Kwan Yuet Stephen Ho", email = "stephenhky@yahoo.com.hk"} ] From 47919d618508d36fa0ec76c2db563eb2599569a7 Mon Sep 17 00:00:00 2001 From: Kwan Yuet Stephen Ho <3810067+stephenhky@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:36:35 -0500 Subject: [PATCH 3/4] cache generating in threads --- finsim/data/preader.py | 51 +++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/finsim/data/preader.py b/finsim/data/preader.py index 86fa6ff..b4a779a 100644 --- a/finsim/data/preader.py +++ b/finsim/data/preader.py @@ -5,8 +5,11 @@ from time import sleep import glob from functools import lru_cache +import threading +import traceback import pandas as pd +import tables import yfinance as yf import tables as tb from tqdm import tqdm @@ -196,6 +199,10 @@ def finding_missing_symbols_in_cache(symbols, startdate, enddate, cacheddir): return sorted(list(set(symbols) - set(existing_valid_symbols))) +def dataframe_to_hdf(df, filepath, key): + df.to_hdf(filepath, key=key) + + def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir, slicebatch=50, waittime=1, threads=True): tocache_symbols = finding_missing_symbols_in_cache(symbols, startdate, enddate, cacheddir) @@ -227,26 +234,40 @@ def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir, except: sleep(waittime) + threads = [] for symbol in dataframes: df = dataframes[symbol] df = df[~df['Close'].isna()] logging.debug('Caching data for {} from {} to {}'.format(symbol, startdate, enddate)) - df.to_hdf(os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata') + thread = threading.Thread( + target=dataframe_to_hdf, + args=(df, os.path.join(cacheddir, '{}.h5'.format(symbol)), 'yahoodata') + ) + # df.to_hdf(os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata') + thread.start() + threads.append(thread) - logging.debug('Creating symbol {} in metatable'.format(symbol)) - newrow = table.row - newrow['symbol'] = symbol - newrow['query_startdate'] = startdate - newrow['query_enddate'] = enddate - if len(df) > 0: - newrow['data_startdate'] = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d') - newrow['data_enddate'] = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d') - else: - newrow['data_startdate'] = '0000-00-00' - newrow['data_enddate'] = '0000-00-00' - newrow.append() - - table.flush() + try: + logging.debug('Creating symbol {} in metatable'.format(symbol)) + newrow = table.row + newrow['symbol'] = symbol + newrow['query_startdate'] = startdate + newrow['query_enddate'] = enddate + if len(df) > 0: + newrow['data_startdate'] = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d') + newrow['data_enddate'] = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d') + else: + newrow['data_startdate'] = '0000-00-00' + newrow['data_enddate'] = '0000-00-00' + newrow.append() + table.flush() + except tables.HDF5ExtError as e: + logging.error('Cannot append record for symbol {}'.format(symbol)) + traceback.print_exc() + continue + + for thread in threads: + thread.join() metatable_h5file.close() From 323e836aaf1e12e0137068ecf09ec5becd9981f5 Mon Sep 17 00:00:00 2001 From: Kwan Yuet Stephen Ho <3810067+stephenhky@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:42:08 -0500 Subject: [PATCH 4/4] release 1.0.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 872ddc2..80538f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "finsim" -version = "1.0.4a1" +version = "1.0.4" authors = [ {name = "Kwan Yuet Stephen Ho", email = "stephenhky@yahoo.com.hk"} ]