Skip to content

Commit

Permalink
Merge pull request #58 from stephenhky/develop
Browse files Browse the repository at this point in the history
Release 1.0.4
  • Loading branch information
stephenhky authored Feb 13, 2025
2 parents b859c85 + 323e836 commit 30a1cb1
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 17 deletions.
51 changes: 36 additions & 15 deletions finsim/data/preader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
from time import sleep
import glob
from functools import lru_cache
import threading
import traceback

import pandas as pd
import tables
import yfinance as yf
import tables as tb
from tqdm import tqdm
Expand Down Expand Up @@ -196,6 +199,10 @@ def finding_missing_symbols_in_cache(symbols, startdate, enddate, cacheddir):
return sorted(list(set(symbols) - set(existing_valid_symbols)))


def dataframe_to_hdf(df, filepath, key):
    """Write a dataframe to an HDF5 file.

    Module-level wrapper around ``df.to_hdf`` so the write can be handed
    to a callable-based API (for example as a ``threading.Thread`` target).

    Args:
        df: Object exposing ``to_hdf`` (a pandas DataFrame in this module).
        filepath: Destination path of the HDF5 file.
        key: Identifier of the group inside the HDF5 store.

    Returns:
        None. The only effect is the file written by ``df.to_hdf``.
    """
    df.to_hdf(filepath, key=key)


def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir, slicebatch=50, waittime=1, threads=True):
tocache_symbols = finding_missing_symbols_in_cache(symbols, startdate, enddate, cacheddir)

Expand Down Expand Up @@ -227,26 +234,40 @@ def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir,
except:
sleep(waittime)

threads = []
for symbol in dataframes:
df = dataframes[symbol]
df = df[~df['Close'].isna()]
logging.debug('Caching data for {} from {} to {}'.format(symbol, startdate, enddate))
df.to_hdf(os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata')
thread = threading.Thread(
target=dataframe_to_hdf,
args=(df, os.path.join(cacheddir, '{}.h5'.format(symbol)), 'yahoodata')
)
# df.to_hdf(os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata')
thread.start()
threads.append(thread)

logging.debug('Creating symbol {} in metatable'.format(symbol))
newrow = table.row
newrow['symbol'] = symbol
newrow['query_startdate'] = startdate
newrow['query_enddate'] = enddate
if len(df) > 0:
newrow['data_startdate'] = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d')
newrow['data_enddate'] = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d')
else:
newrow['data_startdate'] = '0000-00-00'
newrow['data_enddate'] = '0000-00-00'
newrow.append()

table.flush()
try:
logging.debug('Creating symbol {} in metatable'.format(symbol))
newrow = table.row
newrow['symbol'] = symbol
newrow['query_startdate'] = startdate
newrow['query_enddate'] = enddate
if len(df) > 0:
newrow['data_startdate'] = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d')
newrow['data_enddate'] = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d')
else:
newrow['data_startdate'] = '0000-00-00'
newrow['data_enddate'] = '0000-00-00'
newrow.append()
table.flush()
except tables.HDF5ExtError as e:
logging.error('Cannot append record for symbol {}'.format(symbol))
traceback.print_exc()
continue

for thread in threads:
thread.join()

metatable_h5file.close()

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "finsim"
version = "1.0.3"
version = "1.0.4"
authors = [
{name = "Kwan Yuet Stephen Ho", email = "stephenhky@yahoo.com.hk"}
]
Expand Down
2 changes: 1 addition & 1 deletion test/test_stock_simulations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

import unittest
from math import exp, sqrt
from math import exp
from datetime import datetime, timedelta

import numpy as np
Expand Down

0 comments on commit 30a1cb1

Please sign in to comment.