Skip to content

Commit

Permalink
Merge pull request #59 from stephenhky/develop
Browse files: browse the repository at this point in the history
Release 1.0.5
  • Loading branch information
stephenhky authored Feb 24, 2025
2 parents 30a1cb1 + 79305c7 commit 10c29e8
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 34 deletions.
81 changes: 49 additions & 32 deletions finsim/data/preader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -203,7 +203,16 @@ def dataframe_to_hdf(df, filepath, key):
df.to_hdf(filepath, key=key)


def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir, slicebatch=50, waittime=1, threads=True):
def generating_cached_yahoofinance_data(
symbols,
startdate,
enddate,
cacheddir,
slicebatch=50,
waittime=1,
yfinance_multithreads=False,
io_multithreads=False
):
tocache_symbols = finding_missing_symbols_in_cache(symbols, startdate, enddate, cacheddir)

logging.info('Total number of symbols: {}'.format(len(symbols)))
Expand All @@ -228,46 +237,54 @@ def generating_cached_yahoofinance_data(symbols, startdate, enddate, cacheddir,
tocache_symbols[startidx:min(startidx + slicebatch, nbsymbols)],
startdate,
enddate,
threads=threads
threads=yfinance_multithreads
)
success = True
except:
sleep(waittime)

threads = []
writing_threads = []
for symbol in dataframes:
df = dataframes[symbol]
df = df[~df['Close'].isna()]
logging.debug('Caching data for {} from {} to {}'.format(symbol, startdate, enddate))
thread = threading.Thread(
target=dataframe_to_hdf,
args=(df, os.path.join(cacheddir, '{}.h5'.format(symbol)), 'yahoodata')
)
# df.to_hdf(os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata')
thread.start()
threads.append(thread)
if len(df) > 0:
thissymbol_startdate = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d')
thissymbol_enddate = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d')
else:
thissymbol_startdate = '0000-00-00'
thissymbol_enddate = '0000-00-00'

try:
logging.debug('Creating symbol {} in metatable'.format(symbol))
newrow = table.row
newrow['symbol'] = symbol
newrow['query_startdate'] = startdate
newrow['query_enddate'] = enddate
if len(df) > 0:
newrow['data_startdate'] = datetime.strftime(df['TimeStamp'].to_list()[0].date(), '%Y-%m-%d')
newrow['data_enddate'] = datetime.strftime(df['TimeStamp'].to_list()[-1].date(), '%Y-%m-%d')
else:
newrow['data_startdate'] = '0000-00-00'
newrow['data_enddate'] = '0000-00-00'
newrow.append()
table.flush()
except tables.HDF5ExtError as e:
logging.error('Cannot append record for symbol {}'.format(symbol))
traceback.print_exc()
continue

for thread in threads:
thread.join()
logging.debug('Caching data for {} from {} to {}'.format(symbol, startdate, enddate))
if not io_multithreads:
dataframe_to_hdf(df, os.path.join(cacheddir, '{}.h5'.format(symbol)), key='yahoodata')
else:
thread = threading.Thread(
target=dataframe_to_hdf,
args=(df, os.path.join(cacheddir, '{}.h5'.format(symbol)), 'yahoodata')
)
thread.start()
writing_threads.append(thread)

try:
logging.debug('Creating symbol {} in metatable'.format(symbol))
newrow = table.row
newrow['symbol'] = symbol
newrow['query_startdate'] = startdate
newrow['query_enddate'] = enddate
newrow['data_startdate'] = thissymbol_startdate
newrow['data_enddate'] = thissymbol_enddate
newrow.append()

except tables.HDF5ExtError as e:
logging.error('Cannot append record for symbol {}'.format(symbol))
traceback.print_exc()
continue

table.flush()

if io_multithreads:
for thread in writing_threads:
thread.join()

metatable_h5file.close()

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "finsim"
version = "1.0.4"
version = "1.0.5"
authors = [
{name = "Kwan Yuet Stephen Ho", email = "stephenhky@yahoo.com.hk"}
]
Expand Down Expand Up @@ -35,7 +35,7 @@ dependencies = [
"quandl>=3.5.0",
"tqdm>=4.49.0",
"tables>=3.8.5",
"yfinance>=0.2.51",
"yfinance>=0.2.54",
"openpyxl>=3.1.0",
"Cython>=0.29.0"
]
Expand Down

0 comments on commit 10c29e8

Please sign in to comment.