Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REV/MAINT: Switching back to Yahoo (temp) to deal with Google #1972

Closed
wants to merge 11 commits into the base branch from the source branch
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ install:
- CONDA_NPY=${NPY_VERSION_ARR[0]}${NPY_VERSION_ARR[1]}
- CONDA_PY=$TRAVIS_PYTHON_VERSION

- if [[ "$TRAVIS_SECURE_ENV_VARS" = "true" && "$TRAVIS_BRANCH" = "master" && "$TRAVIS_PULL_REQUEST" = "false" ]]; then DO_UPLOAD="true"; else DO_UPLOAD="false"; fi
- if [[ "$TRAVIS_SECURE_ENV_VARS" = "true" && "$TRAVIS_BRANCH" = "master" && "$TRAVIS_PULL_REQUEST" = "false" && $TRAVIS_PYTHON_VERSION != 3.4 ]]; then DO_UPLOAD="true"; else DO_UPLOAD="false"; fi
- |
for recipe in $(ls -d conda/*/ | xargs -I {} basename {}); do
if [[ "$recipe" = "zipline" ]]; then continue; fi
if [[ "$recipe" = "zipline" ]] && [[ $TRAVIS_PYTHON_VERSION != 3.4 ]]; then continue; fi

conda build conda/$recipe --python=$CONDA_PY --numpy=$CONDA_NPY --skip-existing --old-build-string -c quantopian -c quantopian/label/ci
RECIPE_OUTPUT=$(conda build conda/$recipe --python=$CONDA_PY --numpy=$CONDA_NPY --output)
Expand All @@ -68,7 +68,7 @@ script:

# unshallow the clone so the conda build can clone it.
- git fetch --unshallow
- exec 3>&1; ZP_OUT=$(conda build conda/zipline --python=$CONDA_PY --numpy=$CONDA_NPY -c quantopian -c quantopian/label/ci | tee >(cat - >&3))
- if [[ $TRAVIS_PYTHON_VERSION != 3.4 ]]; then exec 3>&1; ZP_OUT=$(conda build conda/zipline --python=$CONDA_PY --numpy=$CONDA_NPY -c quantopian -c quantopian/label/ci | tee >(cat - >&3))
- ZP_OUTPUT=$(echo "$ZP_OUT" | grep "anaconda upload" | awk '{print $NF}')
- if [ -z "$ZP_OUTPUT" ]; then exit 1; fi
- if [[ "$DO_UPLOAD" = "true" ]]; then anaconda -t $ANACONDA_TOKEN upload $ZP_OUTPUT -u quantopian --label ci; fi
Expand Down
35 changes: 18 additions & 17 deletions ci/make_conda_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,30 @@ def iter_stdout(cmd):

def main(env, do_upload):
for recipe in get_immediate_subdirectories('conda'):
cmd = ["conda", "build", os.path.join('conda', recipe),
"--python", env['CONDA_PY'],
"--numpy", env['CONDA_NPY'],
"--skip-existing",
"-c", "quantopian/label/ci",
"-c", "quantopian"]
if env['CONDA_PY'] != '3.4':
cmd = ["conda", "build", os.path.join('conda', recipe),
"--python", env['CONDA_PY'],
"--numpy", env['CONDA_NPY'],
"--skip-existing",
"-c", "quantopian/label/ci",
"-c", "quantopian"]

output = None
output = None

for line in iter_stdout(cmd):
print(line)
for line in iter_stdout(cmd):
print(line)

if not output:
match = PKG_PATH_PATTERN.match(line)
if match:
output = match.group('pkg_path')
if not output:
match = PKG_PATH_PATTERN.match(line)
if match:
output = match.group('pkg_path')

if output and os.path.exists(output) and do_upload:
cmd = ["anaconda", "-t", env['ANACONDA_TOKEN'],
if output and os.path.exists(output) and do_upload:
cmd = ["anaconda", "-t", env['ANACONDA_TOKEN'],
"upload", output, "-u", "quantopian", "--label", "ci"]

for line in iter_stdout(cmd):
print(line)
for line in iter_stdout(cmd):
print(line)


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion etc/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ requests-file==1.4.1
# statsmodels in turn is required for some pandas packages
scipy==0.17.1
pandas==0.18.1
pandas-datareader==0.2.1
pandas-datareader==0.5.0
# Needed for parts of pandas.stats
patsy==0.4.0
statsmodels==0.6.1
Expand Down
Binary file modified tests/resources/example_data.tar.gz
Binary file not shown.
16 changes: 0 additions & 16 deletions tests/resources/rebuild_example_data
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,6 @@ def eof(*args, **kwargs):
raise EOFError()


@click.command()
@click.option(
'--rebuild-input',
is_flag=True,
default=False,
help="Should we rebuild the input data from Yahoo?",
)

@click.pass_context
def main(ctx, rebuild_input):
"""Rebuild the perf data for test_examples
Expand All @@ -101,14 +93,6 @@ def main(ctx, rebuild_input):
# as we use in test_examples.py.
environ = {'ZIPLINE_ROOT': d.getpath('example_data/root')}

if rebuild_input:
raise NotImplementedError(
("We cannot rebuild input for Yahoo because of "
"changes Yahoo made to their API, so we cannot "
"use Yahoo data bundles anymore. This will be fixed in "
"a future release")
)

new_perf_path = d.getpath(
'example_data/new_perf/%s' % pd.__version__.replace('.', '-'),
)
Expand Down
6 changes: 5 additions & 1 deletion tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from zipline import examples
from zipline.data.bundles import register, unregister
from zipline.finance.constants import BENCHMARK_SYMBOL
from zipline.testing import test_resource_path
from zipline.testing.fixtures import WithTmpDir, ZiplineTestCase
from zipline.testing.predicates import assert_equal
Expand Down Expand Up @@ -55,7 +56,10 @@ def init_class_fixtures(cls):
serialization='pickle',
)

market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
market_data = (
'{}_benchmark.csv'.format(BENCHMARK_SYMBOL),
'treasury_curves.csv'
)
for data in market_data:
update_modified_time(
cls.tmpdir.getpath(
Expand Down
29 changes: 6 additions & 23 deletions zipline/data/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

import pandas_datareader.data as pd_reader


def get_benchmark_returns(symbol, first_date, last_date):
"""
Get a Series of benchmark returns from Google associated with `symbol`.
Default is `SPY`.
Get a Series of benchmark returns from Yahoo associated with `symbol`.
Default is `^GSPC`.

Parameters
----------
Expand All @@ -32,30 +29,16 @@ def get_benchmark_returns(symbol, first_date, last_date):
last_date : pd.Timestamp
Last date for which we want to get data.

The furthest date that Google goes back to is 1993-02-01. It has missing
data for 2008-12-15, 2009-08-11, and 2012-02-02, so we add data for the
dates for which Google is missing data.

We're also limited to 4000 days worth of data per request. If we make a
request for data that extends past 4000 trading days, we'll still only
receive 4000 days of data.

first_date is **not** included because we need the close from day N - 1 to
compute the returns for day N.
`first_date` is **not** included because we need the close from
day N - 1 to compute the returns for day N.
"""
data = pd_reader.DataReader(
symbol,
'google',
'yahoo',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the newer version works ok with yahoo again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes on my machine with a fresh zipline install it works.

first_date,
last_date
)

data = data['Close']

data[pd.Timestamp('2008-12-15')] = np.nan
data[pd.Timestamp('2009-08-11')] = np.nan
data[pd.Timestamp('2012-02-02')] = np.nan

data = data.fillna(method='ffill')
data = data['Adj Close']

return data.sort_index().tz_localize('UTC').pct_change(1).iloc[1:]
101 changes: 6 additions & 95 deletions zipline/data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from collections import OrderedDict

import logbook
import pandas as pd
from pandas_datareader.data import DataReader
import pytz
from six import iteritems
from six.moves.urllib_error import HTTPError

from .benchmarks import get_benchmark_returns
Expand All @@ -35,7 +31,7 @@

# Mapping from index symbol to appropriate bond data
INDEX_MAPPING = {
'SPY':
'^GSPC':
(treasuries, 'treasury_curves.csv', 'www.federalreserve.gov'),
'^GSPTSE':
(treasuries_can, 'treasury_curves_can.csv', 'bankofcanada.ca'),
Expand Down Expand Up @@ -91,7 +87,9 @@ def has_data_for_dates(series_or_df, first_date, last_date):
return (first <= first_date) and (last >= last_date)


def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
def load_market_data(trading_day=None,
trading_days=None,
bm_symbol='^GSPC',
environ=None):
"""
Load benchmark returns and treasury yield curves for the given calendar and
Expand All @@ -115,7 +113,7 @@ def load_market_data(trading_day=None, trading_days=None, bm_symbol='SPY',
A calendar of trading days. Also used for determining what cached
dates we should expect to have cached. Defaults to the NYSE calendar.
bm_symbol : str, optional
Symbol for the benchmark index to load. Defaults to 'SPY', the Google
Symbol for the benchmark index to load. Defaults to 'GSPC', the Yahoo
ticker for the S&P 500.

Returns
Expand Down Expand Up @@ -272,7 +270,7 @@ def ensure_treasury_data(symbol, first_date, last_date, now, environ=None):
path.
"""
loader_module, filename, source = INDEX_MAPPING.get(
symbol, INDEX_MAPPING['SPY'],
symbol, INDEX_MAPPING['^GSPC'],
)
first_date = max(first_date, loader_module.earliest_possible_date())

Expand Down Expand Up @@ -356,93 +354,6 @@ def _load_cached_data(filename, first_date, last_date, now, resource_name,
return None


def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should've been removed in #1812

"""Load closing prices from yahoo finance.

:Optional:
indexes : dict (Default: {'SPX': '^SPY'})
Financial indexes to load.
stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
Stock closing prices to load.
start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices from start date on.
end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
Retrieve prices until end date.

:Note:
This is based on code presented in a talk by Wes McKinney:
http://wesmckinney.com/files/20111017/notebook_output.pdf
"""
assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

if start is None:
start = pd.datetime(1990, 1, 1, 0, 0, 0, 0, pytz.utc)

if start is not None and end is not None:
assert start < end, "start date is later than end date."

data = OrderedDict()

if stocks is not None:
for stock in stocks:
logger.info('Loading stock: {}'.format(stock))
stock_pathsafe = stock.replace(os.path.sep, '--')
cache_filename = "{stock}-{start}-{end}.csv".format(
stock=stock_pathsafe,
start=start,
end=end).replace(':', '-')
cache_filepath = get_cache_filepath(cache_filename)
if os.path.exists(cache_filepath):
stkd = pd.DataFrame.from_csv(cache_filepath)
else:
stkd = DataReader(stock, 'yahoo', start, end).sort_index()
stkd.to_csv(cache_filepath)
data[stock] = stkd

if indexes is not None:
for name, ticker in iteritems(indexes):
logger.info('Loading index: {} ({})'.format(name, ticker))
stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
data[name] = stkd

return data


def load_from_yahoo(indexes=None,
stocks=None,
start=None,
end=None,
adjusted=True):
"""
Loads price data from Yahoo into a dataframe for each of the indicated
assets. By default, 'price' is taken from Yahoo's 'Adjusted Close',
which removes the impact of splits and dividends. If the argument
'adjusted' is False, then the non-adjusted 'close' field is used instead.

:param indexes: Financial indexes to load.
:type indexes: dict
:param stocks: Stock closing prices to load.
:type stocks: list
:param start: Retrieve prices from start date on.
:type start: datetime
:param end: Retrieve prices until end date.
:type end: datetime
:param adjusted: Adjust the price for splits and dividends.
:type adjusted: bool

"""
data = _load_raw_yahoo_data(indexes, stocks, start, end)
if adjusted:
close_key = 'Adj Close'
else:
close_key = 'Close'
df = pd.DataFrame({key: d[close_key] for key, d in iteritems(data)})
df.index = df.index.tz_localize(pytz.utc)
return df


def load_prices_from_csv(filepath, identifier_col, tz='UTC'):
data = pd.read_csv(filepath, index_col=identifier_col)
data.index = pd.DatetimeIndex(data.index, tz=tz)
Expand Down
4 changes: 3 additions & 1 deletion zipline/examples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from toolz import merge

from zipline import run_algorithm

from zipline.data.bundles.core import register

# These are used by test_examples.py to discover the examples to run.
from zipline.utils.calendars import register_calendar, get_calendar
Expand Down Expand Up @@ -69,6 +69,8 @@ def run_example(example_name, environ):
mod = EXAMPLE_MODULES[example_name]

register_calendar("YAHOO", get_calendar("NYSE"), force=True)
# for when we don't actually have a 'test' bundle
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When do we have a 'test' bundle otherwise? Did this work before?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we ever actually had a test bundle, but I don't think this failed before when we switched to Google 🤔

register('test', lambda *args: None)

return run_algorithm(
initialize=getattr(mod, 'initialize', None),
Expand Down
2 changes: 2 additions & 0 deletions zipline/finance/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,5 @@
'YM': DEFAULT_ETA, # Dow Jones e-mini
'YS': DEFAULT_ETA, # Silver e-mini
}

BENCHMARK_SYMBOL = '^GSPC'
3 changes: 2 additions & 1 deletion zipline/finance/trading.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from zipline.assets import AssetDBWriter, AssetFinder
from zipline.assets.continuous_futures import CHAIN_PREDICATES
from zipline.data.loader import load_market_data
from zipline.finance.constants import BENCHMARK_SYMBOL
from zipline.utils.calendars import get_calendar
from zipline.utils.memoize import remember_last

Expand Down Expand Up @@ -78,7 +79,7 @@ class TradingEnvironment(object):
def __init__(
self,
load=None,
bm_symbol='SPY',
bm_symbol=BENCHMARK_SYMBOL,
exchange_tz="US/Eastern",
trading_calendar=None,
asset_db_path=':memory:',
Expand Down
Loading