From dec1033de7e481a5d26d0c09e7a8e1a7e7043807 Mon Sep 17 00:00:00 2001 From: Ben Dilday Date: Sat, 15 Jul 2023 14:35:57 -0400 Subject: [PATCH] update versions (#74) * unpins versions * test py3.8+ only * formatting * make clean * docker tests * fixes statcast daily and retrosheet tables * approx test for trig --- .github/workflows/pythonpackage.yml | 2 +- Dockerfile | 11 ++++++++ Makefile | 10 +++++-- docker-compose.yaml | 9 +++++++ .../analysis/simulations/components/state.py | 3 --- .../sources/baseball_reference/_update.py | 1 - pybbda/data/sources/data_source/base.py | 11 ++++++-- pybbda/data/sources/retrosheet/_update.py | 2 +- pybbda/data/sources/retrosheet/data.py | 13 ++++++--- pybbda/data/sources/statcast/_update.py | 1 - pybbda/data/sources/statcast/data.py | 5 +++- pybbda/graphics/graphical_standings.py | 2 +- requirements-dev.txt | 16 +++++------ requirements.txt | 27 +++++++++---------- .../compute_default_running_probabilities.py | 1 - scripts/graphical_standings.py | 4 ++- scripts/mlb_standings.py | 23 +++++++++------- .../projections/marcels/test_batting.py | 1 - .../projections/marcels/test_pitching.py | 1 - .../batted_balls/test_batted_balls_utils.py | 2 +- tests/data/test_statcast/test_statcast.py | 3 +-- 21 files changed, 92 insertions(+), 56 deletions(-) create mode 100644 Dockerfile create mode 100644 docker-compose.yaml diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 6cf741a..a3996ca 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -9,7 +9,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.6, 3.7, 3.8] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v1 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..121edde --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.10 + +WORKDIR /workdir + +COPY requirements.txt /workdir/requirements.txt +COPY requirements-dev.txt /workdir/requirements-dev.txt + +RUN python3.10 -m pip install -r requirements-dev.txt +RUN python3.10 -m pip install -r requirements.txt + +RUN make clean-data \ No newline at end of file diff --git a/Makefile b/Makefile index 1962eed..38b701a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .PHONY: lint \ test-analysis test-data test-markov test \ -clean \ +clean clean-pyc \ install-dev install \ dist @@ -39,12 +39,18 @@ clean-docs: clean-data: rm -rf pybbda/data/assets/* -clean: +clean-pyc: + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*__pycache__' -exec rm -fr {} + + +clean: clean-pyc rm -fr pybbda.egg-info rm -fr build rm -fr dist rm -fr .pytest_cache + dist: clean python setup.py bdist_wheel python setup.py sdist diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..6392402 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,9 @@ +version: "3.9" +services: + initdata: + build: . + volumes: + - .:/workdir + command: + - python3.10 + - -V \ No newline at end of file diff --git a/pybbda/analysis/simulations/components/state.py b/pybbda/analysis/simulations/components/state.py index bc7c5ab..582410e 100644 --- a/pybbda/analysis/simulations/components/state.py +++ b/pybbda/analysis/simulations/components/state.py @@ -34,7 +34,6 @@ def evolve( second_base_running_event=SecondBaseRunningEvent.DEFAULT, third_base_running_event=ThirdBaseRunningEvent.DEFAULT, ): - if batting_event == BattingEvent.OUT: base_state = attr.evolve(self) @@ -48,7 +47,6 @@ def evolve( ) elif batting_event == BattingEvent.SINGLE: - running_events = get_running_events_cached( batting_event, first_base_running_event, @@ -354,7 +352,6 @@ def evolve( second_base_running_event=SecondBaseRunningEvent.DEFAULT, third_base_running_event=ThirdBaseRunningEvent.DEFAULT, ): - outs = self.outs if outs == self.max_outs: diff --git a/pybbda/data/sources/baseball_reference/_update.py b/pybbda/data/sources/baseball_reference/_update.py index 60aeb55..e50f06a 100644 --- a/pybbda/data/sources/baseball_reference/_update.py +++ b/pybbda/data/sources/baseball_reference/_update.py @@ -20,7 +20,6 @@ def _download_csv(url): def _save(lines, file_name, output_path): - output_file_path = os.path.join(output_path, file_name) output_payload = "\n".join(str(line, "utf-8") for line in lines) logger.info("saving file to {}".format(output_file_path)) diff --git a/pybbda/data/sources/data_source/base.py b/pybbda/data/sources/data_source/base.py index d00e441..3631feb 100644 --- a/pybbda/data/sources/data_source/base.py +++ b/pybbda/data/sources/data_source/base.py @@ -30,8 +30,15 @@ def _locate_file(self, name): raise FileNotFoundError(f"Cannot find file {full_path}") def _load(self, name): - file_full_path = self._locate_file(name) - return pd.read_csv(file_full_path) + if isinstance(name, str): + file_full_path = self._locate_file(name) + return pd.read_csv(file_full_path) + elif isinstance(name, list): + file_full_paths = [self._locate_file(n) for n in name] + dfs = [pd.read_csv(file_full_path) for file_full_path in file_full_paths] + return pd.concat(dfs, axis=0) + else: + raise TypeError def __getattr__(self, name): if name not in self.tables.keys(): diff --git a/pybbda/data/sources/retrosheet/_update.py b/pybbda/data/sources/retrosheet/_update.py index 8c8f609..6bcb49b 100644 --- a/pybbda/data/sources/retrosheet/_update.py +++ b/pybbda/data/sources/retrosheet/_update.py @@ -21,7 +21,7 @@ def _download_csv(url): logger.info("downloading file from {}".format(url)) response = requests.get(url, stream=True) if response.status_code != 200: - logger.info("there was a download error code={}", response.status_code) + logger.info("there was a download error code=", response.status_code) raise FileNotFoundError it = response.iter_lines() return list(it) diff --git a/pybbda/data/sources/retrosheet/data.py b/pybbda/data/sources/retrosheet/data.py index 41929da..04fc91f 100644 --- a/pybbda/data/sources/retrosheet/data.py +++ b/pybbda/data/sources/retrosheet/data.py @@ -9,14 +9,19 @@ from pybbda.data.sources.data_source.base import DataSource RETROSHEET_DATA_PATH = PYBBDA_DATA_ROOT / "retrosheet" -RETROSHEET_TABLES = {"people": "people.csv"} -RETROSHEET_URLS = { - "people": "https://raw.githubusercontent.com/" +RETROSHEET_TABLES = {"people": ["people{c}.csv" for c in "0123456789abcdef"]} + +RETROSHEET_URL_FMT = ( + "https://raw.githubusercontent.com/" "chadwickbureau/" "register/" "master/" "data/" - "people.csv" + "people-{}.csv" +) + +RETROSHEET_URLS = { + f"people{c}": RETROSHEET_URL_FMT.format(c) for c in "0123456789abcdef" } logger = logging.getLogger(__name__) diff --git a/pybbda/data/sources/statcast/_update.py b/pybbda/data/sources/statcast/_update.py index 7529de5..1cc9a7f 100644 --- a/pybbda/data/sources/statcast/_update.py +++ b/pybbda/data/sources/statcast/_update.py @@ -88,7 +88,6 @@ def _pool_do_update(overwrite=False, season_stats=None): def _update( output_root=None, min_date=None, max_date=None, num_threads=2, overwrite=False ): - today = datetime.date.today() min_date = min_date or (today - datetime.timedelta(1)).strftime("%Y-%m-%d") max_date = max_date or today.strftime("%Y-%m-%d") diff --git a/pybbda/data/sources/statcast/data.py b/pybbda/data/sources/statcast/data.py index 634da08..9986843 100644 --- a/pybbda/data/sources/statcast/data.py +++ b/pybbda/data/sources/statcast/data.py @@ -1,5 +1,7 @@ import logging import datetime +import io +import requests import pandas as pd @@ -67,7 +69,8 @@ def get_statcast_daily(self, player_type, start_date, end_date, player_id=""): } ) - daily_df = pd.read_csv(url) + file_handle = io.BytesIO(requests.get(url).content) + daily_df = pd.read_csv(file_handle) if len(daily_df) == STATCAST_QUERY_DATA_SIZE_LIMIT: logger.warning( "Statcast query returned %d rows which probably " diff --git a/pybbda/graphics/graphical_standings.py b/pybbda/graphics/graphical_standings.py index 31ccc73..d0eb8d2 100644 --- a/pybbda/graphics/graphical_standings.py +++ b/pybbda/graphics/graphical_standings.py @@ -32,7 +32,7 @@ # defaults: (1.05, 1.20) k = 0.2 -EXPAND_TEXT = ExpandMultiplier(1.05*k, 1.2*k) +EXPAND_TEXT = ExpandMultiplier(1.05 * k, 1.2 * k) EXPAND_POINTS = ExpandMultiplier(1.05, 1.2) EXPAND_OBJECTS = ExpandMultiplier(1.05, 1.2) diff --git a/requirements-dev.txt b/requirements-dev.txt index bb8c622..0e13747 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,12 +1,12 @@ pytest>=6.0.0 -tox==3.14.3 +tox setuptools -Sphinx==3.0.3 -recommonmark==0.6.0 +Sphinx +recommonmark Pygments>=2.5.1 black>=22.3.0 -flake8==3.7.8 -sphinx-rtd-theme==0.4.3 -sphinx-gallery==0.7.0 -pytest-cov~=2.10.1 -pytest-xdist~=2.1.0 +flake8 +sphinx-rtd-theme +sphinx-gallery +pytest-cov +pytest-xdist diff --git a/requirements.txt b/requirements.txt index 8dad20c..81e7dab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,14 @@ -attrs~=19.3.0 -beautifulsoup4~=4.8.0 -lxml~=4.6.2 -numpy~=1.18.1 -pandas~=1.1.0 -requests~=2.22.0 -scipy~=1.4.1 -sqlalchemy~=1.3.13 -tqdm~=4.46.1 -pychadwick~=0.5.0 -matplotlib~=3.1.3 -seaborn~=0.10.1 -plotnine~=0.7.1 -adjustText==0.7.3 +attrs +beautifulsoup4 +lxml +pandas +requests +scipy +sqlalchemy +tqdm +pychadwick>=0.6.1 +matplotlib +seaborn +plotnine +adjustText namedframes>=0.1.0 \ No newline at end of file diff --git a/scripts/compute_default_running_probabilities.py b/scripts/compute_default_running_probabilities.py index eb4ad06..0c981e4 100644 --- a/scripts/compute_default_running_probabilities.py +++ b/scripts/compute_default_running_probabilities.py @@ -47,7 +47,6 @@ def process_df(df): def get_first_to_home_on_double(ana_df): - first_to_third = ana_df.query( "event_cd==21 and start_first_base and end_third_base" ).event_ct.sum() diff --git a/scripts/graphical_standings.py b/scripts/graphical_standings.py index d3a4f12..637ad06 100644 --- a/scripts/graphical_standings.py +++ b/scripts/graphical_standings.py @@ -3,6 +3,7 @@ from pybbda.graphics.graphical_standings import plot_graphical_standings import os + root = os.environ.get("PYBBDA_DATA_ROOT") print(root) data_root = "/home/bdilday/.pybbda/data/Lahman" @@ -11,6 +12,7 @@ teams = ld.teams import sys + if len(sys.argv) == 2: yr = sys.argv[1] else: @@ -19,4 +21,4 @@ p = plot_graphical_standings(standings) -print(p) \ No newline at end of file +print(p) diff --git a/scripts/mlb_standings.py b/scripts/mlb_standings.py index 584ce97..10961d0 100644 --- a/scripts/mlb_standings.py +++ b/scripts/mlb_standings.py @@ -1,4 +1,3 @@ - import requests from pybbda.graphics.graphical_standings import plot_graphical_standings import pandas as pd @@ -16,18 +15,22 @@ ) - def process_team_record(team_record): - return {"Team": team_record["team"]["teamName"], - "W": team_record["wins"], - "L": team_record["losses"], - "RS_G": team_record["runsScored"]/team_record["gamesPlayed"], - "RA_G": team_record["runsAllowed"] / team_record["gamesPlayed"] - } + return { + "Team": team_record["team"]["teamName"], + "W": team_record["wins"], + "L": team_record["losses"], + "RS_G": team_record["runsScored"] / team_record["gamesPlayed"], + "RA_G": team_record["runsAllowed"] / team_record["gamesPlayed"], + } + def process_record(record): lg_div = record["division"]["abbreviation"] - return [{"lg_div": lg_div, **process_team_record(team_record)} for team_record in record["teamRecords"]] + return [ + {"lg_div": lg_div, **process_team_record(team_record)} + for team_record in record["teamRecords"] + ] payload = requests.get(url).json() @@ -38,4 +41,4 @@ def process_record(record): standings = pd.DataFrame(data) print(standings) p = plot_graphical_standings(standings) -print(p) \ No newline at end of file +print(p) diff --git a/tests/analysis/projections/marcels/test_batting.py b/tests/analysis/projections/marcels/test_batting.py index 53b9397..a43aec7 100644 --- a/tests/analysis/projections/marcels/test_batting.py +++ b/tests/analysis/projections/marcels/test_batting.py @@ -12,7 +12,6 @@ def test_batting_projections(): "season, expected", [(2020, 36), (2019, 38), (2018, 41), (2017, 34), (2004, 42)] ) def test_batting_metric_projections(season, expected): - md = MarcelProjectionsBatting() proj = md.metric_projection("HR", season) assert round(proj.HR.max()) == expected diff --git a/tests/analysis/projections/marcels/test_pitching.py b/tests/analysis/projections/marcels/test_pitching.py index 42c9f7f..d32a3eb 100644 --- a/tests/analysis/projections/marcels/test_pitching.py +++ b/tests/analysis/projections/marcels/test_pitching.py @@ -13,7 +13,6 @@ def test_pitching_projections(): [(2020, 242), (2019, 235), (2018, 229), (2017, 224), (2004, 207)], ) def test_pitching_metric_projections(season, expected): - md = MarcelProjectionsPitching() proj = md.metric_projection("SO", season) assert round(proj.SO.max()) == expected diff --git a/tests/analysis/trajectories/batted_balls/test_batted_balls_utils.py b/tests/analysis/trajectories/batted_balls/test_batted_balls_utils.py index 1b8addd..a0e00c2 100644 --- a/tests/analysis/trajectories/batted_balls/test_batted_balls_utils.py +++ b/tests/analysis/trajectories/batted_balls/test_batted_balls_utils.py @@ -5,7 +5,7 @@ def test_trig_utils(): deg = 1 - assert cos_in_degrees(deg) ** 2 + sin_in_degrees(deg) ** 2 == 1 + assert cos_in_degrees(deg) ** 2 + sin_in_degrees(deg) ** 2 == pytest.approx(1) deg = 30 assert cos_in_degrees(deg) == pytest.approx(sqrt(3) / 2) diff --git a/tests/data/test_statcast/test_statcast.py b/tests/data/test_statcast/test_statcast.py index 9c690c7..0c085a9 100644 --- a/tests/data/test_statcast/test_statcast.py +++ b/tests/data/test_statcast/test_statcast.py @@ -13,12 +13,11 @@ def test_statcast_init(): def test_statcast_get_daily(statcast_data): statcast_data.get_statcast_daily( - player_type="batter", start_date="2018-01-01", end_date="2018-01-02" + player_type="batter", start_date="2018-06-01", end_date="2018-06-02" ) def test_statcast_validate_dates(statcast_data): - with pytest.raises(ValueError): statcast_data.get_statcast_daily( player_type="batter", start_date="2018-01-01", end_date="2017-12-31"