Skip to content

Commit

Permalink
update versions (#74)
Browse files Browse the repository at this point in the history
* unpins versions

* test py3.8+ only

* formatting

* make clean

* docker tests

* fixes statcast daily and retrosheet tables

* approx test for trig
  • Loading branch information
bdilday authored Jul 15, 2023
1 parent 0b40a85 commit dec1033
Show file tree
Hide file tree
Showing 21 changed files with 92 additions and 56 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: ["3.8", "3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v1
Expand Down
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Development/test image pinned to the Python 3.10 interpreter.
FROM python:3.10

WORKDIR /workdir

# Copy only the dependency manifests (and the Makefile, see below) so the
# pip install layers are reused until one of these files changes.
COPY requirements.txt /workdir/requirements.txt
COPY requirements-dev.txt /workdir/requirements-dev.txt
# `make clean-data` below needs the Makefile inside the image; without this
# COPY the RUN step fails because make finds no rule for the target.
COPY Makefile /workdir/Makefile

RUN python3.10 -m pip install -r requirements-dev.txt
RUN python3.10 -m pip install -r requirements.txt

# Runs the Makefile's clean-data target (rm -rf pybbda/data/assets/*).
RUN make clean-data
10 changes: 8 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.PHONY: lint \
test-analysis test-data test-markov test \
clean \
clean clean-pyc \
install-dev install \
dist

Expand Down Expand Up @@ -39,12 +39,18 @@ clean-docs:
clean-data:
rm -rf pybbda/data/assets/*

clean:
clean-pyc:
find . -name '*.pyc' -exec rm -f {} +
find . -name '*.pyo' -exec rm -f {} +
find . -name '*__pycache__' -exec rm -fr {} +

clean: clean-pyc
rm -fr pybbda.egg-info
rm -fr build
rm -fr dist
rm -fr .pytest_cache


dist: clean
python setup.py bdist_wheel
python setup.py sdist
Expand Down
9 changes: 9 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Compose definition with a single service built from the Dockerfile in this
# directory. Nesting is significant in YAML: the service settings must be
# indented under services -> initdata.
version: "3.9"
services:
  initdata:
    build: .
    volumes:
      # Mount the working tree over /workdir inside the container.
      - .:/workdir
    # Print the interpreter version.
    command:
      - python3.10
      - -V
3 changes: 0 additions & 3 deletions pybbda/analysis/simulations/components/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def evolve(
second_base_running_event=SecondBaseRunningEvent.DEFAULT,
third_base_running_event=ThirdBaseRunningEvent.DEFAULT,
):

if batting_event == BattingEvent.OUT:
base_state = attr.evolve(self)

Expand All @@ -48,7 +47,6 @@ def evolve(
)

elif batting_event == BattingEvent.SINGLE:

running_events = get_running_events_cached(
batting_event,
first_base_running_event,
Expand Down Expand Up @@ -354,7 +352,6 @@ def evolve(
second_base_running_event=SecondBaseRunningEvent.DEFAULT,
third_base_running_event=ThirdBaseRunningEvent.DEFAULT,
):

outs = self.outs

if outs == self.max_outs:
Expand Down
1 change: 0 additions & 1 deletion pybbda/data/sources/baseball_reference/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def _download_csv(url):


def _save(lines, file_name, output_path):

output_file_path = os.path.join(output_path, file_name)
output_payload = "\n".join(str(line, "utf-8") for line in lines)
logger.info("saving file to {}".format(output_file_path))
Expand Down
11 changes: 9 additions & 2 deletions pybbda/data/sources/data_source/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ def _locate_file(self, name):
raise FileNotFoundError(f"Cannot find file {full_path}")

def _load(self, name):
file_full_path = self._locate_file(name)
return pd.read_csv(file_full_path)
if isinstance(name, str):
file_full_path = self._locate_file(name)
return pd.read_csv(file_full_path)
elif isinstance(name, list):
file_full_paths = [self._locate_file(n) for n in name]
dfs = [pd.read_csv(file_full_path) for file_full_path in file_full_paths]
return pd.concat(dfs, axis=0)
else:
raise TypeError

def __getattr__(self, name):
if name not in self.tables.keys():
Expand Down
2 changes: 1 addition & 1 deletion pybbda/data/sources/retrosheet/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _download_csv(url):
logger.info("downloading file from {}".format(url))
response = requests.get(url, stream=True)
if response.status_code != 200:
logger.info("there was a download error code={}", response.status_code)
# logging formats lazily with %-style placeholders; without a matching %s the
# extra argument triggers a formatting error and the status code is never shown.
logger.info("there was a download error code=%s", response.status_code)
raise FileNotFoundError
it = response.iter_lines()
return list(it)
Expand Down
13 changes: 9 additions & 4 deletions pybbda/data/sources/retrosheet/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@
from pybbda.data.sources.data_source.base import DataSource

RETROSHEET_DATA_PATH = PYBBDA_DATA_ROOT / "retrosheet"
RETROSHEET_TABLES = {"people": "people.csv"}
RETROSHEET_URLS = {
"people": "https://raw.githubusercontent.com/"
# One CSV per hexadecimal shard suffix (0-9, a-f). The f-prefix is required so
# that {c} is interpolated; a plain string would yield sixteen identical
# literal "people{c}.csv" entries.
# NOTE(review): presumably each shard is stored as "<RETROSHEET_URLS key>.csv"
# -- confirm against the updater's file naming.
RETROSHEET_TABLES = {"people": [f"people{c}.csv" for c in "0123456789abcdef"]}

RETROSHEET_URL_FMT = (
"https://raw.githubusercontent.com/"
"chadwickbureau/"
"register/"
"master/"
"data/"
"people.csv"
"people-{}.csv"
)

RETROSHEET_URLS = {
f"people{c}": RETROSHEET_URL_FMT.format(c) for c in "0123456789abcdef"
}

logger = logging.getLogger(__name__)
Expand Down
1 change: 0 additions & 1 deletion pybbda/data/sources/statcast/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def _pool_do_update(overwrite=False, season_stats=None):
def _update(
output_root=None, min_date=None, max_date=None, num_threads=2, overwrite=False
):

today = datetime.date.today()
min_date = min_date or (today - datetime.timedelta(1)).strftime("%Y-%m-%d")
max_date = max_date or today.strftime("%Y-%m-%d")
Expand Down
5 changes: 4 additions & 1 deletion pybbda/data/sources/statcast/data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging
import datetime
import io
import requests

import pandas as pd

Expand Down Expand Up @@ -67,7 +69,8 @@ def get_statcast_daily(self, player_type, start_date, end_date, player_id=""):
}
)

daily_df = pd.read_csv(url)
file_handle = io.BytesIO(requests.get(url).content)
daily_df = pd.read_csv(file_handle)
if len(daily_df) == STATCAST_QUERY_DATA_SIZE_LIMIT:
logger.warning(
"Statcast query returned %d rows which probably "
Expand Down
2 changes: 1 addition & 1 deletion pybbda/graphics/graphical_standings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

# defaults: (1.05, 1.20)
k = 0.2
EXPAND_TEXT = ExpandMultiplier(1.05*k, 1.2*k)
EXPAND_TEXT = ExpandMultiplier(1.05 * k, 1.2 * k)
EXPAND_POINTS = ExpandMultiplier(1.05, 1.2)
EXPAND_OBJECTS = ExpandMultiplier(1.05, 1.2)

Expand Down
16 changes: 8 additions & 8 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
pytest>=6.0.0
tox==3.14.3
tox
setuptools
Sphinx==3.0.3
recommonmark==0.6.0
Sphinx
recommonmark
Pygments>=2.5.1
black>=22.3.0
flake8==3.7.8
sphinx-rtd-theme==0.4.3
sphinx-gallery==0.7.0
pytest-cov~=2.10.1
pytest-xdist~=2.1.0
flake8
sphinx-rtd-theme
sphinx-gallery
pytest-cov
pytest-xdist
27 changes: 13 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
attrs~=19.3.0
beautifulsoup4~=4.8.0
lxml~=4.6.2
numpy~=1.18.1
pandas~=1.1.0
requests~=2.22.0
scipy~=1.4.1
sqlalchemy~=1.3.13
tqdm~=4.46.1
pychadwick~=0.5.0
matplotlib~=3.1.3
seaborn~=0.10.1
plotnine~=0.7.1
adjustText==0.7.3
attrs
beautifulsoup4
lxml
pandas
requests
scipy
sqlalchemy
tqdm
pychadwick>=0.6.1
matplotlib
seaborn
plotnine
adjustText
namedframes>=0.1.0
1 change: 0 additions & 1 deletion scripts/compute_default_running_probabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def process_df(df):


def get_first_to_home_on_double(ana_df):

first_to_third = ana_df.query(
"event_cd==21 and start_first_base and end_third_base"
).event_ct.sum()
Expand Down
4 changes: 3 additions & 1 deletion scripts/graphical_standings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pybbda.graphics.graphical_standings import plot_graphical_standings

import os

root = os.environ.get("PYBBDA_DATA_ROOT")
print(root)
data_root = "/home/bdilday/.pybbda/data/Lahman"
Expand All @@ -11,6 +12,7 @@
teams = ld.teams

import sys

if len(sys.argv) == 2:
yr = sys.argv[1]
else:
Expand All @@ -19,4 +21,4 @@

p = plot_graphical_standings(standings)

print(p)
print(p)
23 changes: 13 additions & 10 deletions scripts/mlb_standings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import requests
from pybbda.graphics.graphical_standings import plot_graphical_standings
import pandas as pd
Expand All @@ -16,18 +15,22 @@
)



def process_team_record(team_record):
return {"Team": team_record["team"]["teamName"],
"W": team_record["wins"],
"L": team_record["losses"],
"RS_G": team_record["runsScored"]/team_record["gamesPlayed"],
"RA_G": team_record["runsAllowed"] / team_record["gamesPlayed"]
}
return {
"Team": team_record["team"]["teamName"],
"W": team_record["wins"],
"L": team_record["losses"],
"RS_G": team_record["runsScored"] / team_record["gamesPlayed"],
"RA_G": team_record["runsAllowed"] / team_record["gamesPlayed"],
}


def process_record(record):
lg_div = record["division"]["abbreviation"]
return [{"lg_div": lg_div, **process_team_record(team_record)} for team_record in record["teamRecords"]]
return [
{"lg_div": lg_div, **process_team_record(team_record)}
for team_record in record["teamRecords"]
]


payload = requests.get(url).json()
Expand All @@ -38,4 +41,4 @@ def process_record(record):
standings = pd.DataFrame(data)
print(standings)
p = plot_graphical_standings(standings)
print(p)
print(p)
1 change: 0 additions & 1 deletion tests/analysis/projections/marcels/test_batting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def test_batting_projections():
"season, expected", [(2020, 36), (2019, 38), (2018, 41), (2017, 34), (2004, 42)]
)
def test_batting_metric_projections(season, expected):

md = MarcelProjectionsBatting()
proj = md.metric_projection("HR", season)
assert round(proj.HR.max()) == expected
Expand Down
1 change: 0 additions & 1 deletion tests/analysis/projections/marcels/test_pitching.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def test_pitching_projections():
[(2020, 242), (2019, 235), (2018, 229), (2017, 224), (2004, 207)],
)
def test_pitching_metric_projections(season, expected):

md = MarcelProjectionsPitching()
proj = md.metric_projection("SO", season)
assert round(proj.SO.max()) == expected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def test_trig_utils():
deg = 1
assert cos_in_degrees(deg) ** 2 + sin_in_degrees(deg) ** 2 == 1
assert cos_in_degrees(deg) ** 2 + sin_in_degrees(deg) ** 2 == pytest.approx(1)

deg = 30
assert cos_in_degrees(deg) == pytest.approx(sqrt(3) / 2)
Expand Down
3 changes: 1 addition & 2 deletions tests/data/test_statcast/test_statcast.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ def test_statcast_init():

def test_statcast_get_daily(statcast_data):
statcast_data.get_statcast_daily(
player_type="batter", start_date="2018-01-01", end_date="2018-01-02"
player_type="batter", start_date="2018-06-01", end_date="2018-06-02"
)


def test_statcast_validate_dates(statcast_data):

with pytest.raises(ValueError):
statcast_data.get_statcast_daily(
player_type="batter", start_date="2018-01-01", end_date="2017-12-31"
Expand Down

0 comments on commit dec1033

Please sign in to comment.