Skip to content

Commit 2a55c25

Browse files
Merge remote-tracking branch 'upstream/v0.3.3' into ability_to_import_def_pfr_data
2 parents d997aa8 + 478ec27 commit 2a55c25

File tree

8 files changed

+184
-117
lines changed

8 files changed

+184
-117
lines changed

.github/workflows/lint-and-test.yml

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,63 @@
11
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
22
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
33

4-
name: Python package
4+
name: Lint and Test
55

66
on:
77
push:
88
branches: [ "main" ]
99
pull_request:
10-
branches: [ "main" ]
10+
branches: [ "main", "v*" ]
11+
types: [ opened, synchronize, reopened, ready_for_review ]
1112

1213
jobs:
1314
build:
14-
15-
runs-on: ubuntu-latest
15+
if: github.event.pull_request.draft == false
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.9", "3.10", "3.11", "3.12"]
19+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
20+
os: [macos-latest, windows-latest, ubuntu-latest]
21+
include:
22+
- python-version: "3.6"
23+
os: macos-12
24+
- python-version: "3.6"
25+
os: windows-latest
26+
- python-version: "3.6"
27+
os: ubuntu-20.04
28+
- python-version: "3.7"
29+
os: macos-12
30+
- python-version: "3.7"
31+
os: windows-latest
32+
- python-version: "3.7"
33+
os: ubuntu-latest
34+
35+
runs-on: ${{ matrix.os }}
2036

2137
steps:
22-
- uses: actions/checkout@v4
23-
- name: Set up Python ${{ matrix.python-version }}
24-
uses: actions/setup-python@v3
25-
with:
26-
python-version: ${{ matrix.python-version }}
27-
- name: Install dependencies
28-
run: |
29-
python -m pip install --upgrade pip
30-
python -m pip install flake8 pytest
31-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32-
- name: Lint with flake8
33-
run: |
34-
# stop the build if there are Python syntax errors or undefined names
35-
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36-
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37-
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38-
- name: Test with pytest
39-
run: |
40-
pytest
38+
- uses: actions/checkout@v4
39+
40+
- name: Set up Python ${{ matrix.python-version }}
41+
uses: actions/setup-python@v5
42+
with:
43+
python-version: ${{ matrix.python-version }}
44+
45+
- name: Install dependencies
46+
shell: bash
47+
run: |
48+
python -m pip install --upgrade pip
49+
python -m pip install flake8 pytest
50+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
51+
52+
- name: Lint with flake8
53+
shell: bash
54+
run: |
55+
# stop the build if there are Python syntax errors or undefined names
56+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
57+
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
58+
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
59+
60+
- name: Test with pytest
61+
shell: bash
62+
run: |
63+
pytest

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Byte-compiled / optimized / DLL files
2-
__pycache__/
2+
__pycache__
33
*.py[cod]
44
*$py.class
55

@@ -50,6 +50,7 @@ coverage.xml
5050
.hypothesis/
5151
.pytest_cache/
5252
cover/
53+
*/**/tmpcache-*
5354

5455
# Translations
5556
*.mo
@@ -139,3 +140,9 @@ dmypy.json
139140

140141
# Cython debug symbols
141142
cython_debug/
143+
144+
# Mac local files
145+
.DS_Store
146+
147+
# Dev
148+
scratch.ipynb

nfl_data_py/__init__.py

Lines changed: 54 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
name = 'nfl_data_py'
22

3-
import datetime
43
import os
54
import logging
5+
import datetime
6+
from warnings import warn
7+
from typing import Iterable
68
from concurrent.futures import ThreadPoolExecutor, as_completed
79

8-
import appdirs
910
import numpy
1011
import pandas
11-
from typing import Iterable
12+
import appdirs
13+
from urllib.error import HTTPError
1214

1315
# module level doc string
1416
__doc__ = """
@@ -142,20 +144,32 @@ def import_pbp_data(
142144
raw = pandas.DataFrame(data)
143145
raw['season'] = year
144146

145-
if all([include_participation, year >= 2016, not cache]):
147+
148+
if include_participation and not cache:
146149
path = r'https://github.com/nflverse/nflverse-data/releases/download/pbp_participation/pbp_participation_{}.parquet'.format(year)
147-
partic = pandas.read_parquet(path)
148-
raw = raw.merge(partic, how='left', on=['play_id','old_game_id'])
150+
151+
try:
152+
partic = pandas.read_parquet(path)
153+
raw = raw.merge(
154+
partic,
155+
how='left',
156+
left_on=['play_id','game_id'],
157+
right_on=['play_id','nflverse_game_id']
158+
)
159+
except HTTPError:
160+
pass
149161

150162
pbp_data.append(raw)
151163
print(str(year) + ' done.')
152164

153-
except Error as e:
165+
except Exception as e:
154166
print(e)
155167
print('Data not available for ' + str(year))
156168

157-
if pbp_data:
158-
plays = pandas.concat(pbp_data).reset_index(drop=True)
169+
if not pbp_data:
170+
return pandas.DataFrame()
171+
172+
plays = pandas.concat(pbp_data, ignore_index=True)
159173

160174
# converts float64 to float32, saves ~30% memory
161175
if downcast:
@@ -183,12 +197,10 @@ def cache_pbp(years, downcast=True, alt_path=None):
183197
if min(years) < 1999:
184198
raise ValueError('Data not available before 1999.')
185199

186-
plays = pandas.DataFrame()
187-
188200
url1 = r'https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_'
189201
url2 = r'.parquet'
190202
appname = 'nfl_data_py'
191-
appauthor = 'cooper_dff'
203+
appauthor = 'nflverse'
192204

193205
# define path for caching
194206
if alt_path is not None:
@@ -230,7 +242,15 @@ def cache_pbp(years, downcast=True, alt_path=None):
230242

231243
print(str(year) + ' done.')
232244

233-
except:
245+
except Exception as e:
246+
warn(
247+
f"Caching failed for {year}, skipping.\n"
248+
"In nfl_data_py 1.0, this will raise an exception.\n"
249+
f"Failure: {e}",
250+
DeprecationWarning,
251+
stacklevel=2
252+
)
253+
234254
next
235255

236256

@@ -432,7 +452,7 @@ def __import_rosters(release, years, columns=None):
432452
rosters = pandas.concat([
433453
pandas.read_parquet(uri.format(y))
434454
for y in years
435-
])
455+
], ignore_index=True)
436456

437457
# Post-import processing
438458
rosters['birth_date'] = pandas.to_datetime(rosters.birth_date)
@@ -728,52 +748,32 @@ def import_ids(columns=None, ids=None):
728748
"""Import mapping table of ids for most major data providers
729749
730750
Args:
731-
columns (List[str]): list of columns to return
732-
ids (List[str]): list of specific ids to return
751+
columns (Iterable[str]): list of columns to return
752+
ids (Iterable[str]): list of specific ids to return
733753
734754
Returns:
735755
DataFrame
736756
"""
737-
738-
# create list of id options
739-
avail_ids = ['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id',
740-
'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id',
741-
'cbs_id', 'rotowire_id', 'rotoworld_id', 'ktc_id', 'pfr_id',
742-
'cfbref_id', 'stats_id', 'stats_global_id', 'fantasy_data_id']
743-
avail_sites = [x[:-3] for x in avail_ids]
744-
745-
# check variable types
746-
if columns is None:
747-
columns = []
748-
749-
if ids is None:
750-
ids = []
751757

752-
if not isinstance(columns, list):
753-
raise ValueError('columns variable must be list.')
754-
755-
if not isinstance(ids, list):
756-
raise ValueError('ids variable must be list.')
757-
758-
# confirm id is in table
759-
if False in [x in avail_sites for x in ids]:
760-
raise ValueError('ids variable can only contain ' + ', '.join(avail_sites))
758+
columns = columns or []
759+
if not isinstance(columns, Iterable):
760+
raise ValueError('columns argument must be a list.')
761+
762+
ids = ids or []
763+
if not isinstance(ids, Iterable):
764+
raise ValueError('ids argument must be a list.')
761765

762-
# import data
763-
df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv')
766+
df = pandas.read_csv("https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv")
764767

765-
rem_cols = [x for x in df.columns if x not in avail_ids]
766-
tgt_ids = [x + '_id' for x in ids]
767-
768-
# filter df to just specified columns
769-
if len(columns) > 0 and len(ids) > 0:
770-
df = df[set(tgt_ids + columns)]
771-
elif len(columns) > 0 and len(ids) == 0:
772-
df = df[set(avail_ids + columns)]
773-
elif len(columns) == 0 and len(ids) > 0:
774-
df = df[set(tgt_ids + rem_cols)]
768+
id_cols = [c for c in df.columns if c.endswith('_id')]
769+
non_id_cols = [c for c in df.columns if not c.endswith('_id')]
775770

776-
return df
771+
# filter df to just specified ids + columns
772+
ret_ids = [x + '_id' for x in ids] or id_cols
773+
ret_cols = columns or non_id_cols
774+
ret_columns = list(set([*ret_ids, *ret_cols]))
775+
776+
return df[ret_columns]
777777

778778

779779
def import_contracts():
@@ -1139,33 +1139,18 @@ def clean_nfl_data(df):
11391139
'Louisiana State': 'LSU'
11401140
}
11411141

1142-
pro_tm_repl = {
1143-
'GNB': 'GB',
1144-
'KAN': 'KC',
1145-
'LA': 'LAR',
1146-
'LVR': 'LV',
1147-
'NWE': 'NE',
1148-
'NOR': 'NO',
1149-
'SDG': 'SD',
1150-
'SFO': 'SF',
1151-
'TAM': 'TB'
1152-
}
1153-
11541142
na_replace = {
11551143
'NA':numpy.nan
11561144
}
11571145

11581146
for col in df.columns:
1159-
df.replace({col:na_replace}, inplace=True)
1147+
if df[col].dtype == 'object':
1148+
df.replace({col:na_replace}, inplace=True)
11601149

11611150
if 'name' in df.columns:
11621151
df.replace({'name': name_repl}, inplace=True)
11631152

11641153
if 'col_team' in df.columns:
11651154
df.replace({'col_team': col_tm_repl}, inplace=True)
11661155

1167-
if 'name' in df.columns:
1168-
for z in player_col_tm_repl:
1169-
df[df['name'] == z[0]] = df[df['name'] == z[0]].replace({z[1]: z[2]})
1170-
11711156
return df
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)