|
1 | 1 | name = 'nfl_data_py'
|
2 | 2 |
|
3 |
| -import datetime |
4 | 3 | import os
|
5 | 4 | import logging
|
6 |
| -from concurrent.futures import ThreadPoolExecutor, as_completed |
| 5 | +import datetime |
7 | 6 | from warnings import warn
|
| 7 | +from typing import Iterable |
| 8 | +from concurrent.futures import ThreadPoolExecutor, as_completed |
8 | 9 |
|
9 |
| -import appdirs |
10 | 10 | import numpy
|
11 | 11 | import pandas
|
12 |
| -from typing import Iterable |
| 12 | +import appdirs |
13 | 13 |
|
14 | 14 | # module level doc string
|
15 | 15 | __doc__ = """
|
@@ -735,52 +735,32 @@ def import_ids(columns=None, ids=None):
|
735 | 735 | """Import mapping table of ids for most major data providers
|
736 | 736 |
|
737 | 737 | Args:
|
738 |
| - columns (List[str]): list of columns to return |
739 |
| - ids (List[str]): list of specific ids to return |
| 738 | + columns (Iterable[str]): list of columns to return |
| 739 | + ids (Iterable[str]): list of specific ids to return |
740 | 740 |
|
741 | 741 | Returns:
|
742 | 742 | DataFrame
|
743 | 743 | """
|
744 |
| - |
745 |
| - # create list of id options |
746 |
| - avail_ids = ['mfl_id', 'sportradar_id', 'fantasypros_id', 'gsis_id', 'pff_id', |
747 |
| - 'sleeper_id', 'nfl_id', 'espn_id', 'yahoo_id', 'fleaflicker_id', |
748 |
| - 'cbs_id', 'rotowire_id', 'rotoworld_id', 'ktc_id', 'pfr_id', |
749 |
| - 'cfbref_id', 'stats_id', 'stats_global_id', 'fantasy_data_id'] |
750 |
| - avail_sites = [x[:-3] for x in avail_ids] |
751 |
| - |
752 |
| - # check variable types |
753 |
| - if columns is None: |
754 |
| - columns = [] |
755 |
| - |
756 |
| - if ids is None: |
757 |
| - ids = [] |
758 | 744 |
|
759 |
| - if not isinstance(columns, list): |
760 |
| - raise ValueError('columns variable must be list.') |
761 |
| - |
762 |
| - if not isinstance(ids, list): |
763 |
| - raise ValueError('ids variable must be list.') |
764 |
| - |
765 |
| - # confirm id is in table |
766 |
| - if False in [x in avail_sites for x in ids]: |
767 |
| - raise ValueError('ids variable can only contain ' + ', '.join(avail_sites)) |
| 745 | + columns = columns or [] |
| 746 | + if not isinstance(columns, Iterable): |
| 747 | + raise ValueError('columns argument must be a list.') |
| 748 | + |
| 749 | + ids = ids or [] |
| 750 | + if not isinstance(ids, Iterable): |
| 751 | + raise ValueError('ids argument must be a list.') |
768 | 752 |
|
769 |
| - # import data |
770 |
| - df = pandas.read_csv(r'https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv') |
| 753 | + df = pandas.read_csv("https://raw.githubusercontent.com/dynastyprocess/data/master/files/db_playerids.csv") |
771 | 754 |
|
772 |
| - rem_cols = [x for x in df.columns if x not in avail_ids] |
773 |
| - tgt_ids = [x + '_id' for x in ids] |
774 |
| - |
775 |
| - # filter df to just specified columns |
776 |
| - if len(columns) > 0 and len(ids) > 0: |
777 |
| - df = df[set(tgt_ids + columns)] |
778 |
| - elif len(columns) > 0 and len(ids) == 0: |
779 |
| - df = df[set(avail_ids + columns)] |
780 |
| - elif len(columns) == 0 and len(ids) > 0: |
781 |
| - df = df[set(tgt_ids + rem_cols)] |
| 755 | + id_cols = [c for c in df.columns if c.endswith('_id')] |
| 756 | + non_id_cols = [c for c in df.columns if not c.endswith('_id')] |
782 | 757 |
|
783 |
| - return df |
| 758 | + # filter df to just specified ids + columns |
| 759 | + ret_ids = [x + '_id' for x in ids] or id_cols |
| 760 | + ret_cols = columns or non_id_cols |
| 761 | + ret_columns = list(set([*ret_ids, *ret_cols])) |
| 762 | + |
| 763 | + return df[ret_columns] |
784 | 764 |
|
785 | 765 |
|
786 | 766 | def import_contracts():
|
|
0 commit comments