Skip to content

Commit

Permalink
Merge pull request #33 from orion512/preparing-release-002
Browse files Browse the repository at this point in the history
Preparing release 002
  • Loading branch information
orion512 authored Nov 6, 2022
2 parents 584d8ad + c1302e9 commit 1594e70
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 85 deletions.
42 changes: 40 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ baskref -t gp -y 2006 -fp datasets
# python -c "from baskref import run_baskref; run_baskref()" -t gp -y 2006 -fp datasets
```

Use a proxy for scraping.
```bash
baskref -t g -d 2022-01-07 -fp datasets -p http://someproxy.com
```

## How to Use the Package?

Install requirements
Expand All @@ -76,6 +81,10 @@ from baskref.data_collection import (

url_scraper = BaskRefUrlScraper()
data_scraper = BaskRefDataScraper()

# optionally you can set a proxy
proxy_url_scraper = BaskRefUrlScraper("http://someproxy.com")
proxy_data_scraper = BaskRefDataScraper("http://someproxy.com")
```
The BaskRefDataScraper.get_games_data returns a list of dictionaries.

Expand Down Expand Up @@ -144,7 +153,6 @@ the configuration for pylint is stored in .pylintrc file.

```bash
# run pylint over the entire code base
pylint baskref
pylint --recursive=y ./
```

Expand All @@ -154,7 +162,7 @@ the configuration for mypy is stored in pyproject.toml file.

```bash
# run mypy over the entire code base
mypy .
mypy baskref
```

## Bonus
Expand Down Expand Up @@ -189,13 +197,43 @@ deactivate

```
pip install -r requirements_dev.txt
# uninstall all packages Windows
pip freeze > unins && pip uninstall -y -r unins && del unins
# uninstall all packages linux
pip freeze | xargs pip uninstall -y
```

3. Install the pre-commit hook
```
pre-commit install
```

### Prepare a new Version
This section describes the steps for preparing a new baskref version.

- adjust the pyproject.toml file
- version
- dependencies
- install the project locally and test it
```
python -m build
pip install .
```
- publish project to test.pypi
```
pip install --upgrade twine
twine upload --repository testpypi dist/*
# install from test.pypi
pip install --index-url https://test.pypi.org/simple/ baskref
```
- publish a new version
```
twine upload dist/*
```


## Contributors

1. [Dominik Zulovec Sajovic](https://www.linkedin.com/in/dominik-zulovec-sajovic/)
14 changes: 7 additions & 7 deletions baskref/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os
import argparse
import logging
from typing import List, Callable, Dict
from typing import Callable
from datetime import date

from baskref.settings import Settings, InLine
Expand Down Expand Up @@ -152,12 +152,12 @@ def main(args: argparse.Namespace) -> None:
## Data Collection Functions


def run_data_collection_manager(settings: Settings) -> List:
def run_data_collection_manager(settings: Settings) -> list:
"""This function runs the selected mode of collection"""

logger.info("Started the data collection manager")

collection_modes: Dict[str, Callable] = {
collection_modes: dict[str, Callable] = {
"g": run_daily_game_collector,
"t": run_team_collector,
"p": run_player_collector,
Expand All @@ -174,7 +174,7 @@ def run_data_collection_manager(settings: Settings) -> List:
return collection_modes[settings.in_line.type](settings)


def run_daily_game_collector(settings: Settings) -> List:
def run_daily_game_collector(settings: Settings) -> list:
"""
This function orchestrates the collection of NBA games on
a specific day.
Expand Down Expand Up @@ -206,7 +206,7 @@ def run_player_collector():
raise NotImplementedError


def run_season_games_collector(settings: Settings) -> List:
def run_season_games_collector(settings: Settings) -> list:
"""Orchestrates the collection of all games in a season"""

logger.info("SEASON GAME COLLECTOR MODE")
Expand All @@ -225,7 +225,7 @@ def run_season_games_collector(settings: Settings) -> List:
return game_data


def run_playoffs_game_collector(settings: Settings) -> List:
def run_playoffs_game_collector(settings: Settings) -> list:
"""Orchestrates the collection of all games in a playoff"""

logger.info("PLAYOFF GAME COLLECTOR MODE")
Expand All @@ -250,7 +250,7 @@ def run_playoffs_game_collector(settings: Settings) -> List:
def run_data_saving_manager(settings: Settings, coll_data: list) -> None:
"""Integration function which runs the saving of the data"""

saving_prefix_options: Dict[str, str] = {
saving_prefix_options: dict[str, str] = {
"g": settings.in_line.date.strftime("%Y%m%d"),
"t": "teams",
"p": settings.in_line.namechar,
Expand Down
11 changes: 5 additions & 6 deletions baskref/data_collection/baskref_data_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import logging
from dataclasses import dataclass
from datetime import datetime
from typing import Tuple, Optional, Dict, Union
from urllib import parse
from bs4 import BeautifulSoup
import baskref.data_collection.html_scraper as scr
Expand Down Expand Up @@ -99,7 +98,7 @@ def _parse_full_game_data(self, game_page: BeautifulSoup) -> dict:

def _parse_team_name(
self, html: BeautifulSoup, team: str
) -> Tuple[str, str]:
) -> tuple[str, str]:
"""
Provided the BR game page and the team parameter it parses out
the team short and long names.
Expand All @@ -121,7 +120,7 @@ def _parse_team_name(

def _parse_game_meta_data(
self, html: BeautifulSoup
) -> Tuple[datetime, str]:
) -> tuple[datetime, str]:
"""
Provided the BR game page it parses out the game time and
game arena name.
Expand All @@ -136,7 +135,7 @@ def _parse_game_meta_data(

return game_time, arena_name

def _parse_attendance(self, html: BeautifulSoup) -> Optional[int]:
def _parse_attendance(self, html: BeautifulSoup) -> int | None:
"""
Provided the BR game page it parses out the game attendance.
Sometimes the page doesn't include attendance in which case the
Expand Down Expand Up @@ -172,7 +171,7 @@ def _parse_game_id(self, game_url: str) -> str:

def _parse_basic_stats(
self, page: BeautifulSoup, team: str, team_sn: str
) -> Dict[str, Union[int, float]]:
) -> dict[str, int | float]:
"""
Provided the BR game page it parses out the basic stats
for either the home or the road team, depending on the
Expand Down Expand Up @@ -217,7 +216,7 @@ def _parse_basic_stats(

def _parse_advanced_stats(
self, page: BeautifulSoup, team: str, team_sn: str
) -> Dict[str, Union[int, float]]:
) -> dict[str, int | float]:
"""
Provided the BR game page it parses out the advanced stats
for either the home or the road team, depending on the
Expand Down
2 changes: 1 addition & 1 deletion baskref/data_collection/html_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
"""


from typing import Callable, Any
from dataclasses import dataclass
import logging
from typing import Callable, Any
import requests
from requests import Response
from bs4 import BeautifulSoup
Expand Down
26 changes: 2 additions & 24 deletions notebooks/testing_baskref.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,8 @@
}
],
"source": [
"username = 'sp53333242'\n",
"password = 'oxD7r1bU'\n",
"username = ''\n",
"password = ''\n",
"\n",
"proxies = {\n",
" \"https\": f'http://user-{username}:{password}@gb.smartproxy.com:30000',\n",
Expand All @@ -152,28 +152,6 @@
"print(requests.get(url_ip, proxies=proxies).text)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "5735a65c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'https': 'http://user-sp53333242:oxD7r1bU@gb.smartproxy.com:30000',\n",
" 'http': 'http://user-sp53333242:oxD7r1bU@gb.smartproxy.com:30000'}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"proxies"
]
},
{
"cell_type": "markdown",
"id": "79610acc",
Expand Down
20 changes: 15 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,37 +1,43 @@
[build-system]
requires = ["setuptools~=62.6", "wheel~=0.37.1"]
requires = [
"setuptools",
"wheel",
]
build-backend = "setuptools.build_meta"

[project]
name = "baskref"
version = "0.0.1"
version = "0.0.2"
authors = [
{name="Dominik Zulovec Sajovic", email="dominik.zsajovic@gmail.com"},
]
keywords = ["basketball", "web scraper", "python"]
description = "baskRef is a tool to scrape basketball Data from the web."
readme = "README.md"
license = {file="LICENSE"}
requires-python = ">=3.8"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"requests_html==0.10.0",
"requests==2.28.1",
"beautifulsoup4==4.11.1",
"fake_useragent==0.1.11",
]

[project.urls]
"Homepage" = "https://github.com/orion512/basketball_scraper"
"Bug Tracker" = "https://github.com/orion512/basketball_scraper/issues"
"Project Board" = "https://github.com/users/orion512/projects/2/views/1"

[project.scripts]
baskref = "baskref:run_baskref"

[tool.black]
line-length = 79
target-version = ['py38']
target-version = ['py310']
exclude = '''
/(
\.toml
Expand All @@ -43,6 +49,10 @@ exclude = '''
)/
'''

[tool.setuptools.packages.find]
include = ["baskref"]
exclude = ["notebooks*"]

[tool.mypy]
ignore_missing_imports = true

Expand Down
48 changes: 8 additions & 40 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,80 +1,48 @@
appdirs==1.4.4
astroid==2.12.12
attrs==22.1.0
beautifulsoup4==4.11.1
black==22.8.0
bleach==5.0.1
bs4==0.0.1
build==0.8.0
certifi==2022.9.14
black==22.10.0
certifi==2022.9.24
cfgv==3.3.1
charset-normalizer==2.1.1
click==8.1.3
colorama==0.4.5
commonmark==0.9.1
coverage==6.4.4
cssselect==1.1.0
colorama==0.4.6
dill==0.3.6
distlib==0.3.6
docutils==0.19
exceptiongroup==1.0.1
fake-useragent==0.1.11
filelock==3.8.0
identify==2.5.8
idna==3.4
importlib-metadata==4.12.0
iniconfig==1.1.1
isort==5.10.1
jaraco.classes==3.2.2
keyring==23.9.3
lazy-object-proxy==1.8.0
lxml==4.9.1
mccabe==0.7.0
more-itertools==8.14.0
mypy==0.982
mypy-extensions==0.4.3
nodeenv==1.7.0
numpy==1.23.4
packaging==21.3
pandas==1.4.3
parse==1.19.0
pathspec==0.10.1
pep517==0.13.0
pkginfo==1.8.3
platformdirs==2.5.2
pluggy==1.0.0
pre-commit==2.20.0
py==1.11.0
pyee==8.2.2
Pygments==2.13.0
pylint==2.15.2
pylint==2.15.5
pyparsing==3.0.9
pyppeteer==1.0.2
pyquery==1.4.3
pytest==7.1.3
pytest==7.2.0
python-dateutil==2.8.2
pytz==2022.5
pywin32-ctypes==0.2.0
pytz==2022.6
PyYAML==6.0
readme-renderer==37.1
requests==2.28.1
requests-html==0.10.0
requests-toolbelt==0.9.1
rfc3986==2.0.0
rich==12.5.1
six==1.16.0
soupsieve==2.3.2.post1
toml==0.10.2
tomli==2.0.1
tomlkit==0.11.6
tqdm==4.64.1
twine==4.0.1
types-requests==2.28.11
types-requests==2.28.11.2
types-urllib3==1.26.25.1
typing_extensions==4.4.0
urllib3==1.26.12
virtualenv==20.16.6
w3lib==2.0.1
webencodings==0.5.1
websockets==10.3
wrapt==1.14.1
zipp==3.8.1

0 comments on commit 1594e70

Please sign in to comment.