diff --git a/.github/workflows/1-fetch.yml b/.github/workflows/1-fetch.yml index 23b7da2..154d04d 100644 --- a/.github/workflows/1-fetch.yml +++ b/.github/workflows/1-fetch.yml @@ -65,3 +65,5 @@ jobs: run: | ./scripts/1-fetch/github_fetch.py \ --enable-save --enable-git + env: + GH_TOKEN: ${{ secrets.BOT_TOKEN }} diff --git a/env.example b/env.example index f44832f..ca41457 100644 --- a/env.example +++ b/env.example @@ -1,6 +1,15 @@ # This file must be copied to .env and the appropriate variables populated. +## GitHub + +# https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api#primary-rate-limit-for-authenticated-users +# +# https://docs.github.com/en/rest/authentication/authenticating-to-the-rest-api + +# GH_TOKEN = + + ## GCS (Google Custom Search) # https://developers.google.com/custom-search/v1/introduction @@ -21,7 +30,9 @@ # GCS_CX = + ## Flickr + # "The flickr developer guide: https://www.flickr.com/services/developer/" # FLICKR_API_KEY = diff --git a/scripts/1-fetch/github_fetch.py b/scripts/1-fetch/github_fetch.py index 1181418..3d3bbb3 100755 --- a/scripts/1-fetch/github_fetch.py +++ b/scripts/1-fetch/github_fetch.py @@ -31,10 +31,10 @@ # Constants FILE1_COUNT = os.path.join(PATHS["data_phase"], "github_1_count.csv") +GH_TOKEN = os.getenv("GH_TOKEN") GITHUB_RETRY_STATUS_FORCELIST = [ 408, # Request Timeout - 422, # Unprocessable Content - # (Validation failed, or the endpoint has been spammed) + 422, # Unprocessable Content (Validation failed, or endpoint spammed) 429, # Too Many Requests 500, # Internal Server Error 502, # Bad Gateway @@ -94,7 +94,10 @@ def get_requests_session(): ) session = requests.Session() session.mount("https://", HTTPAdapter(max_retries=max_retries)) - session.headers.update({"Accept": "application/vnd.github+json"}) + headers = {"accept": "application/vnd.github+json"} + if GH_TOKEN: + headers["authorization"] = f"Bearer {GH_TOKEN}" + session.headers.update(headers) return session