Skip to content

Commit

Permalink
configure ruff (used by hatch fmt) in pyproject.toml. Lint and format…
Browse files Browse the repository at this point in the history
… repo according to this config (#22)

* configure ruff (used by hatch fmt) in pyproject.toml. remove style hatch env as we can use hatch fmt instead of black

* register test_utils as a pytest plugin so that the fixtures it contains can be used without being imported

* explicitly enable pycodestyle, pyflakes, pyupgrade, flake8-bugbear, flake8-simplify, and isort for lintering and formatting.

* remove pre-file-ignore for unused imports now that the issue is fixed with pytest_plugins in conftests.py registering tests.test_utils

* helper func for json decode error type check

* manual change for | type operator

* use relative path, instead of plain pattern, for exclude of alembic dir

* fix type annotations used in typer options.
  • Loading branch information
macpd authored Jun 21, 2024
1 parent 9014611 commit 74c0763
Show file tree
Hide file tree
Showing 13 changed files with 328 additions and 321 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,11 @@ OR run with hatch (this runs above docker commands as sudo):
hatch run test:postgres-integration-test-docker-as-sudo
```

## Automatic formatting with black
To check if black would change code (but not actually make changes):
`hatch run style:check`
To apply changes from black
`hatch run style:fmt`
## Automatic formatting and linting with ruff
To check if ruff would change code (but not actually make changes):
`hatch fmt --check`
To apply changes from ruff
`hatch fmt`

## Alembic database schema migrations
Alembic is a tool/framework for database schema migrations. For instructions on
Expand Down
5 changes: 5 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import pytest

# Register tests/test_utils.py module as a pytest plugin so that tests can use fixtures from that
# module without importing them (pytest recommends against importing them, and linters will flag the
# import as unused and the fixture usage in test method args as redefining the var; https://github.com/astral-sh/ruff/issues/4046)
pytest_plugins="tests.test_utils"

def pytest_addoption(parser):
"""Used to pass database URL for DB docker container in integration test."""
Expand All @@ -14,3 +18,4 @@ def pytest_addoption(parser):
@pytest.fixture
def database_url_command_line_arg(request):
return request.config.getoption("--database-url")

34 changes: 22 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ extra-dependencies = [

[tool.hatch.envs.alembic]
extra-dependencies = [
"pytest",
"black",
"alembic"
]

Expand All @@ -67,15 +65,27 @@ postgres-integration-test-docker-as-sudo = [
"sudo docker compose down"
]

[tool.hatch.envs.style]
detached = true
dependencies = [
"black",
]
[tool.hatch.envs.style.scripts]
check = [
"black --check --diff src/",
# Hatch uses ruff for linting and formatting (hatch fmt ...)
[tool.ruff]
line-length=100
exclude = ["./alembic"]

[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
]
fmt = [
"black src/",
ignore=[
# Do not suggest ternary operator
"SIM108",
]
76 changes: 35 additions & 41 deletions src/tiktok_api_helper/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import enum
import json
import logging
import re
from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Mapping, Optional, Sequence
import re
from typing import Any

import attrs
import certifi
Expand All @@ -16,9 +17,9 @@
import yaml
from sqlalchemy import Engine

from tiktok_api_helper import utils
from tiktok_api_helper.query import Query, QueryJSONEncoder
from tiktok_api_helper.sql import Crawl, upsert_videos
from tiktok_api_helper import utils

ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"

Expand All @@ -44,9 +45,7 @@ class InvalidSearchIdError(InvalidRequestError):

def field_is_not_empty(instance, attribute, value):
if not value:
raise ValueError(
f"{instance.__class__.__name__}: {attribute.name} cannot be empty"
)
raise ValueError(f"{instance.__class__.__name__}: {attribute.name} cannot be empty")


class ApiRateLimitWaitStrategy(enum.StrEnum):
Expand All @@ -62,9 +61,7 @@ class TiktokCredentials:
client_secret: str = attrs.field(
validator=[attrs.validators.instance_of(str), field_is_not_empty]
)
client_key: str = attrs.field(
validator=[attrs.validators.instance_of(str), field_is_not_empty]
)
client_key: str = attrs.field(validator=[attrs.validators.instance_of(str), field_is_not_empty])


@attrs.define
Expand All @@ -89,8 +86,8 @@ class ApiClientConfig:
api_credentials_file: Path
max_count: int = 100
stop_after_one_request: bool = False
crawl_tags: Optional[list[str]] = None
raw_responses_output_dir: Optional[Path] = None
crawl_tags: list[str] | None = None
raw_responses_output_dir: Path | None = None
api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy), # type: ignore - Attrs overload
Expand All @@ -111,8 +108,8 @@ class TiktokRequest:
max_count: int = 100
is_random: bool = False

cursor: Optional[int] = None
search_id: Optional[str] = None
cursor: int | None = None
search_id: str | None = None

@classmethod
def from_config(cls, config: ApiClientConfig, **kwargs) -> TiktokRequest:
Expand All @@ -139,14 +136,16 @@ def as_json(self, indent=None):
request_obj["cursor"] = self.cursor
return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)

def is_json_decode_error(exception):
return isinstance(exception, rq.exceptions.JSONDecodeError | json.JSONDecodeError)

def retry_json_decoding_error_once(
retry_state,
):
exception = retry_state.outcome.exception()

# Retry once if JSON decoding response fails
if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
if is_json_decode_error(exception):
return retry_state.attempt_number <= 1

return None
Expand Down Expand Up @@ -181,7 +180,7 @@ def json_decoding_error_retry_immediately(
):
exception = retry_state.outcome.exception()
# If JSON decoding fails retry immediately
if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
if is_json_decode_error(exception):
return 0

logging.warning("Unknown exception in wait callback: %r", exception)
Expand Down Expand Up @@ -219,7 +218,8 @@ def api_rate_limi_wait_until_next_utc_midnight(
if isinstance(exception, ApiRateLimitError):
next_utc_midnight = pendulum.tomorrow("UTC")
logging.warning(
"Response indicates rate limit exceeded: %r\nSleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
"Response indicates rate limit exceeded: %r\n"
"Sleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
exception,
next_utc_midnight,
next_utc_midnight.in_tz("local"),
Expand Down Expand Up @@ -254,11 +254,12 @@ class TikTokApiRequestClient:

_credentials: TiktokCredentials = attrs.field(
validator=[attrs.validators.instance_of(TiktokCredentials), field_is_not_empty],
alias="credentials", # Attrs removes underscores from field names but the static type checker doesn't know that
# Attrs removes underscores from field names but the static type checker doesn't know that
alias="credentials",
)
_access_token_fetcher_session: rq.Session = attrs.field()
_api_request_session: rq.Session = attrs.field()
_raw_responses_output_dir: Optional[Path] = None
_raw_responses_output_dir: Path | None = None
_api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy), # type: ignore - Attrs overload
Expand All @@ -285,7 +286,6 @@ def _get_client_access_token(
self,
grant_type: str = "client_credentials",
) -> str:

headers = {
"Content-Type": "application/x-www-form-urlencoded",
"Cache-Control": "no-cache",
Expand Down Expand Up @@ -331,9 +331,9 @@ def num_api_requests_sent(self):
return self._num_api_requests_sent

def _configure_request_sessions(self):
"""Gets access token for authorization, sets token in headers for all requests, and registers
a hook to refresh token when response indicates it has expired. Configures access token
fetcher and api fetcher sessions to verify certs using certifi."""
"""Gets access token for authorization, sets token in headers for all requests, and
registers a hook to refresh token when response indicates it has expired. Configures access
token fetcher and api fetcher sessions to verify certs using certifi."""
self._access_token_fetcher_session.verify = certifi.where()

token = self._get_client_access_token()
Expand All @@ -349,21 +349,17 @@ def _configure_request_sessions(self):
def _refresh_token(
self,
r,
*unused_args: Optional[Sequence],
**unused_kwargs: Optional[Mapping[Any, Any]],
*unused_args: Sequence | None,
**unused_kwargs: Mapping[Any, Any] | None,
) -> rq.Response | None:
# Adapted from https://stackoverflow.com/questions/37094419/python-requests-retry-request-after-re-authentication
if r.status_code == 401:
logging.info("Fetching new token as the previous token expired")

token = self._get_client_access_token()
self._api_request_session.headers.update(
{"Authorization": f"Bearer {token}"}
)
self._api_request_session.headers.update({"Authorization": f"Bearer {token}"})

r.request.headers["Authorization"] = self._api_request_session.headers[
"Authorization"
]
r.request.headers["Authorization"] = self._api_request_session.headers["Authorization"]

return self._api_request_session.send(r.request)

Expand Down Expand Up @@ -404,12 +400,10 @@ def _fetch_retryer(self, max_api_rate_limit_retries=None):
reraise=True,
)

def fetch(
self, request: TiktokRequest, max_api_rate_limit_retries=None
) -> TikTokResponse:
return self._fetch_retryer(
max_api_rate_limit_retries=max_api_rate_limit_retries
)(self._fetch, request)
def fetch(self, request: TiktokRequest, max_api_rate_limit_retries=None) -> TikTokResponse:
return self._fetch_retryer(max_api_rate_limit_retries=max_api_rate_limit_retries)(
self._fetch, request
)

def _fetch(self, request: TiktokRequest) -> TikTokResponse:
api_response = self._post(request)
Expand Down Expand Up @@ -456,14 +450,14 @@ def _post(self, request: TiktokRequest) -> rq.Response | None:
logging.info("API responded 500. This happens occasionally")
else:
logging.warning(
f"Request failed, status code {response.status_code} - text {response.text} - data {data}",
)
f"Request failed, status code {response.status_code} - text {response.text} - data "
"{data}",)
response.raise_for_status()
# In case raise_for_status does not raise an exception we return None
return None

@staticmethod
def _parse_response(response: Optional[rq.Response]) -> TikTokResponse:
def _parse_response(response: rq.Response | None) -> TikTokResponse:
if response is None:
raise ValueError("Response is None")

Expand Down Expand Up @@ -596,8 +590,8 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
if not api_response.videos and crawl.has_more:
logging.log(
logging.ERROR,
f"No videos in response but there's still data to Crawl - Query: {self._config.query} \n api_response.data: {api_response.data}",
)
"No videos in response but there's still data to Crawl - Query: "
f"{self._config.query} \n api_response.data: {api_response.data}",)
if self._config.stop_after_one_request:
logging.info("Stopping after one request")
break
Expand Down
Loading

0 comments on commit 74c0763

Please sign in to comment.