configure ruff (used by hatch fmt) in pyproject.toml. Lint and format repo according to this config #22

Merged
28 commits merged on Jun 21, 2024
Changes from all commits
Commits (28)
b7d7e44
configure ruff (used by hatch fmt) in pyproject.toml. remove style ha…
macpd May 14, 2024
2102881
ruff formatting
macpd May 14, 2024
010f9e8
ruff lint formatting
macpd May 14, 2024
121d8ac
fix ruff documentation
macpd May 14, 2024
85c069d
Merge branch 'main' into feature/configure_ruff
macpd May 14, 2024
998f795
Merge branch 'main' into feature/configure_ruff
macpd May 16, 2024
7196b92
specify UTC timezone in datetime.now call
macpd May 16, 2024
f0683b2
assign f string to variable and pass that to exception object constr…
macpd May 16, 2024
4e2dc87
remove duplicate string from set definition
macpd May 16, 2024
cbd6c6a
Merge branch 'main' into feature/configure_ruff
macpd Jun 21, 2024
7772209
add comment explaining why lint message ignored
macpd Jun 21, 2024
b4e10af
fix pytest.mark.parametrize first arg
macpd Jun 21, 2024
3591f07
linter automatic fixes
macpd Jun 21, 2024
9fd4955
register test_utils as a pytest plugin so that the fixtures it contai…
macpd Jun 21, 2024
b0e1ea5
explicitly enable pycodestyle, pyflakes, pyupgrade, flake8-bugbear, f…
macpd Jun 21, 2024
bc71622
automatic linter fixes
macpd Jun 21, 2024
f12236a
linter automatic fixes
macpd Jun 21, 2024
0b95ecd
fix lint warnings
macpd Jun 21, 2024
b9a107b
manual lint fixes
macpd Jun 21, 2024
5774199
remove pre-file-ignore for unused imports now that the issue is fixed…
macpd Jun 21, 2024
fc6b3c8
helper func for json decode error type check
macpd Jun 21, 2024
24060b7
automatic formatting and linting fixes
macpd Jun 21, 2024
da8ade1
syntax fix
macpd Jun 21, 2024
7fd1728
manual change for | type operator
macpd Jun 21, 2024
57f3da0
remove update_crawl in sql.py that was accidentally restored in manua…
macpd Jun 21, 2024
5aa8228
use relative path, instead of plain pattern, for exclude of alembic dir
macpd Jun 21, 2024
20fa3b1
fix type annotations used in typer options.
macpd Jun 21, 2024
da3aa0d
remove logging import that is no longer used
macpd Jun 21, 2024
10 changes: 5 additions & 5 deletions README.md
@@ -173,11 +173,11 @@ OR run with hatch (this runs above docker commands as sudo):
hatch run test:postgres-integration-test-docker-as-sudo
```

## Automatic formatting with black
To check if black would change code (but not actually make changes):
`hatch run style:check`
To apply changes from black
`hatch run style:fmt`
## Automatic formatting and linting with ruff
To check if ruff would change code (but not actually make changes):
`hatch fmt --check`
To apply changes from ruff
`hatch fmt`

## Alembic database schema migrations
Alembic is a tool/framework for database schema migrations. For instructions on
5 changes: 5 additions & 0 deletions conftest.py
@@ -1,5 +1,9 @@
import pytest

# Register tests/test_utils.py module as a pytest plugin so that tests can use fixtures from that
# module without importing them (pytest recommends against importing them, and linters will flag the
# import as unused and the fixture usage in test method args as redefining the var; https://github.com/astral-sh/ruff/issues/4046)
pytest_plugins="tests.test_utils"

def pytest_addoption(parser):
"""Used to pass database URL for DB docker container in integration test."""
@@ -14,3 +18,4 @@ def pytest_addoption(parser):
@pytest.fixture
def database_url_command_line_arg(request):
return request.config.getoption("--database-url")
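
As context for the `pytest_plugins` registration above, here is a minimal, hypothetical sketch (the fixture and test names are illustrative, not taken from this repo) of how a fixture defined in tests/test_utils.py becomes usable in a test module without an explicit import, which is what keeps ruff from flagging an unused import (F401) or a redefined name (F811):

```python
# tests/test_utils.py (hypothetical contents)
import pytest


@pytest.fixture
def api_stub():
    """Stand-in object that a test can request by fixture name."""
    return object()


# tests/test_example.py (hypothetical) -- no `from tests.test_utils import api_stub`
# is needed once conftest.py sets pytest_plugins = "tests.test_utils"; pytest injects
# the fixture by name.
def test_uses_shared_fixture(api_stub):
    assert api_stub is not None
```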

34 changes: 22 additions & 12 deletions pyproject.toml
@@ -54,8 +54,6 @@ extra-dependencies = [

[tool.hatch.envs.alembic]
extra-dependencies = [
"pytest",
"black",
"alembic"
]

@@ -67,15 +65,27 @@ postgres-integration-test-docker-as-sudo = [
"sudo docker compose down"
]

[tool.hatch.envs.style]
detached = true
dependencies = [
"black",
]
[tool.hatch.envs.style.scripts]
check = [
"black --check --diff src/",
# Hatch uses ruff for linting and formatting (hatch fmt ...)
[tool.ruff]
line-length=100
exclude = ["./alembic"]

[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
]
fmt = [
"black src/",
ignore=[
# Do not suggest ternary operator
"SIM108",
]
76 changes: 35 additions & 41 deletions src/tiktok_api_helper/api_client.py
@@ -3,10 +3,11 @@
import enum
import json
import logging
import re
from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Mapping, Optional, Sequence
import re
from typing import Any

import attrs
import certifi
@@ -16,9 +17,9 @@
import yaml
from sqlalchemy import Engine

from tiktok_api_helper import utils
from tiktok_api_helper.query import Query, QueryJSONEncoder
from tiktok_api_helper.sql import Crawl, upsert_videos
from tiktok_api_helper import utils

ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"

@@ -44,9 +45,7 @@ class InvalidSearchIdError(InvalidRequestError):

def field_is_not_empty(instance, attribute, value):
if not value:
raise ValueError(
f"{instance.__class__.__name__}: {attribute.name} cannot be empty"
)
raise ValueError(f"{instance.__class__.__name__}: {attribute.name} cannot be empty")


class ApiRateLimitWaitStrategy(enum.StrEnum):
@@ -62,9 +61,7 @@ class TiktokCredentials:
client_secret: str = attrs.field(
validator=[attrs.validators.instance_of(str), field_is_not_empty]
)
client_key: str = attrs.field(
validator=[attrs.validators.instance_of(str), field_is_not_empty]
)
client_key: str = attrs.field(validator=[attrs.validators.instance_of(str), field_is_not_empty])


@attrs.define
@@ -89,8 +86,8 @@ class ApiClientConfig:
api_credentials_file: Path
max_count: int = 100
stop_after_one_request: bool = False
crawl_tags: Optional[list[str]] = None
raw_responses_output_dir: Optional[Path] = None
crawl_tags: list[str] | None = None
raw_responses_output_dir: Path | None = None
api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy), # type: ignore - Attrs overload
@@ -111,8 +108,8 @@ class TiktokRequest:
max_count: int = 100
is_random: bool = False

cursor: Optional[int] = None
search_id: Optional[str] = None
cursor: int | None = None
search_id: str | None = None

@classmethod
def from_config(cls, config: ApiClientConfig, **kwargs) -> TiktokRequest:
@@ -139,14 +136,16 @@ def as_json(self, indent=None):
request_obj["cursor"] = self.cursor
return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)

def is_json_decode_error(exception):
return isinstance(exception, rq.exceptions.JSONDecodeError | json.JSONDecodeError)

def retry_json_decoding_error_once(
retry_state,
):
exception = retry_state.outcome.exception()

# Retry once if JSON decoding response fails
if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
if is_json_decode_error(exception):
return retry_state.attempt_number <= 1

return None
@@ -181,7 +180,7 @@ def json_decoding_error_retry_immediately(
):
exception = retry_state.outcome.exception()
# If JSON decoding fails retry immediately
if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
if is_json_decode_error(exception):
return 0

logging.warning("Unknown exception in wait callback: %r", exception)
@@ -219,7 +218,8 @@ def api_rate_limi_wait_until_next_utc_midnight(
if isinstance(exception, ApiRateLimitError):
next_utc_midnight = pendulum.tomorrow("UTC")
logging.warning(
"Response indicates rate limit exceeded: %r\nSleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
"Response indicates rate limit exceeded: %r\n"
"Sleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
exception,
next_utc_midnight,
next_utc_midnight.in_tz("local"),
@@ -254,11 +254,12 @@ class TikTokApiRequestClient:

_credentials: TiktokCredentials = attrs.field(
validator=[attrs.validators.instance_of(TiktokCredentials), field_is_not_empty],
alias="credentials", # Attrs removes underscores from field names but the static type checker doesn't know that
# Attrs removes underscores from field names but the static type checker doesn't know that
alias="credentials",
)
_access_token_fetcher_session: rq.Session = attrs.field()
_api_request_session: rq.Session = attrs.field()
_raw_responses_output_dir: Optional[Path] = None
_raw_responses_output_dir: Path | None = None
_api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy), # type: ignore - Attrs overload
@@ -285,7 +286,6 @@ def _get_client_access_token(
self,
grant_type: str = "client_credentials",
) -> str:

headers = {
"Content-Type": "application/x-www-form-urlencoded",
"Cache-Control": "no-cache",
@@ -331,9 +331,9 @@ def num_api_requests_sent(self):
return self._num_api_requests_sent

def _configure_request_sessions(self):
"""Gets access token for authorization, sets token in headers for all requests, and registers
a hook to refresh token when response indicates it has expired. Configures access token
fetcher and api fetcher sessions to verify certs using certifi."""
"""Gets access token for authorization, sets token in headers for all requests, and
registers a hook to refresh token when response indicates it has expired. Configures access
token fetcher and api fetcher sessions to verify certs using certifi."""
self._access_token_fetcher_session.verify = certifi.where()

token = self._get_client_access_token()
@@ -349,21 +349,17 @@ def _configure_request_sessions(self):
def _refresh_token(
self,
r,
*unused_args: Optional[Sequence],
**unused_kwargs: Optional[Mapping[Any, Any]],
*unused_args: Sequence | None,
**unused_kwargs: Mapping[Any, Any] | None,
) -> rq.Response | None:
# Adapted from https://stackoverflow.com/questions/37094419/python-requests-retry-request-after-re-authentication
if r.status_code == 401:
logging.info("Fetching new token as the previous token expired")

token = self._get_client_access_token()
self._api_request_session.headers.update(
{"Authorization": f"Bearer {token}"}
)
self._api_request_session.headers.update({"Authorization": f"Bearer {token}"})

r.request.headers["Authorization"] = self._api_request_session.headers[
"Authorization"
]
r.request.headers["Authorization"] = self._api_request_session.headers["Authorization"]

return self._api_request_session.send(r.request)
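
The refactored `_refresh_token` above relies on the standard `requests` response-hook pattern: on a 401, fetch a new bearer token, update the session headers, and replay the original request. Below is a minimal, self-contained sketch of that pattern, assuming a generic `fetch_token()` helper; the names are illustrative and not part of tiktok_api_helper:

```python
import requests


def fetch_token() -> str:
    # Hypothetical helper: call the provider's token endpoint and return an access token.
    return "example-access-token"


def make_session() -> requests.Session:
    session = requests.Session()
    session.headers.update({"Authorization": f"Bearer {fetch_token()}"})

    def refresh_on_401(response, *args, **kwargs):
        # On an expired-token response, refresh credentials and re-send the
        # original request; otherwise return the response unchanged.
        if response.status_code == 401:
            session.headers.update({"Authorization": f"Bearer {fetch_token()}"})
            response.request.headers["Authorization"] = session.headers["Authorization"]
            return session.send(response.request)
        return response

    session.hooks["response"].append(refresh_on_401)
    return session
```

A production version would also guard against replaying the request more than once if the refreshed call still returns 401.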

Expand Down Expand Up @@ -404,12 +400,10 @@ def _fetch_retryer(self, max_api_rate_limit_retries=None):
reraise=True,
)

def fetch(
self, request: TiktokRequest, max_api_rate_limit_retries=None
) -> TikTokResponse:
return self._fetch_retryer(
max_api_rate_limit_retries=max_api_rate_limit_retries
)(self._fetch, request)
def fetch(self, request: TiktokRequest, max_api_rate_limit_retries=None) -> TikTokResponse:
return self._fetch_retryer(max_api_rate_limit_retries=max_api_rate_limit_retries)(
self._fetch, request
)

def _fetch(self, request: TiktokRequest) -> TikTokResponse:
api_response = self._post(request)
@@ -456,14 +450,14 @@ def _post(self, request: TiktokRequest) -> rq.Response | None:
logging.info("API responded 500. This happens occasionally")
else:
logging.warning(
f"Request failed, status code {response.status_code} - text {response.text} - data {data}",
)
f"Request failed, status code {response.status_code} - text {response.text} - data "
"{data}",)
response.raise_for_status()
# In case raise_for_status does not raise an exception we return None
return None

@staticmethod
def _parse_response(response: Optional[rq.Response]) -> TikTokResponse:
def _parse_response(response: rq.Response | None) -> TikTokResponse:
if response is None:
raise ValueError("Response is None")

@@ -596,8 +590,8 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
if not api_response.videos and crawl.has_more:
logging.log(
logging.ERROR,
f"No videos in response but there's still data to Crawl - Query: {self._config.query} \n api_response.data: {api_response.data}",
)
"No videos in response but there's still data to Crawl - Query: "
f"{self._config.query} \n api_response.data: {api_response.data}",)
if self._config.stop_after_one_request:
logging.info("Stopping after one request")
break