configure ruff (used by hatch fmt) in pyproject.toml. Lint and format…

… repo according to this config (#22) * configure ruff (used by hatch fmt) in pyproject.toml. remove style hatch env as we can use hatch fmt instead of black * register test_utils as a pytest plugin so that the fixtures it contains can be used without being imported * explicitly enable pycodestyle, pyflakes, pyupgrade, flake8-bugbear, flake8-simplify, and isort for lintering and formatting. * remove pre-file-ignore for unused imports now that the issue is fixed with pytest_plugins in conftests.py registering tests.test_utils * helper func for json decode error type check * manual change for | type operator * use relative path, instead of plain pattern, for exclude of alembic dir * fix type annotations used in typer options.
CybersecurityForDemocracy · Jun 21, 2024 · 74c0763 · 74c0763
1 parent 9014611
commit 74c0763
Show file tree

Hide file tree

Showing 13 changed files with 328 additions and 321 deletions.
diff --git a/README.md b/README.md
@@ -173,11 +173,11 @@ OR run with hatch (this runs above docker commands as sudo):
 hatch run test:postgres-integration-test-docker-as-sudo
 ```
 
-## Automatic formatting with black
-To check if black would change code (but not actually make changes):
-`hatch run style:check`
-To apply changes from black
-`hatch run style:fmt`
+## Automatic formatting and linting with ruff
+To check if ruff would change code (but not actually make changes):
+`hatch fmt --check`
+To apply changes from ruff
+`hatch fmt`
 
 ## Alembic database schema migrations
 Alembic is a tool/framework for database schema migrations. For instructions on

diff --git a/conftest.py b/conftest.py
@@ -1,5 +1,9 @@
 import pytest
 
+# Register tests/test_utils.py module as a pytest plugin so that tests can use fixtures from that
+# module without importing them (pytest recommends against importing them, and linters will flag the
+# import as unused and the fixture usage in test method args as redefining the var; https://github.com/astral-sh/ruff/issues/4046)
+pytest_plugins="tests.test_utils"
 
 def pytest_addoption(parser):
     """Used to pass database URL for DB docker container in integration test."""
@@ -14,3 +18,4 @@ def pytest_addoption(parser):
 @pytest.fixture
 def database_url_command_line_arg(request):
     return request.config.getoption("--database-url")
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -54,8 +54,6 @@ extra-dependencies = [
 
 [tool.hatch.envs.alembic]
 extra-dependencies = [
-  "pytest",
-  "black",
   "alembic"
 ]
 
@@ -67,15 +65,27 @@ postgres-integration-test-docker-as-sudo = [
   "sudo docker compose down"
 ]
 
-[tool.hatch.envs.style]
-detached = true
-dependencies = [
-  "black",
-]
-[tool.hatch.envs.style.scripts]
-check = [
-  "black --check --diff src/",
+# Hatch uses ruff for linting and formatting (hatch fmt ...)
+[tool.ruff]
+line-length=100
+exclude = ["./alembic"]
+
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
 ]
-fmt = [
-  "black src/",
+ignore=[
+  # Do not suggest ternary operator
+  "SIM108",
 ]
diff --git a/src/tiktok_api_helper/api_client.py b/src/tiktok_api_helper/api_client.py
@@ -3,10 +3,11 @@
 import enum
 import json
 import logging
+import re
+from collections.abc import Mapping, Sequence
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Mapping, Optional, Sequence
-import re
+from typing import Any
 
 import attrs
 import certifi
@@ -16,9 +17,9 @@
 import yaml
 from sqlalchemy import Engine
 
+from tiktok_api_helper import utils
 from tiktok_api_helper.query import Query, QueryJSONEncoder
 from tiktok_api_helper.sql import Crawl, upsert_videos
-from tiktok_api_helper import utils
 
 ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"
 
@@ -44,9 +45,7 @@ class InvalidSearchIdError(InvalidRequestError):
 
 def field_is_not_empty(instance, attribute, value):
     if not value:
-        raise ValueError(
-            f"{instance.__class__.__name__}: {attribute.name} cannot be empty"
-        )
+        raise ValueError(f"{instance.__class__.__name__}: {attribute.name} cannot be empty")
 
 
 class ApiRateLimitWaitStrategy(enum.StrEnum):
@@ -62,9 +61,7 @@ class TiktokCredentials:
     client_secret: str = attrs.field(
         validator=[attrs.validators.instance_of(str), field_is_not_empty]
     )
-    client_key: str = attrs.field(
-        validator=[attrs.validators.instance_of(str), field_is_not_empty]
-    )
+    client_key: str = attrs.field(validator=[attrs.validators.instance_of(str), field_is_not_empty])
 
 
 @attrs.define
@@ -89,8 +86,8 @@ class ApiClientConfig:
     api_credentials_file: Path
     max_count: int = 100
     stop_after_one_request: bool = False
-    crawl_tags: Optional[list[str]] = None
-    raw_responses_output_dir: Optional[Path] = None
+    crawl_tags: list[str] | None = None
+    raw_responses_output_dir: Path | None = None
     api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
         default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
         validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy),  # type: ignore - Attrs overload
@@ -111,8 +108,8 @@ class TiktokRequest:
     max_count: int = 100
     is_random: bool = False
 
-    cursor: Optional[int] = None
-    search_id: Optional[str] = None
+    cursor: int | None = None
+    search_id: str | None = None
 
     @classmethod
     def from_config(cls, config: ApiClientConfig, **kwargs) -> TiktokRequest:
@@ -139,14 +136,16 @@ def as_json(self, indent=None):
             request_obj["cursor"] = self.cursor
         return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)
 
+def is_json_decode_error(exception):
+    return isinstance(exception, rq.exceptions.JSONDecodeError | json.JSONDecodeError)
 
 def retry_json_decoding_error_once(
     retry_state,
 ):
     exception = retry_state.outcome.exception()
 
     # Retry once if JSON decoding response fails
-    if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
+    if is_json_decode_error(exception):
         return retry_state.attempt_number <= 1
 
     return None
@@ -181,7 +180,7 @@ def json_decoding_error_retry_immediately(
 ):
     exception = retry_state.outcome.exception()
     # If JSON decoding fails retry immediately
-    if isinstance(exception, (rq.exceptions.JSONDecodeError, json.JSONDecodeError)):
+    if is_json_decode_error(exception):
         return 0
 
     logging.warning("Unknown exception in wait callback: %r", exception)
@@ -219,7 +218,8 @@ def api_rate_limi_wait_until_next_utc_midnight(
     if isinstance(exception, ApiRateLimitError):
         next_utc_midnight = pendulum.tomorrow("UTC")
         logging.warning(
-            "Response indicates rate limit exceeded: %r\nSleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
+            "Response indicates rate limit exceeded: %r\n"
+            "Sleeping until next UTC midnight: %s (local time %s). Will resume in approx %s",
             exception,
             next_utc_midnight,
             next_utc_midnight.in_tz("local"),
@@ -254,11 +254,12 @@ class TikTokApiRequestClient:
 
     _credentials: TiktokCredentials = attrs.field(
         validator=[attrs.validators.instance_of(TiktokCredentials), field_is_not_empty],
-        alias="credentials",  # Attrs removes underscores from field names but the static type checker doesn't know that
+        # Attrs removes underscores from field names but the static type checker doesn't know that
+        alias="credentials",
     )
     _access_token_fetcher_session: rq.Session = attrs.field()
     _api_request_session: rq.Session = attrs.field()
-    _raw_responses_output_dir: Optional[Path] = None
+    _raw_responses_output_dir: Path | None = None
     _api_rate_limit_wait_strategy: ApiRateLimitWaitStrategy = attrs.field(
         default=ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS,
         validator=attrs.validators.instance_of(ApiRateLimitWaitStrategy),  # type: ignore - Attrs overload
@@ -285,7 +286,6 @@ def _get_client_access_token(
         self,
         grant_type: str = "client_credentials",
     ) -> str:
-
         headers = {
             "Content-Type": "application/x-www-form-urlencoded",
             "Cache-Control": "no-cache",
@@ -331,9 +331,9 @@ def num_api_requests_sent(self):
         return self._num_api_requests_sent
 
     def _configure_request_sessions(self):
-        """Gets access token for authorization, sets token in headers for all requests, and registers
-        a hook to refresh token when response indicates it has expired. Configures access token
-        fetcher and api fetcher sessions to verify certs using certifi."""
+        """Gets access token for authorization, sets token in headers for all requests, and
+        registers a hook to refresh token when response indicates it has expired. Configures access
+        token fetcher and api fetcher sessions to verify certs using certifi."""
         self._access_token_fetcher_session.verify = certifi.where()
 
         token = self._get_client_access_token()
@@ -349,21 +349,17 @@ def _configure_request_sessions(self):
     def _refresh_token(
         self,
         r,
-        *unused_args: Optional[Sequence],
-        **unused_kwargs: Optional[Mapping[Any, Any]],
+        *unused_args: Sequence | None,
+        **unused_kwargs: Mapping[Any, Any] | None,
     ) -> rq.Response | None:
         # Adapted from https://stackoverflow.com/questions/37094419/python-requests-retry-request-after-re-authentication
         if r.status_code == 401:
             logging.info("Fetching new token as the previous token expired")
 
             token = self._get_client_access_token()
-            self._api_request_session.headers.update(
-                {"Authorization": f"Bearer {token}"}
-            )
+            self._api_request_session.headers.update({"Authorization": f"Bearer {token}"})
 
-            r.request.headers["Authorization"] = self._api_request_session.headers[
-                "Authorization"
-            ]
+            r.request.headers["Authorization"] = self._api_request_session.headers["Authorization"]
 
             return self._api_request_session.send(r.request)
 
@@ -404,12 +400,10 @@ def _fetch_retryer(self, max_api_rate_limit_retries=None):
             reraise=True,
         )
 
-    def fetch(
-        self, request: TiktokRequest, max_api_rate_limit_retries=None
-    ) -> TikTokResponse:
-        return self._fetch_retryer(
-            max_api_rate_limit_retries=max_api_rate_limit_retries
-        )(self._fetch, request)
+    def fetch(self, request: TiktokRequest, max_api_rate_limit_retries=None) -> TikTokResponse:
+        return self._fetch_retryer(max_api_rate_limit_retries=max_api_rate_limit_retries)(
+            self._fetch, request
+        )
 
     def _fetch(self, request: TiktokRequest) -> TikTokResponse:
         api_response = self._post(request)
@@ -456,14 +450,14 @@ def _post(self, request: TiktokRequest) -> rq.Response | None:
             logging.info("API responded 500. This happens occasionally")
         else:
             logging.warning(
-                f"Request failed, status code {response.status_code} - text {response.text} - data {data}",
-            )
+                f"Request failed, status code {response.status_code} - text {response.text} - data "
+                "{data}",)
         response.raise_for_status()
         # In case raise_for_status does not raise an exception we return None
         return None
 
     @staticmethod
-    def _parse_response(response: Optional[rq.Response]) -> TikTokResponse:
+    def _parse_response(response: rq.Response | None) -> TikTokResponse:
         if response is None:
             raise ValueError("Response is None")
 
@@ -596,8 +590,8 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
             if not api_response.videos and crawl.has_more:
                 logging.log(
                     logging.ERROR,
-                    f"No videos in response but there's still data to Crawl - Query: {self._config.query} \n api_response.data: {api_response.data}",
-                )
+                    "No videos in response but there's still data to Crawl - Query: "
+                    f"{self._config.query} \n api_response.data: {api_response.data}",)
             if self._config.stop_after_one_request:
                 logging.info("Stopping after one request")
                 break