Skip to content

Commit

Permalink
hatch formatting (i.e., ruff) on whole repo
Browse files Browse the repository at this point in the history
  • Loading branch information
macpd committed Jun 23, 2024
1 parent ddb864a commit 23b3cef
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 43 deletions.
4 changes: 2 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# Register tests/test_utils.py module as a pytest plugin so that tests can use fixtures from that
# module without importing them (pytest recommends against importing them, and linters will flag the
# import as unused and the fixture usage in test method args as redefining the var; https://github.com/astral-sh/ruff/issues/4046)
pytest_plugins="tests.test_utils"
pytest_plugins = "tests.test_utils"


def pytest_addoption(parser):
"""Used to pass database URL for DB docker container in integration test."""
Expand All @@ -18,4 +19,3 @@ def pytest_addoption(parser):
@pytest.fixture
def database_url_command_line_arg(request):
return request.config.getoption("--database-url")

21 changes: 9 additions & 12 deletions src/tiktok_api_helper/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@

ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"

SEARCH_ID_INVALID_ERROR_MESSAGE_REGEX = re.compile(
r"Search Id \d+ is invalid or expired"
)
SEARCH_ID_INVALID_ERROR_MESSAGE_REGEX = re.compile(r"Search Id \d+ is invalid or expired")

INVALID_SEARCH_ID_ERROR_RETRY_WAIT = 5
INVALID_SEARCH_ID_ERROR_MAX_NUM_RETRIES = 5
Expand Down Expand Up @@ -136,9 +134,11 @@ def as_json(self, indent=None):
request_obj["cursor"] = self.cursor
return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)


def is_json_decode_error(exception):
return isinstance(exception, rq.exceptions.JSONDecodeError | json.JSONDecodeError)


def retry_json_decoding_error_once(
retry_state,
):
Expand Down Expand Up @@ -451,7 +451,8 @@ def _post(self, request: TiktokRequest) -> rq.Response | None:
else:
logging.warning(
f"Request failed, status code {response.status_code} - text {response.text} - data "
"{data}",)
"{data}",
)
response.raise_for_status()
# In case raise_for_status does not raise an exception we return None
return None
Expand All @@ -476,9 +477,7 @@ def _parse_response(response: rq.Response | None) -> TikTokResponse:

videos = response_data_section.get("videos", [])

return TikTokResponse(
data=response_data_section, videos=videos, error=error_data
)
return TikTokResponse(data=response_data_section, videos=videos, error=error_data)


def update_crawl_from_api_response(
Expand All @@ -487,10 +486,7 @@ def update_crawl_from_api_response(
crawl.cursor = api_response.data["cursor"]
crawl.has_more = api_response.data["has_more"]

if (
"search_id" in api_response.data
and api_response.data["search_id"] != crawl.search_id
):
if "search_id" in api_response.data and api_response.data["search_id"] != crawl.search_id:
if crawl.search_id is not None:
logging.log(
logging.ERROR,
Expand Down Expand Up @@ -591,7 +587,8 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
logging.log(
logging.ERROR,
"No videos in response but there's still data to Crawl - Query: "
f"{self._config.query} \n api_response.data: {api_response.data}",)
f"{self._config.query} \n api_response.data: {api_response.data}",
)
if self._config.stop_after_one_request:
logging.info("Stopping after one request")
break
Expand Down
5 changes: 3 additions & 2 deletions src/tiktok_api_helper/cli_data_acquisition.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def run_long_query(config: ApiClientConfig):
"""Runs a "long" query, defined as one that may need multiple requests to get all the data.
Unless you have a good reason to believe otherwise, queries should default to be considered
"long". """
"long"."""
api_client = TikTokApiClient.from_config(config)
api_client.fetch_and_store_all()

Expand Down Expand Up @@ -288,7 +288,8 @@ def run(
query_file_json: JsonQueryFileType | None = None,
api_credentials_file: ApiCredentialsFileType = _DEFAULT_CREDENTIALS_FILE_PATH,
rate_limit_wait_strategy: ApiRateLimitWaitStrategyType = (
ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS),
ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS
),
region: RegionCodeListType | None = None,
include_any_hashtags: IncludeAnyHashtagListType | None = None,
exclude_any_hashtags: ExcludeAnyHashtagListType | None = None,
Expand Down
9 changes: 6 additions & 3 deletions src/tiktok_api_helper/custom_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
TikTokEndDateFormat = Annotated[
str,
typer.Argument(
help=("End date in the format %Y%m%d (e.g. 20210101) NOT INCLUSIVE (ie start date 20210101 "
"and end date 20210102 will only include API results from 20210101.)")
help=(
"End date in the format %Y%m%d (e.g. 20210101) NOT INCLUSIVE (ie start date 20210101 "
"and end date 20210102 will only include API results from 20210101.)"
)
),
]

Expand Down Expand Up @@ -72,7 +74,8 @@
"(ie close lid on laptop) the wait time is also paused. So if you use "
f"{ApiRateLimitWaitStrategy.WAIT_NEXT_UTC_MIDNIGHT.value} and the machine goes to "
"sleep retry will likely wait past upcoming midnight by however long the machine was "
"asleep"),
"asleep"
),
),
]

Expand Down
4 changes: 1 addition & 3 deletions src/tiktok_api_helper/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,7 @@ def from_query(
has_more=has_more,
query=json.dumps(query, cls=QueryJSONEncoder),
search_id=search_id,
crawl_tags=(
{CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()
),
crawl_tags=({CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()),
)

def upload_self_to_db(self, engine: Engine) -> None:
Expand Down
20 changes: 5 additions & 15 deletions tests/test_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,11 @@ def test_tiktok_api_request_client_retry_once_on_json_decoder_error(
access_token_fetcher_session=mock_access_token_fetcher_session,
)
with pytest.raises(json.JSONDecodeError):
request.fetch(
api_client.TiktokRequest(query={}, start_date=None, end_date=None)
)
request.fetch(api_client.TiktokRequest(query={}, start_date=None, end_date=None))
# Confirm that code retried the post request and json extraction twice (ie retried once after
# the decode error before the exception is re-raised)
assert mock_request_session_json_decoder_error.post.call_count == 2
assert (
mock_request_session_json_decoder_error.post.return_value.json.call_count == 2
)
assert mock_request_session_json_decoder_error.post.return_value.json.call_count == 2
mock_sleep.assert_called_once_with(0)


Expand All @@ -170,10 +166,7 @@ def test_tiktok_api_request_client_wait_one_hour_on_rate_limit_wait_strategy(
# Confirm that code retried the post request and json extraction twice (ie retried once after
# the decode error before the exception is re-raised)
assert mock_request_session_rate_limit_error.post.call_count == num_retries
assert (
mock_request_session_rate_limit_error.post.return_value.json.call_count
== num_retries
)
assert mock_request_session_rate_limit_error.post.return_value.json.call_count == num_retries
# Sleep will be called once less than num_retries because it is not called after last retry
assert mock_sleep.call_count == num_retries - 1
assert mock_sleep.mock_calls == [
Expand Down Expand Up @@ -207,8 +200,7 @@ def test_tiktok_api_request_client_wait_til_next_utc_midnight_on_rate_limit_wait
# after the decode error before the exception is re-raised)
assert mock_request_session_rate_limit_error.post.call_count == num_retries
assert (
mock_request_session_rate_limit_error.post.return_value.json.call_count
== num_retries
mock_request_session_rate_limit_error.post.return_value.json.call_count == num_retries
)
# Sleep will be called once less than num_retries because it is not called after last retry
assert mock_sleep.call_count == num_retries - 1
Expand Down Expand Up @@ -395,9 +387,7 @@ def assert_has_expected_crawl_and_videos_in_database(
crawl = crawls[0]
assert crawl.id == fetch_result.crawl.id
assert crawl.cursor == len(tiktok_responses) * acquisition_config.max_count
assert crawl.query == json.dumps(
acquisition_config.query, cls=query.QueryJSONEncoder
)
assert crawl.query == json.dumps(acquisition_config.query, cls=query.QueryJSONEncoder)
videos = all_videos(session)
assert len(videos) == len(tiktok_responses) * len(tiktok_responses[0].videos)
assert len(videos) == len(fetch_result.videos)
Expand Down
10 changes: 4 additions & 6 deletions tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,9 @@ def test_upsert_videos_to_crawls_association(test_database_engine, mock_crawl, a
engine=test_database_engine,
)
with Session(test_database_engine) as session:
assert {
v.id: {crawl.id for crawl in v.crawls} for v in all_videos(session)
} == {v["id"]: {expected_crawl_id} for v in api_response_videos}
assert {v.id: {crawl.id for crawl in v.crawls} for v in all_videos(session)} == {
v["id"]: {expected_crawl_id} for v in api_response_videos
}


def test_upsert_existing_hashtags_names_gets_same_id(
Expand Down Expand Up @@ -396,9 +396,7 @@ def test_upsert_existing_hashtags_names_gets_same_id(
engine=test_database_engine,
)

original_hashtags = {
hashtag.id: hashtag.name for hashtag in all_hashtags(session)
}
original_hashtags = {hashtag.id: hashtag.name for hashtag in all_hashtags(session)}
assert set(original_hashtags.values()) == {"hashtag1", "hashtag2"}

upsert_videos(
Expand Down
1 change: 1 addition & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
pytest plugins in conftest.py. If this module is moved conftest.py pytest_plugins will also need to
be updated.
"""

import json

import pytest
Expand Down

0 comments on commit 23b3cef

Please sign in to comment.