From 23b3cef7702585f3f69ff3aaffceab2e1936c6af Mon Sep 17 00:00:00 2001
From: Paul
Date: Sun, 23 Jun 2024 16:59:52 -0400
Subject: [PATCH] hatch formatting (ie ruff) on whole repo

---
 conftest.py                                   |  4 ++--
 src/tiktok_api_helper/api_client.py           | 21 ++++++++-----------
 src/tiktok_api_helper/cli_data_acquisition.py |  5 +++--
 src/tiktok_api_helper/custom_types.py         |  9 +++++---
 src/tiktok_api_helper/sql.py                  |  4 +---
 tests/test_api_client.py                      | 20 +++++-------------
 tests/test_sql.py                             | 10 ++++-----
 tests/test_utils.py                           |  1 +
 8 files changed, 31 insertions(+), 43 deletions(-)

diff --git a/conftest.py b/conftest.py
index 4856c9a..0636915 100644
--- a/conftest.py
+++ b/conftest.py
@@ -3,7 +3,8 @@
 # Register tests/test_utils.py module as a pytest plugin so that tests can use fixtures from that
 # module without importing them (pytest recommends against importing them, and linters will flag the
 # import as unused and the fixture usage in test method args as redefining the var; https://github.com/astral-sh/ruff/issues/4046)
-pytest_plugins="tests.test_utils"
+pytest_plugins = "tests.test_utils"
+
 
 def pytest_addoption(parser):
     """Used to pass database URL for DB docker container in integration test."""
@@ -18,4 +19,3 @@ def pytest_addoption(parser):
 @pytest.fixture
 def database_url_command_line_arg(request):
     return request.config.getoption("--database-url")
-
diff --git a/src/tiktok_api_helper/api_client.py b/src/tiktok_api_helper/api_client.py
index 2cefed9..fd1bcac 100644
--- a/src/tiktok_api_helper/api_client.py
+++ b/src/tiktok_api_helper/api_client.py
@@ -23,9 +23,7 @@
 
 ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"
 
-SEARCH_ID_INVALID_ERROR_MESSAGE_REGEX = re.compile(
-    r"Search Id \d+ is invalid or expired"
-)
+SEARCH_ID_INVALID_ERROR_MESSAGE_REGEX = re.compile(r"Search Id \d+ is invalid or expired")
 
 INVALID_SEARCH_ID_ERROR_RETRY_WAIT = 5
 INVALID_SEARCH_ID_ERROR_MAX_NUM_RETRIES = 5
@@ -136,9 +134,11 @@ def as_json(self, indent=None):
             request_obj["cursor"] = self.cursor
         return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)
 
+
 def is_json_decode_error(exception):
     return isinstance(exception, rq.exceptions.JSONDecodeError | json.JSONDecodeError)
 
+
 def retry_json_decoding_error_once(
     retry_state,
 ):
@@ -451,7 +451,8 @@ def _post(self, request: TiktokRequest) -> rq.Response | None:
         else:
             logging.warning(
                 f"Request failed, status code {response.status_code} - text {response.text} - data "
-                "{data}",)
+                "{data}",
+            )
             response.raise_for_status()
         # In case raise_for_status does not raise an exception we return None
         return None
@@ -476,9 +477,7 @@
 
         videos = response_data_section.get("videos", [])
 
-        return TikTokResponse(
-            data=response_data_section, videos=videos, error=error_data
-        )
+        return TikTokResponse(data=response_data_section, videos=videos, error=error_data)
 
 
 def update_crawl_from_api_response(
@@ -487,10 +486,7 @@
     crawl.cursor = api_response.data["cursor"]
     crawl.has_more = api_response.data["has_more"]
 
-    if (
-        "search_id" in api_response.data
-        and api_response.data["search_id"] != crawl.search_id
-    ):
+    if "search_id" in api_response.data and api_response.data["search_id"] != crawl.search_id:
         if crawl.search_id is not None:
             logging.log(
                 logging.ERROR,
@@ -591,7 +587,8 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
                 logging.log(
                     logging.ERROR,
                     "No videos in response but there's still data to Crawl - Query: "
-                    f"{self._config.query} \n api_response.data: {api_response.data}",)
+                    f"{self._config.query} \n api_response.data: {api_response.data}",
+                )
             if self._config.stop_after_one_request:
                 logging.info("Stopping after one request")
                 break
diff --git a/src/tiktok_api_helper/cli_data_acquisition.py b/src/tiktok_api_helper/cli_data_acquisition.py
index 775861f..5d79300 100644
--- a/src/tiktok_api_helper/cli_data_acquisition.py
+++ b/src/tiktok_api_helper/cli_data_acquisition.py
@@ -57,7 +57,7 @@ def run_long_query(config: ApiClientConfig):
     """Runs a "long" query, defined as one that may need multiple requests to get all the data.
 
     Unless you have a good reason to believe otherwise, queries should default to be considered
-    "long". """
+    "long"."""
     api_client = TikTokApiClient.from_config(config)
     api_client.fetch_and_store_all()
 
@@ -288,7 +288,8 @@ def run(
     query_file_json: JsonQueryFileType | None = None,
     api_credentials_file: ApiCredentialsFileType = _DEFAULT_CREDENTIALS_FILE_PATH,
     rate_limit_wait_strategy: ApiRateLimitWaitStrategyType = (
-        ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS),
+        ApiRateLimitWaitStrategy.WAIT_FOUR_HOURS
+    ),
     region: RegionCodeListType | None = None,
     include_any_hashtags: IncludeAnyHashtagListType | None = None,
     exclude_any_hashtags: ExcludeAnyHashtagListType | None = None,
diff --git a/src/tiktok_api_helper/custom_types.py b/src/tiktok_api_helper/custom_types.py
index 3642601..2d5646f 100644
--- a/src/tiktok_api_helper/custom_types.py
+++ b/src/tiktok_api_helper/custom_types.py
@@ -13,8 +13,10 @@
 TikTokEndDateFormat = Annotated[
     str,
     typer.Argument(
-        help=("End date in the format %Y%m%d (e.g. 20210101) NOT INCLUSIVE (ie start date 20210101 "
-              "and end date 20210102 will only include API results from 20210101.)")
+        help=(
+            "End date in the format %Y%m%d (e.g. 20210101) NOT INCLUSIVE (ie start date 20210101 "
+            "and end date 20210102 will only include API results from 20210101.)"
+        )
     ),
 ]
 
@@ -72,7 +74,8 @@
             "(ie close lid on laptop) the wait time is also paused. So if you use "
             f"{ApiRateLimitWaitStrategy.WAIT_NEXT_UTC_MIDNIGHT.value} and the machine goes to "
             "sleep retry will likely wait past upcoming midnight by however long the machine was "
-            "asleep"),
+            "asleep"
+        ),
     ),
 ]
 
diff --git a/src/tiktok_api_helper/sql.py b/src/tiktok_api_helper/sql.py
index eb8d95d..262c720 100644
--- a/src/tiktok_api_helper/sql.py
+++ b/src/tiktok_api_helper/sql.py
@@ -402,9 +402,7 @@ def from_query(
             has_more=has_more,
             query=json.dumps(query, cls=QueryJSONEncoder),
             search_id=search_id,
-            crawl_tags=(
-                {CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()
-            ),
+            crawl_tags=({CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()),
         )
 
     def upload_self_to_db(self, engine: Engine) -> None:
diff --git a/tests/test_api_client.py b/tests/test_api_client.py
index f51d25d..e7dd1ef 100644
--- a/tests/test_api_client.py
+++ b/tests/test_api_client.py
@@ -139,15 +139,11 @@ def test_tiktok_api_request_client_retry_once_on_json_decoder_error(
         access_token_fetcher_session=mock_access_token_fetcher_session,
     )
     with pytest.raises(json.JSONDecodeError):
-        request.fetch(
-            api_client.TiktokRequest(query={}, start_date=None, end_date=None)
-        )
+        request.fetch(api_client.TiktokRequest(query={}, start_date=None, end_date=None))
     # Confirm that code retried the post request and json extraction twice (ie retried once after
     # the decode error before the exception is re-raised)
     assert mock_request_session_json_decoder_error.post.call_count == 2
-    assert (
-        mock_request_session_json_decoder_error.post.return_value.json.call_count == 2
-    )
+    assert mock_request_session_json_decoder_error.post.return_value.json.call_count == 2
 
     mock_sleep.assert_called_once_with(0)
 
@@ -170,10 +166,7 @@ def test_tiktok_api_request_client_wait_one_hour_on_rate_limit_wait_strategy(
     # Confirm that code retried the post request and json extraction twice (ie retried once after
     # the decode error before the exception is re-raised)
     assert mock_request_session_rate_limit_error.post.call_count == num_retries
-    assert (
-        mock_request_session_rate_limit_error.post.return_value.json.call_count
-        == num_retries
-    )
+    assert mock_request_session_rate_limit_error.post.return_value.json.call_count == num_retries
     # Sleep will be called once less than num_retries because it is not called after last retry
     assert mock_sleep.call_count == num_retries - 1
     assert mock_sleep.mock_calls == [
@@ -207,8 +200,7 @@ def test_tiktok_api_request_client_wait_til_next_utc_midnight_on_rate_limit_wait
     # after the decode error before the exception is re-raised)
     assert mock_request_session_rate_limit_error.post.call_count == num_retries
     assert (
-        mock_request_session_rate_limit_error.post.return_value.json.call_count
-        == num_retries
+        mock_request_session_rate_limit_error.post.return_value.json.call_count == num_retries
     )
     # Sleep will be called once less than num_retries because it is not called after last retry
     assert mock_sleep.call_count == num_retries - 1
@@ -395,9 +387,7 @@ def assert_has_expected_crawl_and_videos_in_database(
     crawl = crawls[0]
     assert crawl.id == fetch_result.crawl.id
     assert crawl.cursor == len(tiktok_responses) * acquisition_config.max_count
-    assert crawl.query == json.dumps(
-        acquisition_config.query, cls=query.QueryJSONEncoder
-    )
+    assert crawl.query == json.dumps(acquisition_config.query, cls=query.QueryJSONEncoder)
     videos = all_videos(session)
     assert len(videos) == len(tiktok_responses) * len(tiktok_responses[0].videos)
     assert len(videos) == len(fetch_result.videos)
diff --git a/tests/test_sql.py b/tests/test_sql.py
index 912e5d2..e9740c2 100644
--- a/tests/test_sql.py
+++ b/tests/test_sql.py
@@ -362,9 +362,9 @@ def test_upsert_videos_to_crawls_association(test_database_engine, mock_crawl, a
         engine=test_database_engine,
     )
     with Session(test_database_engine) as session:
-        assert {
-            v.id: {crawl.id for crawl in v.crawls} for v in all_videos(session)
-        } == {v["id"]: {expected_crawl_id} for v in api_response_videos}
+        assert {v.id: {crawl.id for crawl in v.crawls} for v in all_videos(session)} == {
+            v["id"]: {expected_crawl_id} for v in api_response_videos
+        }
 
 
 def test_upsert_existing_hashtags_names_gets_same_id(
@@ -396,9 +396,7 @@
         engine=test_database_engine,
     )
 
-    original_hashtags = {
-        hashtag.id: hashtag.name for hashtag in all_hashtags(session)
-    }
+    original_hashtags = {hashtag.id: hashtag.name for hashtag in all_hashtags(session)}
     assert set(original_hashtags.values()) == {"hashtag1", "hashtag2"}
 
     upsert_videos(
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2ee7f40..1739433 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,6 +2,7 @@
 pytest plugins in conftest.py. If this module is moved conftest.py pytest_plugins will also need to
 be updated.
 """
+
 import json
 
 import pytest