Skip to content

Commit

Permalink
Rename Query -> VideoQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
macpd committed Jul 4, 2024
1 parent 9d5d97a commit 64b0044
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 41 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ A query is a combination of a "type (and, or, not)" with multiple Conditions ("C
Each condition is a combination of a "field" (Fields, F), a "value" and an operation ("Operations", "Op").
```python
from tiktok_research_api_helper.query import Query, Cond, Fields, Op
from tiktok_research_api_helper.query import VideoQuery, Cond, Fields, Op

query = Query(
query = VideoQuery(
and_=[
Cond(Fields.hashtag_name, "garfield", Op.EQ),
Cond(Fields.region_code, "US", Op.EQ),
Expand All @@ -45,7 +45,7 @@ query = Query(
```python
from pathlib import Path
from datetime import datetime
from tiktok_research_api_helper.query import Query, Cond, Fields, Op
from tiktok_research_api_helper.query import VideoQuery, Cond, Fields, Op
from tiktok_research_api_helper.api_client import ApiClientConfig, TikTokApiClient

config = ApiClientConfig(query=query,
Expand Down Expand Up @@ -77,9 +77,9 @@ from tiktok_research_api_helper.api_client import TikTokApiRequestClient, TikTok

# reads from secrets.yaml in the same directory
request_client = TikTokApiRequestClient.from_credentials_file(Path("./secrets.yaml"))
from tiktok_research_api_helper.query import Query, Cond, Fields, Op
from tiktok_research_api_helper.query import VideoQuery, Cond, Fields, Op

query = Query(or_=Cond(Fields.video_id, ["7345557461438385450", "123456"], Op.IN))
query = VideoQuery(or_=Cond(Fields.video_id, ["7345557461438385450", "123456"], Op.IN))

# sample query
req = TikTokVideoRequest(
Expand Down
10 changes: 5 additions & 5 deletions src/tiktok_research_api_helper/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from tiktok_research_api_helper import utils
from tiktok_research_api_helper.models import Crawl, upsert_videos
from tiktok_research_api_helper.query import Query, QueryJSONEncoder
from tiktok_research_api_helper.query import VideoQuery, VideoQueryJSONEncoder

ALL_VIDEO_DATA_URL = "https://open.tiktokapis.com/v2/research/video/query/?fields=id,video_description,create_time,region_code,share_count,view_count,like_count,comment_count,music_id,hashtag_names,username,effect_ids,voice_to_text,playlist_id"
ALL_USER_INFO_DATA_URL = "https://open.tiktokapis.com/v2/research/user/info/?fields=display_name,bio_description,avatar_url,is_verified,follower_count,following_count,likes_count,video_count"
Expand Down Expand Up @@ -96,7 +96,7 @@ class TikTokApiClientFetchResult:
# and/or comments for videos from latest crawl
@attrs.define
class ApiClientConfig:
video_query: Query
video_query: VideoQuery
start_date: datetime
end_date: datetime
engine: Engine
Expand All @@ -119,7 +119,7 @@ class TikTokVideoRequest:
The start date is inclusive but the end date is NOT.
"""

query: Query
query: VideoQuery
start_date: str
end_date: str # The end date is NOT inclusive!
max_count: int = 100
Expand Down Expand Up @@ -151,7 +151,7 @@ def as_json(self, indent=None):

if self.cursor is not None:
request_obj["cursor"] = self.cursor
return json.dumps(request_obj, cls=QueryJSONEncoder, indent=indent)
return json.dumps(request_obj, cls=VideoQueryJSONEncoder, indent=indent)


@attrs.define
Expand Down Expand Up @@ -689,7 +689,7 @@ def api_results_iter(self) -> TikTokApiClientFetchResult:
if not api_response.videos and crawl.has_more:
logging.log(
logging.ERROR,
"No videos in response but there's still data to Crawl - Query: "
"No videos in response but there's still data to Crawl - VideoQuery: "
f"{self._config.video_query} \n api_response.data: {api_response.data}",
)
if self._config.stop_after_one_request:
Expand Down
12 changes: 6 additions & 6 deletions src/tiktok_research_api_helper/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
Cond,
Fields,
Op,
Query,
QueryJSONEncoder,
VideoQuery,
VideoQueryJSONEncoder,
generate_query,
)

Expand Down Expand Up @@ -124,14 +124,14 @@ def test(
utils.setup_logging_info_level()
logging.log(logging.INFO, f"Arguments: {locals()}")

test_query = Query(
test_query = VideoQuery(
and_=[
Cond(Fields.hashtag_name, "snoopy", Op.EQ),
Cond(Fields.region_code, "US", Op.EQ),
]
)

logging.log(logging.INFO, f"Query: {test_query}")
logging.log(logging.INFO, f"VideoQuery: {test_query}")

start_date_datetime = utils.str_tiktok_date_format_to_datetime("20220101")
end_date_datetime = utils.str_tiktok_date_format_to_datetime("20220101")
Expand Down Expand Up @@ -270,7 +270,7 @@ def print_query(
exclude_from_usernames=exclude_from_usernames,
)

print(json.dumps(query, cls=QueryJSONEncoder, indent=2))
print(json.dumps(query, cls=VideoQueryJSONEncoder, indent=2))


def make_crawl_date_window(crawl_span: int, crawl_lag: int) -> CrawlDateWindow:
Expand Down Expand Up @@ -503,7 +503,7 @@ def run(
exclude_from_usernames=exclude_from_usernames,
)

logging.log(logging.INFO, f"Query: {query}")
logging.log(logging.INFO, f"VideoQuery: {query}")

if db_url:
engine = get_engine_and_create_tables(db_url)
Expand Down
10 changes: 5 additions & 5 deletions src/tiktok_research_api_helper/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
synonym,
)

from tiktok_research_api_helper.query import Query, QueryJSONEncoder
from tiktok_research_api_helper.query import VideoQuery, VideoQueryJSONEncoder

# See https://amercader.net/blog/beware-of-json-fields-in-sqlalchemy/
MUTABLE_JSON = MutableDict.as_mutable(JSON) # type: ignore
Expand Down Expand Up @@ -409,28 +409,28 @@ def __repr__(self) -> str:

@classmethod
def from_request(
cls, res_data: Mapping, query: Query, crawl_tags: Sequence[str] | None = None
cls, res_data: Mapping, query: VideoQuery, crawl_tags: Sequence[str] | None = None
) -> "Crawl":
return cls(
cursor=res_data["cursor"],
has_more=res_data["has_more"],
search_id=res_data["search_id"],
query=json.dumps(query, cls=QueryJSONEncoder),
query=json.dumps(query, cls=VideoQueryJSONEncoder),
crawl_tags=({CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()),
)

# TODO(macpd): rename this to explain its intent of being used before fetch starts
@classmethod
def from_query(
cls,
query: Query,
query: VideoQuery,
crawl_tags: Sequence[str] | None = None,
has_more: bool = True,
search_id: [int | None] = None,
) -> "Crawl":
return cls(
has_more=has_more,
query=json.dumps(query, cls=QueryJSONEncoder),
query=json.dumps(query, cls=VideoQueryJSONEncoder),
search_id=search_id,
crawl_tags=({CrawlTag(name=name) for name in crawl_tags} if crawl_tags else set()),
)
Expand Down
13 changes: 6 additions & 7 deletions src/tiktok_research_api_helper/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ def convert_optional_cond_or_condseq_to_condseq(
return optional_cond_or_seq


# TODO(macpd): rename this to VideoQuery
# NOTE(macpd): renamed from Query to VideoQuery
@attrs.define
class Query:
class VideoQuery:
and_: OptionalCondOrCondSeq = attrs.field(
default=None, converter=convert_optional_cond_or_condseq_to_condseq
)
Expand All @@ -155,10 +155,9 @@ def as_dict(self):
return formatted_operands


# TODO(macpd): rename this to VideoQueryJSONEncoder
class QueryJSONEncoder(json.JSONEncoder):
class VideoQueryJSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, Query):
if isinstance(o, VideoQuery):
return o.as_dict()
return super().default(o)

Expand Down Expand Up @@ -218,7 +217,7 @@ def generate_query(
exclude_all_keywords: str | None = None,
only_from_usernames: str | None = None,
exclude_from_usernames: str | None = None,
) -> Query:
) -> VideoQuery:
query_args = {_QUERY_AND_ARG_NAME: [], _QUERY_NOT_ARG_NAME: []}

if include_any_hashtags:
Expand Down Expand Up @@ -252,7 +251,7 @@ def generate_query(
Cond(Fields.region_code, sorted(region_codes), Op.IN)
)

return Query(**query_args)
return VideoQuery(**query_args)


# TODO(macpd): make a class for this, comment queries, etc
Expand Down
2 changes: 1 addition & 1 deletion tests/test_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def assert_has_expected_crawl_and_videos_in_database(
crawl = crawls[0]
assert crawl.id == fetch_result.crawl.id
assert crawl.cursor == len(tiktok_responses) * acquisition_config.max_count
assert crawl.query == json.dumps(acquisition_config.video_query, cls=query.QueryJSONEncoder)
assert crawl.query == json.dumps(acquisition_config.video_query, cls=query.VideoQueryJSONEncoder)
videos = all_videos(session)
assert len(videos) == len(tiktok_responses) * len(tiktok_responses[0].videos)
assert len(videos) == len(fetch_result.videos)
Expand Down
24 changes: 12 additions & 12 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
Cond,
Fields,
Op,
Query,
QueryJSONEncoder,
VideoQuery,
VideoQueryJSONEncoder,
generate_query,
get_normalized_hashtag_set,
get_normalized_keyword_set,
Expand All @@ -19,7 +19,7 @@
def mock_query_us():
hashtags = ["hashtag", "lol", "yay"]

return Query(
return VideoQuery(
and_=[
Cond(Fields.hashtag_name, hashtags, Op.IN),
Cond(Fields.region_code, "US", Op.EQ),
Expand All @@ -31,7 +31,7 @@ def mock_query_us():
def mock_query_us_ca():
hashtags = ["hashtag", "lol", "yay"]

return Query(
return VideoQuery(
and_=[
Cond(Fields.hashtag_name, hashtags, Op.IN),
Cond(Fields.region_code, ["US", "CA"], Op.IN),
Expand All @@ -47,7 +47,7 @@ def mock_query_exclude_some_hashtags():
"gross",
]

return Query(
return VideoQuery(
and_=[
Cond(Fields.hashtag_name, include_hashtags, Op.IN),
Cond(Fields.region_code, ["US", "CA"], Op.IN),
Expand All @@ -60,7 +60,7 @@ def mock_query_exclude_some_hashtags():

@pytest.fixture
def mock_query_create_date():
return Query(
return VideoQuery(
and_=[
Cond(Fields.create_date, "20230101", Op.EQ),
],
Expand All @@ -81,14 +81,14 @@ def test_query_create_date(mock_query_create_date):

def test_query_invalid_create_date():
with pytest.raises(ValueError):
Query(
VideoQuery(
and_=[
Cond(Fields.create_date, "2023-01-01", Op.EQ),
],
)

with pytest.raises(ValueError):
Query(
VideoQuery(
and_=[
Cond(Fields.create_date, "It's not a date", Op.EQ),
],
Expand Down Expand Up @@ -172,15 +172,15 @@ def test_query_exclude_some_hashtags(mock_query_exclude_some_hashtags):

def test_invalid_region_code():
with pytest.raises(ValueError):
Query(
VideoQuery(
and_=[
Cond(Fields.region_code, "invalid", Op.EQ),
],
)


def test_query_json_decoder_us(mock_query_us):
assert json.dumps(mock_query_us, cls=QueryJSONEncoder, indent=1) == (
assert json.dumps(mock_query_us, cls=VideoQueryJSONEncoder, indent=1) == (
"""
{
"and": [
Expand All @@ -207,7 +207,7 @@ def test_query_json_decoder_us(mock_query_us):


def test_query_json_decoder_us_ca(mock_query_us_ca):
assert json.dumps(mock_query_us_ca, cls=QueryJSONEncoder, indent=1) == (
assert json.dumps(mock_query_us_ca, cls=VideoQueryJSONEncoder, indent=1) == (
"""
{
"and": [
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_query_json_decoder_us_ca(mock_query_us_ca):


def test_query_json_decoder_exclude_some_hashtags(mock_query_exclude_some_hashtags):
assert json.dumps(mock_query_exclude_some_hashtags, cls=QueryJSONEncoder, indent=1) == (
assert json.dumps(mock_query_exclude_some_hashtags, cls=VideoQueryJSONEncoder, indent=1) == (
"""
{
"and": [
Expand Down

0 comments on commit 64b0044

Please sign in to comment.