V5.2.0 - Add Comments & Fix A Few Things (#908)

* fixed issue for counts over 30 (#900)
  Co-authored-by: Tommy Daly <ctdgunner7@gmail.com>
* Add comments & Fix Some Stuff
* version bump

Co-authored-by: ctdgunner <43358137+ctdgunner@users.noreply.github.com>
Co-authored-by: Tommy Daly <ctdgunner7@gmail.com>
Co-authored-by: davidteather <davidteather@users.noreply.github.com>
davidteather · Jul 3, 2022 · dc9393d · dc9393d
1 parent dbe9c6f
commit dc9393d
Show file tree

Hide file tree

Showing 22 changed files with 1,327 additions and 138 deletions.
diff --git a/CITATION.cff b/CITATION.cff
@@ -5,5 +5,5 @@ authors:
     orcid: "https://orcid.org/0000-0002-9467-4676"
 title: "TikTokAPI"
 url: "https://github.com/davidteather/tiktok-api"
-version: 5.1.1
-date-released: 2022-3-21
+version: 5.2.0
+date-released: 2022-7-3
diff --git a/TikTokApi/api/comment.py b/TikTokApi/api/comment.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from typing import ClassVar, Optional
+from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
+
+if TYPE_CHECKING:
+    from ..tiktok import TikTokApi
+    from .user import User
+
+
+class Comment:
+    """
+    A TikTok Comment.
+
+    Instances are built from a raw comment dictionary; the attributes below
+    are filled in from that dictionary when it is passed to the constructor.
+
+    Example Usage
+    ```py
+    for comment in video.comments():
+        print(comment.text)
+    ```
+    """
+
+    parent: ClassVar[TikTokApi]
+
+    id: str
+    """The id of the comment"""
+    author: User
+    """The author of the comment"""
+    text: str
+    """The contents of the comment"""
+    likes_count: int
+    """The amount of likes of the comment"""
+    as_dict: dict
+    """The raw data associated with this comment"""
+
+    def __init__(self, data: Optional[dict] = None):
+        """
+        Create a Comment, optionally from raw comment data.
+
+        - Parameters:
+            - data (dict): The raw comment dictionary. When omitted, none of
+              the attributes are populated.
+        """
+        if data is not None:
+            self.as_dict = data
+            self.__extract_from_data()
+
+    def __extract_from_data(self):
+        """Populate id, text, author and likes_count from `as_dict`."""
+        data = self.as_dict
+        self.id = data["cid"]
+        self.text = data["text"]
+
+        usr = data["user"]
+        self.author = self.parent.user(
+            user_id=usr["uid"], username=usr["unique_id"], sec_uid=usr["sec_uid"]
+        )
+        self.likes_count = data["digg_count"]
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __str__(self):
+        # getattr with a default keeps printing safe for a Comment that was
+        # created without data (its id/text were never assigned).
+        comment_id = getattr(self, "id", None)
+        text = getattr(self, "text", None)
+        return f"TikTokApi.comment(comment_id='{comment_id}', text='{text}')"
+
+    def __getattr__(self, name):
+        """
+        Fallback used only when normal attribute lookup fails.
+
+        This class defines no method that could fetch a comment's data, so a
+        missing attribute is always reported as an AttributeError.
+
+        - Raises:
+            - AttributeError: always; the message says whether the Comment
+              simply has no data or the attribute does not exist at all.
+        """
+        if name == "as_dict":
+            raise AttributeError(
+                "as_dict is not available: this TikTokApi.api.Comment was "
+                "created without data"
+            )
+
+        raise AttributeError(f"{name} doesn't exist on TikTokApi.api.Comment")
diff --git a/TikTokApi/api/hashtag.py b/TikTokApi/api/hashtag.py
@@ -39,8 +39,11 @@ def __init__(
         """
         You must provide the name or id of the hashtag.
         """
-        self.name = name
-        self.id = id
+
+        if name is not None:
+            self.name = name
+        if id is not None:
+            self.id = id
 
         if data is not None:
             self.as_dict = data
@@ -96,35 +99,24 @@ def videos(self, count=30, offset=0, **kwargs) -> Iterator[Video]:
             # do something
         ```
         """
-        processed = self.parent._process_kwargs(kwargs)
-        kwargs["custom_device_id"] = processed.device_id
-
-        if self.id is None:
-            self.id = self.info()["id"]
-
         cursor = offset
         page_size = 30
-
         while cursor - offset < count:
             query = {
+                "aid": 1988,
                 "count": page_size,
                 "challengeID": self.id,
                 "cursor": cursor,
             }
-            path = "api/challenge/item_list/?{}&{}".format(
-                self.parent._add_url_params(), urlencode(query)
-            )
-            res = self.parent.get_data(path, **kwargs)
-
+            path = "api/challenge/item_list/?{}".format(urlencode(query))
+            res = self.parent.get_data_no_sig(path, subdomain="us", **kwargs)
             for result in res.get("itemList", []):
                 yield self.parent.video(data=result)
-
             if not res.get("hasMore", False):
                 self.parent.logger.info(
                     "TikTok isn't sending more TikToks beyond this point."
                 )
                 return
-
             cursor = int(res["cursor"])
 
     def __extract_from_data(self):

diff --git a/TikTokApi/api/user.py b/TikTokApi/api/user.py
@@ -111,6 +111,26 @@ def info_full(self, **kwargs) -> dict:
 
         return user_props["userInfo"]
 
+        """
+        TODO: There is a route for user info, but uses msToken :\
+        processed = self.parent._process_kwargs(kwargs)
+        kwargs["custom_device_id"] = processed.device_id
+
+        query = {
+            "uniqueId": "therock",
+            "secUid": "",
+            "msToken": User.parent._get_cookies()["msToken"]
+        }
+
+        path = "api/user/detail/?{}&{}".format(
+            User.parent._add_url_params(), urlencode(query)
+        )
+
+        res = User.parent.get_data(path, subdomain="m", **kwargs)
+        print(res)
+
+        return res["userInfo"]"""
+
     def videos(self, count=30, cursor=0, **kwargs) -> Iterator[Video]:
         """
         Returns an iterator yielding Video objects.
@@ -155,8 +175,8 @@ def videos(self, count=30, cursor=0, **kwargs) -> Iterator[Video]:
             res = User.parent.get_data(path, send_tt_params=True, **kwargs)
 
             videos = res.get("itemList", [])
-            amount_yielded += len(videos)
             for video in videos:
+                amount_yielded += 1
                 yield self.parent.video(data=video)
 
             if not res.get("hasMore", False) and not first:
@@ -218,7 +238,6 @@ def liked(self, count: int = 30, cursor: int = 0, **kwargs) -> Iterator[Video]:
                 return
 
             videos = res.get("itemList", [])
-            amount_yielded += len(videos)
             for video in videos:
                 amount_yielded += 1
                 yield self.parent.video(data=video)

diff --git a/TikTokApi/api/video.py b/TikTokApi/api/video.py
@@ -2,14 +2,16 @@
 
 from urllib.parse import urlencode
 from ..helpers import extract_video_id_from_url
-from typing import TYPE_CHECKING, ClassVar, Optional
+from typing import TYPE_CHECKING, ClassVar, Iterator, Optional
 from datetime import datetime
+import requests
 
 if TYPE_CHECKING:
     from ..tiktok import TikTokApi
     from .user import User
     from .sound import Sound
     from .hashtag import Hashtag
+    from .comment import Comment
 
 
 class Video:
@@ -53,7 +55,9 @@ def __init__(
             self.as_dict = data
             self.__extract_from_data()
         elif url is not None:
-            self.id = extract_video_id_from_url(url)
+            self.id = extract_video_id_from_url(
+                url, headers={"user-agent": self.parent._user_agent}
+            )
 
         if self.id is None:
             raise TypeError("You must provide id or url parameter.")
@@ -88,7 +92,6 @@ def info_full(self, **kwargs) -> dict:
         path = "api/item/detail/?{}&{}".format(
             self.parent._add_url_params(), urlencode(query)
         )
-
         return self.parent.get_data(path, **kwargs)
 
     def bytes(self, **kwargs) -> bytes:
@@ -133,6 +136,52 @@ def __extract_from_data(self) -> None:
                 f"Failed to create Video with data: {data}\nwhich has keys {data.keys()}"
             )
 
+    def comments(self, count=20, offset=0, **kwargs) -> Iterator[Comment]:
+        """
+        Returns an iterator yielding Comment objects from the video.
+
+        - Parameters:
+            - count (int): The maximum amount of comments you want returned.
+            - offset (int): The cursor position to start reading comments from.
+        """
+
+        if count <= 0:
+            # Nothing was asked for, so skip every network request.
+            return
+
+        processed = Video.parent._process_kwargs(kwargs)
+        kwargs["custom_device_id"] = processed.device_id
+        cursor = offset
+        amount_yielded = 0
+
+        # A HEAD request to the homepage is made only to read the `ttwid`
+        # cookie, which is then passed along with every comment request.
+        spawn = requests.head(
+            "https://www.tiktok.com",
+            proxies=Video.parent._format_proxy(processed.proxy),
+            **Video.parent._requests_extra_kwargs,
+        )
+        ttwid = spawn.cookies["ttwid"]
+
+        # Loop on the number of comments handed out rather than on the cursor
+        # distance, so exactly `count` comments are yielded at most.
+        while amount_yielded < count:
+            query = {
+                "aweme_id": self.id,
+                "cursor": cursor,
+                "app_language": Video.parent._language,
+                "count": 30,
+            }
+            path = "api/comment/list/?{}&{}".format(
+                Video.parent._add_url_params(), urlencode(query)
+            )
+
+            api_response = Video.parent.get_data(
+                path, subdomain="www", ttwid=ttwid, **kwargs
+            )
+
+            for comment_data in api_response.get("comments", []):
+                if amount_yielded >= count:
+                    # A page holds up to 30 comments; stop mid-page once the
+                    # requested amount has been reached.
+                    return
+                amount_yielded += 1
+                yield self.parent.comment(data=comment_data)
+
+            if api_response.get("has_more", 0) == 0:
+                Video.parent.logger.info(
+                    "TikTok is not sending comments beyond this point."
+                )
+                return
+
+            next_cursor = int(api_response.get("cursor", cursor))
+            if next_cursor == cursor:
+                # The cursor did not advance, so the next request would be
+                # identical to this one; stop instead of looping forever.
+                return
+            cursor = next_cursor
+
     def __repr__(self):
         return self.__str__()
 
@@ -146,4 +195,9 @@ def __getattr__(self, name):
             self.__extract_from_data()
             return self.__getattribute__(name)
 
+        if name in ["comments"]:
+            # Requires a different request to produce the comments
+            self.__extract_comments()
+            return self.__getattribute__(name)
+
         raise AttributeError(f"{name} doesn't exist on TikTokApi.api.Video")
diff --git a/TikTokApi/browser_utilities/browser.py b/TikTokApi/browser_utilities/browser.py
@@ -1,6 +1,7 @@
 import random
 import time
 import string
+from typing import Any, Optional
 import requests
 import logging
 import time
@@ -19,6 +20,9 @@
 
 
 class browser(BrowserInterface):
+
+    kwargs: dict[str, Any]
+
     def __init__(self, **kwargs):
         pass
 

diff --git a/TikTokApi/helpers.py b/TikTokApi/helpers.py
@@ -1,3 +1,4 @@
+import TikTokApi
 from TikTokApi.browser_utilities.browser import browser
 from urllib.parse import quote, urlencode
 from .exceptions import *
@@ -32,8 +33,8 @@ def extract_tag_contents(html):
             )
 
 
-def extract_video_id_from_url(url):
-    url = requests.head(url=url, allow_redirects=True).url
+def extract_video_id_from_url(url, headers={}):
+    url = requests.head(url=url, allow_redirects=True, headers=headers).url
     if "@" in url and "/video/" in url:
         return url.split("/video/")[1].split("?")[0]
     else:

diff --git a/TikTokApi/tiktok.py b/TikTokApi/tiktok.py
@@ -17,6 +17,7 @@
 from .api.hashtag import Hashtag
 from .api.video import Video
 from .api.trending import Trending
+from .api.comment import Comment
 
 from playwright.sync_api import sync_playwright
 
@@ -41,6 +42,7 @@ class TikTokApi:
     hashtag = Hashtag
     video = Video
     trending = Trending
+    comment = Comment
     logger = logging.getLogger(LOGGER_NAME)
 
     def __init__(
@@ -136,14 +138,16 @@ def _initialize(self, **kwargs):
         Hashtag.parent = self
         Video.parent = self
         Trending.parent = self
+        Comment.parent = self
 
         # Some Instance Vars
         self._executable_path = kwargs.get("executable_path", None)
+        self.cookie_jar = None
 
         if kwargs.get("custom_did") != None:
             raise Exception("Please use 'custom_device_id' instead of 'custom_did'")
         self._custom_device_id = kwargs.get("custom_device_id", None)
-        self._user_agent = "5.0 (iPhone; CPU iPhone OS 14_8 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"
+        self._user_agent = "5.0 (iPhone; CPU iPhone OS 14_8 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1"  # TODO: Randomly generate agents
         self._proxy = kwargs.get("proxy", None)
         self._custom_verify_fp = kwargs.get("custom_verify_fp")
         self._signer_url = kwargs.get("external_signer", None)
@@ -293,6 +297,8 @@ def get_data(self, path, subdomain="m", **kwargs) -> dict:
             **self._requests_extra_kwargs,
         )
 
+        self.cookie_jar = r.cookies
+
         try:
             parsed_data = r.json()
             if (
@@ -378,6 +384,37 @@ def get_data(self, path, subdomain="m", **kwargs) -> dict:
             else:
                 raise InvalidJSONException("TikTok sent invalid JSON") from e
 
+    def get_data_no_sig(self, path, subdomain="m", **kwargs) -> dict:
+        """
+        Makes a plain GET request to TikTok and returns the decoded JSON body.
+
+        The URL is requested exactly as built from `subdomain` and `path`;
+        nothing is appended to it here.
+
+        - Parameters:
+            - path (str): The path and query string to request, without a
+              leading slash.
+            - subdomain (str): The tiktok.com subdomain to send the request to.
+            - kwargs: Forwarded to `_process_kwargs` and `_get_cookies`.
+
+        - Returns:
+            - The response body as parsed by `Response.json()`.
+        """
+        options = self._process_kwargs(kwargs)
+        base = f"https://{subdomain}.tiktok.com/"
+        url = base + path
+        origin = self._browser.referrer
+
+        # Header order is kept as-is: the dict is sent in insertion order.
+        request_headers = {
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0",
+            "authority": "m.tiktok.com",
+            "method": "GET",
+            "path": url.split("tiktok.com")[1],
+            "scheme": "https",
+            "accept": "application/json, text/plain, */*",
+            "accept-encoding": "gzip",
+            "accept-language": "en-US,en;q=0.9",
+            "origin": origin,
+            "referer": origin,
+            "sec-fetch-dest": "empty",
+            "sec-fetch-mode": "cors",
+            "sec-fetch-site": "none",
+            "sec-gpc": "1",
+        }
+        self.logger.debug(f"GET: %s\n\theaders: %s", url, request_headers)
+
+        cookies = self._get_cookies(**kwargs)
+        proxies = self._format_proxy(options.proxy)
+        response = requests.get(
+            url,
+            headers=request_headers,
+            cookies=cookies,
+            proxies=proxies,
+            **self._requests_extra_kwargs,
+        )
+        return response.json()
+
     def __del__(self):
         """A basic cleanup method, called automatically from the code"""
         if not self._is_context_manager: