diff --git a/setup.py b/setup.py index 435bf37..9d25644 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="twitter_scraper_selenium", - version="0.1.2", + version="0.1.3", author="Sajid Shaikh", author_email="shaikhsajid3732@gmail.com", description="Python package to scrap twitter's front-end easily with selenium", diff --git a/twitter_scraper_selenium/element_finder.py b/twitter_scraper_selenium/element_finder.py index 0c772b8..54b54ec 100644 --- a/twitter_scraper_selenium/element_finder.py +++ b/twitter_scraper_selenium/element_finder.py @@ -51,11 +51,8 @@ def __find_shares(tweet): @staticmethod def __find_status(tweet): try: - anchors = Finder.__find_all_anchor_tags(tweet) - status = "NA" - if len(anchors) > 2: - status = anchors[3].get_attribute("href").split("/") - return status + anchor = tweet.find_element_by_css_selector("a.r-bcqeeo.r-3s2u2q.r-qvutc0") + return (anchor.get_attribute("href").split("/"), anchor.get_attribute("href")) except Exception as ex: print("Error at method find_status on line no. {} : {}".format(frameinfo.f_lineno, ex)) return [] @@ -132,7 +129,7 @@ def __find_videos(tweet): @staticmethod def __is_retweet(tweet): try: - tweet.find_element_by_css_selector('[role="presentation"]') + tweet.find_element_by_css_selector('div.r-92ng3h.r-qvutc0') return True except NoSuchElementException: return False @@ -142,12 +139,15 @@ def __is_retweet(tweet): return False @staticmethod - def __find_name_from_post(tweet): + def __find_name_from_post(tweet,is_retweet=False): try: name = "NA" anchors = Finder.__find_all_anchor_tags(tweet) if len(anchors) > 2: - name = anchors[1].text.split("\n")[0] + if is_retweet: + name = anchors[2].text.strip() + else: + name = anchors[1].text.split("\n")[0] return name except Exception as ex: print("Error at method __find_name_from_post on line no. {} : {}".format( diff --git a/twitter_scraper_selenium/keyword.py b/twitter_scraper_selenium/keyword.py index 76b9666..6f873a9 100644 --- a/twitter_scraper_selenium/keyword.py +++ b/twitter_scraper_selenium/keyword.py @@ -52,10 +52,10 @@ def __fetch_and_store_data(self): while len(self.posts_data) < self.tweets_count: for tweet in present_tweets: name = Finder._Finder__find_name_from_post(tweet) - status = Finder._Finder__find_status(tweet) + status,tweet_url = Finder._Finder__find_status(tweet) replies = Finder._Finder__find_replies(tweet) retweets = Finder._Finder__find_shares(tweet) - username = status[3] + username = tweet_url.split("/")[3] status = status[-1] is_retweet = Finder._Finder__is_retweet(tweet) posted_time = Finder._Finder__find_timestamp(tweet) @@ -66,7 +66,6 @@ def __fetch_and_store_data(self): hashtags = re.findall(r"#(\w+)", content) mentions = re.findall(r"@(\w+)", content) profile_picture = "https://twitter.com/{}/photo".format(username) - tweet_url = "https://twitter.com/{}/status/{}".format(username,status) link = Finder._Finder__find_external_link(tweet) self.posts_data[status] = { diff --git a/twitter_scraper_selenium/profile.py b/twitter_scraper_selenium/profile.py index 8e0c4c4..d5b14e4 100644 --- a/twitter_scraper_selenium/profile.py +++ b/twitter_scraper_selenium/profile.py @@ -50,14 +50,14 @@ def __fetch_and_store_data(self): while len(self.posts_data) < self.tweets_count: for tweet in present_tweets: - status = Finder._Finder__find_status(tweet) - name = Finder._Finder__find_name_from_post(tweet) + status,tweet_url = Finder._Finder__find_status(tweet) replies = Finder._Finder__find_replies(tweet) retweets = Finder._Finder__find_shares(tweet) - username = status[3] status = status[-1] + username = tweet_url.split("/")[3] is_retweet = True if self.twitter_username.lower() != username.lower() else False - retweet_link = Finder._Finder__find_all_anchor_tags(tweet)[2].get_attribute("href") if is_retweet is True else "" + name = Finder._Finder__find_name_from_post(tweet,is_retweet) + retweet_link = tweet_url if is_retweet is True else "" posted_time = Finder._Finder__find_timestamp(tweet) content = Finder._Finder__find_content(tweet) likes = Finder._Finder__find_like(tweet) @@ -66,7 +66,6 @@ def __fetch_and_store_data(self): hashtags = re.findall(r"#(\w+)", content) mentions = re.findall(r"@(\w+)", content) profile_picture = "https://twitter.com/{}/photo".format(username) - tweet_url = "https://twitter.com/{}/status/{}".format(username,status) link = Finder._Finder__find_external_link(tweet) self.posts_data[status] = { "tweet_id" : status,