-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitterScraper.py
49 lines (36 loc) · 1.51 KB
/
twitterScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import tweepy
from pickle import dump, load
from config import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET
class TwitterScraper(object):
    """Scrape a Twitter user's timeline with Tweepy and pickle the result.

    Credentials are read from config.py at import time; the authenticated
    API client is built once in __init__ and reused by scrape().
    """

    def __init__(self, user):
        """Authenticate against the Twitter API and remember the target.

        user: Twitter screen name of the account to scrape.
        """
        # Build an authenticated client from the app/user key pairs.
        auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
        self.api = tweepy.API(auth)
        self.user = user

    def scrape(self):
        """Download the user's timeline (retweets excluded) and pickle it.

        Pages through the timeline 200 statuses at a time, collects every
        status object into one list, and dumps that list to
        '<user>Tweets.pickle' in the current working directory.

        Returns:
            -1 if the Tweepy API raises an error; otherwise None after
            the pickle file has been written.
        """
        scraped_tweets = []
        try:
            page = 1  # user_timeline pagination is 1-based
            while True:
                tweets = self.api.user_timeline(screen_name=str(self.user), page=page, count=200, tweet_mode='extended', include_rts=False)
                if not tweets:
                    # Empty page: we have walked off the end of the timeline.
                    break
                scraped_tweets.extend(tweets)
                page += 1  # next page
            # Context manager guarantees the file handle is closed even if
            # dump() raises (the original leaked the handle in that case).
            with open(str(self.user) + 'Tweets.pickle', 'wb') as f:
                dump(scraped_tweets, f)
        except tweepy.error.TweepError:
            # Preserve the original error contract: signal failure with -1.
            return -1