# twitter_crawl.py
# Crawls tweets matching a hashtag query with tweepy and saves their text to a CSV file.
import csv
import html
import os
import re
import sys
import time

import pandas as pd
import tweepy
# Twitter API credentials.
#
# Secrets must never be committed to source control. The keys that used to be
# hard-coded here have been exposed publicly and should be revoked and
# regenerated in the Twitter developer console. The values are now read from
# the environment when the script starts:
#
#   TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET
#
# A missing variable yields an empty string (so the names below always exist)
# and a warning on stderr.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

_missing_credentials = [
    env_name
    for env_name, env_value in (
        ("TWITTER_CONSUMER_KEY", consumer_key),
        ("TWITTER_CONSUMER_SECRET", consumer_secret),
        ("TWITTER_ACCESS_TOKEN", access_token),
        ("TWITTER_ACCESS_TOKEN_SECRET", access_token_secret),
    )
    if not env_value
]
if _missing_credentials:
    print(
        "warning: missing Twitter credentials in environment: "
        + ", ".join(_missing_credentials),
        file=sys.stderr,
    )
def _clean_text(raw):
    """Normalise one tweet's text before it is stored.

    Decodes HTML entities (for example ``&amp;`` becomes ``&``), removes
    commas, and drops standalone ``RT`` retweet markers. Words that merely
    contain the letters "RT" are left intact.
    """
    without_commas = html.unescape(raw).replace(",", "")
    return re.sub(r"\bRT\b", "", without_commas)


# Authenticate against the Twitter API with the credentials defined above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

tweets = []
query = "hate"
print("starting crawl:",query)
try:
    # Page through up to 2000 English results for the hashtag.
    cursor = tweepy.Cursor(
        api.search,
        q="#" + query,
        lang="en",
        since="2017-01-01",
    )
    for tweet in cursor.items(2000):
        text = _clean_text(tweet.text)
        print(text)
        tweets.append(text)
        time.sleep(1e-3)
except Exception as e:
    # Best effort: report the failure and still keep what was collected.
    print(e)
finally:
    # Always write the collected tweets exactly once, including when the
    # crawl is interrupted (e.g. Ctrl-C during a long rate-limit wait).
    pd.DataFrame(tweets).to_csv(query + ".csv")