-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter.py
98 lines (89 loc) · 3.47 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#! python
"""
File: twitterTrendCapture.py
Description:
This script calls an API to obtain a download link for trending Twitter hashtags,
then loads the downloaded data into a local database. It can be run daily as a cron job or scheduled task.
"""
# import libraries used below
import http.client
import json
import os
from datetime import date
from datetime import timedelta

import mysql.connector
import pandas as pd
import requests
from mysql.connector import errorcode
# Setup api details here.
# The host is named once so the connection and the host header cannot drift apart.
API_HOST = "onurmatik-twitter-trends-archive-v1.p.rapidapi.com"
api_conn = http.client.HTTPSConnection(API_HOST)
headers = {
    'x-rapidapi-host': API_HOST,
    # Prefer a key supplied through the RAPIDAPI_KEY environment variable so the
    # credential can be rotated without editing this file. An unset or empty
    # variable falls back to the original literal, so existing runs keep working.
    # NOTE(review): the fallback key is committed in source; rotate it and drop
    # the fallback once RAPIDAPI_KEY is configured wherever this script runs.
    'x-rapidapi-key': (
        os.environ.get("RAPIDAPI_KEY")
        or "8ea6d22264mshd97d2ff15c516c5p160f30jsn8a63f7d83682"
    ),
}
# Define function to get json response from rapidapi
def get_api_response(captureDate):
    """Return the parsed JSON body of the ``/download`` call for *captureDate*.

    Issues a GET on the module-level ``api_conn`` connection, sending the
    module-level ``headers`` and asking for country ``US`` on the given date.
    The response body is decoded as UTF-8 and parsed with ``json.loads``.
    """
    request_path = f"/download?date={captureDate!s}&country=US"
    api_conn.request("GET", request_path, headers=headers)
    response = api_conn.getresponse()
    body_text = response.read().decode("utf-8")
    return json.loads(body_text)
# Connect to the local database.
try:
    cnx = mysql.connector.connect(
        user='root',
        password='',
        host='localhost',
        database='rapidapi_examples',
    )
except mysql.connector.Error as err:
    # Report the most common causes in plain language, anything else verbatim.
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
    # Without a connection `cnx` is never bound, so carrying on would only
    # fail later with a NameError at the first insert (after the API call has
    # already been spent). Stop here with a non-zero exit status instead.
    raise SystemExit(1)
# Work out which day to capture: daysBack days before today.
# A value of 1 gives a full set of yesterday's data.
daysBack = 1
today = date.today()
captureDate = today - timedelta(days=daysBack)

# Call the API for that date; its JSON response carries the download link
# under the 'url' key.
api_response = get_api_response(captureDate)
resultsURL = api_response["url"]

# Let pandas read the CSV at the returned URL into a DataFrame.
df = pd.read_csv(resultsURL)
# Insert every row of the downloaded CSV into twitter_popular_tags.
#
# The statement is parameterized: the connector quotes and escapes each value
# itself, so text coming from the downloaded file can never change the SQL.
sql = (
    "insert into twitter_popular_tags "
    "(hashtag,location,location_type,popular_date_time,popular_date)"
    " values (%s,%s,%s,%s,%s)"
)

# One cursor serves all inserts and is always closed afterwards, rather than
# creating (and never closing) a fresh cursor for every row.
cursor = cnx.cursor()
try:
    # itertuples yields plain tuples, so the integer indexes below are
    # unambiguous positions. Integer keys on the label-indexed Series that
    # iterrows produces are deprecated positional access in recent pandas.
    for row in df.itertuples(index=False, name=None):
        # Same clean-up as before so newly stored values stay consistent with
        # rows already in the table: drop quotes and '#' from the hashtag, and
        # single quotes from the other text fields.
        thisHashTag = row[0].replace("'", '').replace('"', '').replace('#', '')
        thisLoc = row[1].replace("'", '')
        thisLocType = row[2].replace("'", '')
        thisTime = row[4].replace("'", '')
        # row[5] (count) is sparsely populated and deliberately ignored.
        thisDate = row[3]
        # Date plus time, with ":00" appended for the seconds part.
        thisDT = row[3] + " " + thisTime + ":00"
        params = (thisHashTag, thisLoc, thisLocType, thisDT, thisDate)

        # Add the record to the database; a failing row is reported and the
        # load carries on with the next one.
        try:
            cursor.execute(sql, params)
            cnx.commit()
        except mysql.connector.Error as e:
            # Covers DataError, InternalError, IntegrityError, OperationalError,
            # NotSupportedError, ProgrammingError and every other connector
            # error; the concrete class name is included in the message.
            print(type(e).__name__ + " from query: " + sql + " with values " + repr(params))
            print(e)
        except Exception as e:
            # Still best-effort for unexpected failures, but unlike a bare
            # `except:` this lets KeyboardInterrupt and SystemExit through.
            print("Unknown error occurred from query: " + sql + " with values " + repr(params))
            print(e)
finally:
    cursor.close()