-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstore_stackjoin.py
306 lines (277 loc) · 16.9 KB
/
store_stackjoin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
import json
import requests
from get_tweet_gif_url import *
from jinja2 import Template
import boto3
from remove_mentions_from_tweet_message import *
from datetime import datetime
import io
from dotenv import load_dotenv, find_dotenv
from pyairtable import Table
from tvdl.tvdl_launcher import twitter_video_dl_launcher
ENV_FILE = find_dotenv()
if ENV_FILE:
load_dotenv(ENV_FILE)
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AIRTABLE_API_KEY = os.environ.get("AIRTABLE_API_KEY")
AIRTABLE_BEARER_TOKEN = os.environ.get("AIRTABLE_BEARER_TOKEN")
def store_stackjoin(json_response, tweet_datetimeISO, stackjoinadd_reporter = "0", stackjoinadd_tweet_message = "", block_height_or_tweet_id="", stackjoin_tweets_or_blocks = "block", dollar_amount=""):
print("running store_stackjoin function")
# temporarily storing json on a file, it will be later transferred directly through the function, just the two lines below, and indenting the whole function to chnage
# with open(json_response,'r') as f:
# json_response = json.load(f)
tweet_id = json_response["data"]["id"]
author_id = json_response["data"]["author_id"]
tweet_message = remove_mentions_from_tweet_message(json_response["data"]["text"])
if tweet_datetimeISO == None:
tweet_datetimeISO = datetime.utcnow().isoformat()
tweet_datetimeISO = tweet_datetimeISO[0:tweet_datetimeISO.find(".")]
tweet_timestamp = str(f"{datetime.timestamp(datetime.now().replace(microsecond=0)):.0f}")
else:
tweet_timestamp = datetime.strptime(tweet_datetimeISO,'%Y-%m-%dT%H:%M:%S')
tweet_timestamp = int(datetime.timestamp(tweet_timestamp))
for item in json_response['includes']['users']:
# print (item)
# print (json_response['data']['author_id'])
# if ['id'] == json_response['data']['author_id']:
if item['id'] == author_id:
print (item['id'])
author_handle = item['username']
print(f"the author handle is {author_handle}")
image_url_dict = []
img_src_dict = []
airtable_image_files_dict = []
airtable_gif_files_dict = []
if "media" in json_response["includes"]:
for index, item in enumerate(json_response['includes']['media']):
video_path = ""
media_key = item['media_key']
if item['type'] == "animated_gif":
print('found animated gif')
if "preview_image_url" in item:
image_url = get_tweet_gif_url(tweet_id, media_key, "gif", gif_url_if_already_included=item['preview_image_url'])
image_preview_url = get_tweet_gif_url(tweet_id, media_key, "video", gif_url_if_already_included=item['preview_image_url'])
else:
image_url = get_tweet_gif_url(tweet_id, media_key, "gif")
image_preview_url = get_tweet_gif_url(tweet_id, media_key, "video")
print(f"the image URL is {image_url}")
elif item['type'] == "video":
print('found video')
if not os.path.exists("stackjoin_tweet_images/"+tweet_id):
print('folder not found, creating')
os.makedirs("stackjoin_tweet_images/"+tweet_id)
video_filename_and_filetype = str(index+1)+"_video.mp4"
video_path = "stackjoin_tweet_images/"+tweet_id+"/"+video_filename_and_filetype
# twitter_video_dl_launcher(f"https://www.twitter.com/{author_handle}/status/{tweet_id}",video_path)
if "preview_image_url" in item:
image_url = get_tweet_gif_url(tweet_id, media_key, "video", gif_url_if_already_included=item['preview_image_url'])
image_preview_url = get_tweet_gif_url(tweet_id, media_key, "video", gif_url_if_already_included=item['preview_image_url'])
else:
image_url = get_tweet_gif_url(tweet_id, media_key, "video")
image_preview_url = get_tweet_gif_url(tweet_id, media_key, "video")
# videos are retrievable now after plugging in twitter_video_downloader!
# tweet_message += " [*Tweet has video attached (videos are unretrievable via API). Open original tweet to access video.]"
else:
image_url = json_response["includes"]["media"][index]["url"]
image_preview_url = image_url
print(f"the image URL is {image_url}")
#saving tweet images to s3
if item['type'] == "animated_gif":
filename = str(index+1)+"_gif"
else:
filename = str(index+1)+"_image"
image_filetype = image_url.rsplit('/', 1)[1].rsplit('.', 1)[1]
image_preview_filetype = image_preview_url.rsplit('/', 1)[1].rsplit('.', 1)[1]
s3_upload = boto3.resource('s3',region_name='us-east-1',aws_access_key_id=AWS_ACCESS_KEY_ID,aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
s3_image_path = 'stackjoin_tweet_images/'+tweet_id+"/"+filename+"."+image_filetype
s3_image_preview_path = 'stackjoin_tweet_images/'+tweet_id+"/"+filename+"_thumbnail."+image_preview_filetype
# uploading stackjoin image previews to S3 bucket
print(f"the image preview URL is {image_preview_url}")
r = requests.get(image_preview_url, allow_redirects=True)
s3_upload.Object('pleblira',s3_image_preview_path).put(Body=io.BytesIO(r.content), ACL="public-read",ContentType='image/jpeg')
# uploading stackjoin images to S3 bucket
print(f"the image URL is {image_url}")
r = requests.get(image_url, allow_redirects=True)
s3_upload.Object('pleblira',s3_image_path).put(Body=io.BytesIO(r.content), ACL="public-read",ContentType='image/jpeg')
# uploading downloaded video to S3
# if item['type'] == "video":
# with open(video_path, 'rb') as f:
# print('uploading to s3')
# s3_upload = boto3.resource('s3',region_name='us-east-1',aws_access_key_id=AWS_ACCESS_KEY_ID,aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
# s3_upload.Object('pleblira',video_path).put(Body=f,ACL="public-read")
# append to image_files_dict for later creating row on Airtable
if item['type'] == "animated_gif":
airtable_gif_files_dict.append({"url":image_url,'filename':filename+"."+image_filetype})
else:
airtable_image_files_dict.append({"url":image_url,'filename':filename+"."+image_filetype})
# if video_path != "":
# airtable_gif_files_dict.append({"url":"https://pleblira.s3.amazonaws.com/"+video_path,'filename':video_filename_and_filetype})
# appending url and html tag for embedding to dictionaries which will then be added to the json on S3 and to the html table
image_url_dict.append(image_url)
img_src_dict.append(f"<a href=\"/{s3_image_path}\" target=\"_blank\"><img src=\"/{s3_image_preview_path}\" style=\"max-width:100px;\"></a>")
# print(f"the image_url_dict is: {image_url_dict}")
# appending url and html tag for video files (new implementation)
# if item['type'] == "video":
# image_url_dict.append(video_path)
# img_src_dict.append(f"<a href=\"/{video_path}\" target=\"_blank\"><img src=\"/{s3_image_preview_path}\" style=\"max-width:100px;\"></a>")
# print(f"the image_url_dict is: {image_url_dict}")
else:
print("no image")
# defining airtable_API_import_notes string
airtable_API_import_notes = ""
# if " [*Tweet has video attached (videos are unretrievable via API). Open original tweet to access video.]" in tweet_message:
# airtable_API_import_notes = "[*Tweet has video attached (videos are unretrievable via API). Open original tweet to access video.]"
if stackjoinadd_reporter != "0":
airtable_API_import_notes += stackjoinadd_reporter
if stackjoinadd_tweet_message != "":
airtable_API_import_notes += stackjoinadd_tweet_message
# tweet_message_for_airtable_API = tweet_message.replace(" [*Tweet has video attached (videos are unretrievable via API). Open original tweet to access video.]","")
# creating row or updating row on Airtable
table = Table(AIRTABLE_API_KEY, "appiNbM9r6Zy7G2ux", stackjoin_tweets_or_blocks)
if stackjoin_tweets_or_blocks == "blocks":
stackjoin_tweets_or_blocks = "tblcwUsNLE3AecXpu"
stackjoin_tweets_or_blocks_filter_term = "block_height"
else:
stackjoin_tweets_or_blocks_filter_term = "tweet_id"
stackjoin_tweets_or_blocks = "tblAHtdeESADDCKGA"
url = "https://api.airtable.com/v0/appiNbM9r6Zy7G2ux/"+stackjoin_tweets_or_blocks
if block_height_or_tweet_id == "":
block_height_or_tweet_id = 21000000
headers = {"Authorization": "Bearer "+AIRTABLE_BEARER_TOKEN}
x = requests.get(url, headers=headers, params={'fields[]':[stackjoin_tweets_or_blocks_filter_term], 'filterByFormula':stackjoin_tweets_or_blocks_filter_term+"="+str(block_height_or_tweet_id)}).json()
# print(json.dumps(x,indent=4))
print(x)
if x['records'] == []:
create_or_update = "create"
else:
create_or_update = "update"
record_id = x['records'][0]['id']
# checking if block
if stackjoin_tweets_or_blocks == "tblcwUsNLE3AecXpu":
# merge airtable_gif_files_dict into airtable_image_files_dict
for item in airtable_gif_files_dict:
airtable_image_files_dict.append(item)
# checking if valid block height was added to tweet
try:
block_height_or_tweet_id = int(block_height_or_tweet_id.replace("$",""))
except:
block_height_or_tweet_id = 21000000
if create_or_update == "create":
print('creating')
table.create({
"block_height": block_height_or_tweet_id,
"tweet_id": tweet_id,
"author_handle": author_handle,
"author_id": author_id,
"importing_full_block_message": tweet_message,
"block_summary_image": airtable_image_files_dict,
"image_url_dict": str(image_url_dict).translate({39: None,91: None, 93: None, 44: None}),
"block_timestamp": int(tweet_timestamp),
"block_datetimeISO": tweet_datetimeISO,
"img_src_dict": str(img_src_dict).translate({39: None,91: None, 93: None, 44: None}),
"airtable_API_import_notes": airtable_API_import_notes.strip()
})
# if updating:
elif create_or_update == "update":
print('updating')
table = Table(AIRTABLE_API_KEY, 'appiNbM9r6Zy7G2ux', "blocks")
Table.update(table, record_id=record_id,fields={
"tweet_id": tweet_id,
"author_handle": author_handle,
"author_id": author_id,
"importing_full_block_message": tweet_message,
"block_summary_image": airtable_image_files_dict,
"image_url_dict": str(image_url_dict).translate({39: None,91: None, 93: None, 44: None}),
"block_timestamp": int(tweet_timestamp),
"block_datetimeISO": tweet_datetimeISO,
"img_src_dict": str(img_src_dict).translate({39: None,91: None, 93: None, 44: None}),
"airtable_API_import_notes": airtable_API_import_notes.strip()
})
# for stackjoins
else:
try:
dollar_amount = float(dollar_amount.replace("$",""))
stackjoinadd_reporters = ["AnthonyDessauer", "__overflow_", "jc4466", "LoKoBTC", "BrokenSystem20", "pleblira", "phathodl", "derekmross", "VStackSats"]
if stackjoinadd_reporter[25:25+stackjoinadd_reporter[25:].find(" ")] not in stackjoinadd_reporters:
print("stackjoin reporter not found in list")
dollar_amount = 0.0
except:
dollar_amount = 0.0
if create_or_update == "create":
print('creating')
table.create({
"tweet_id": tweet_id,
"dollar_amount": dollar_amount,
"author_handle": author_handle,
"author_id": author_id,
"tweet_message": tweet_message,
"image_files": airtable_image_files_dict,
"gif_files": airtable_gif_files_dict,
"image_url_dict": str(image_url_dict).translate({39: None,91: None, 93: None, 44: None}),
"tweet_timestamp": int(tweet_timestamp),
"tweet_datetimeISO": tweet_datetimeISO,
"img_src_dict": str(img_src_dict).translate({39: None,91: None, 93: None, 44: None}),
"airtable_API_import_notes": airtable_API_import_notes.strip()
}) # if updating:
elif create_or_update == "update":
print('updating')
table = Table(AIRTABLE_API_KEY, 'appiNbM9r6Zy7G2ux', "stackjoin_tweets")
Table.update(table, record_id=record_id,fields={
"tweet_id": tweet_id,
"dollar_amount": dollar_amount,
"author_handle": author_handle,
"author_id": author_id,
"tweet_message": tweet_message,
"image_files": airtable_image_files_dict,
"gif_files": airtable_gif_files_dict,
"image_url_dict": str(image_url_dict).translate({39: None,91: None, 93: None, 44: None}),
"tweet_timestamp": int(tweet_timestamp),
"tweet_datetimeISO": tweet_datetimeISO,
"img_src_dict": str(img_src_dict).translate({39: None,91: None, 93: None, 44: None}),
"airtable_API_import_notes": airtable_API_import_notes.strip()
})
if stackjoinadd_reporter != "0":
tweet_message += stackjoinadd_reporter
#downloading stackjoin_tweets.json from s3 to append tweet information
boto3.client('s3').download_file('pleblira', 'stackjoin_tweets/stackjoin_tweets.json', 'stackjoin_tweets/stackjoin_tweets.json')
with open("stackjoin_tweets/stackjoin_tweets.json",'r+') as openfile:
try:
stackjoin_tweets = json.load(openfile)
print("try")
except:
print("except")
stackjoin_tweets = []
stackjoin_tweets.append({"tweet_id":tweet_id,"author_handle":author_handle,"author_id":author_id,"tweet_message":tweet_message,"image_url_dict":image_url_dict,"img_src_dict":img_src_dict,"tweet_timestamp":tweet_timestamp,"tweet_datetimeISO":tweet_datetimeISO})
openfile.seek(0)
openfile.write(json.dumps(stackjoin_tweets, indent=4))
# uploading appended json to s3
s3_upload = boto3.resource('s3',region_name='us-east-1',aws_access_key_id=AWS_ACCESS_KEY_ID,aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
content=json.dumps(stackjoin_tweets).encode('utf-8')
s3_upload.Object('pleblira', 'stackjoin_tweets/stackjoin_tweets.json').put(Body=content,ACL="public-read")
# turning stackjoin_tweets into html table data
stackjoin_tweets_table_data = ""
for index, tweet in enumerate(stackjoin_tweets):
if tweet['image_url_dict'] == []:
tweet['image_url_dict'] = "no image"
if tweet['img_src_dict'] == []:
tweet['img_src_dict'] = "no image"
if int(datetime.fromtimestamp(int(tweet['tweet_timestamp'])).strftime("%j")) % 2 >0:
html_row_class = "row-odd-day"
else:
html_row_class = "row-even-day"
stackjoin_tweets_table_data += (f"<tr class=\"{html_row_class}\"><td>{index+1}</td><td><a href=\"https://www.twitter.com/pleblira/status/{tweet['tweet_id']}\" target=\"_blank\">{tweet['tweet_id']}</a></td><td><a href=\"https://twitter.com/{tweet['author_handle']}\" target=\"_blank\">{tweet['author_handle']}</a><br>({tweet['author_id']})</td><td>{tweet['tweet_message']}</td><td>{str(tweet['img_src_dict']).translate({39: None,91: None, 93: None, 44: None})}</td><td style=\"word-break:break-all\">{str(tweet['image_url_dict'])}</td><td>{tweet['tweet_datetimeISO']} <br>({tweet['tweet_timestamp']})</td>")
# print(stackjoin_tweets_table_data)
with open("stackjoin_tweets/stackjoin_tweets_table_data.html",'w') as openfile:
openfile.write(stackjoin_tweets_table_data)
# updating html template with table data
with open('stackjoin_tweets/stackjoin_tweets_template.html') as openfile:
t = Template(openfile.read())
vals = {"stackjoin_tweets_table_data": stackjoin_tweets_table_data}
with open('stackjoin_tweets/stackjoin_tweets.html','w') as f:
f.write(t.render(vals))
# uploading template to s3
content=t.render(vals)
s3_upload.Object('pleblira', 'stackjoin_tweets/stackjoin_tweets.html').put(Body=content,ACL="public-read",ContentType='text/html')
# if __name__ == "__main__":
# store_stackjoin("json_response_with_image.json")