-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetweet.py
executable file
·127 lines (97 loc) · 4.27 KB
/
detweet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
from __future__ import print_function
from tweepy.error import TweepError
import argparse
import tweepy
import os
import sys
import csv
import re
import moment
import time
ENVIRON_CONSUMER_KEY = 'CONSUMER_KEY'
ENVIRON_CONSUMER_SECRET = 'CONSUMER_SECRET'
ENVIRON_ACCESS_TOKEN = 'ACCESS_TOKEN'
ENVIRON_ACCESS_TOKEN_SECRET = 'ACCESS_TOKEN_SECRET'
DATE_FORMAT = 'YYYY-M-D'
TWEET_TIMESTAMP_FORMAT = 'YYYY-MM-DD'
def main():
parser = argparse.ArgumentParser(description='Delete tweets en masse')
parser.add_argument('--csv', help='the path to your tweets.csv file located in your twitter archive', required=True)
parser.add_argument('--dry', action='store_true', default=False, help='do a dry run')
parser.add_argument('--before', help='match tweets before this date (YYYY-M-D)')
parser.add_argument('--after', help='match tweets after this date (YYYY-M-D)')
parser.add_argument('pattern', help='regular expressions to match', nargs='+')
args = parser.parse_args()
# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
consumer_key = os.getenv(ENVIRON_CONSUMER_KEY)
consumer_secret = os.getenv(ENVIRON_CONSUMER_SECRET)
# The access tokens can be found on your applications's Details
# page located at https://dev.twitter.com/apps (located
# under "Your access token")
access_token = os.getenv(ENVIRON_ACCESS_TOKEN)
access_token_secret = os.getenv(ENVIRON_ACCESS_TOKEN_SECRET)
required_environ_vars = [consumer_key, consumer_secret, access_token, access_token_secret]
if None in required_environ_vars:
required_environ_var_keys = [ENVIRON_ACCESS_TOKEN, ENVIRON_ACCESS_TOKEN_SECRET, ENVIRON_CONSUMER_KEY,
ENVIRON_CONSUMER_SECRET]
eprint('Missing a required environment variable. Make sure the following are set: {}'.format(
', '.join(required_environ_var_keys)))
exit(1)
# Set up tweepy
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.secure = True
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Parse the before/after dates if they exist
before_date = None
after_date = None
if args.before is not None:
before_date = moment.date(args.before, DATE_FORMAT)
if args.after is not None:
after_date = moment.date(args.after, DATE_FORMAT)
# Read the CSV file
matching_tweets = []
with open(args.csv) as csv_file:
reader = csv.DictReader(csv_file)
for row in reader:
# slice the date here so that we don't bother parsing the time
tweet_date = moment.date(row['timestamp'][:len(TWEET_TIMESTAMP_FORMAT)], TWEET_TIMESTAMP_FORMAT)
if before_date is not None and tweet_date > before_date:
continue
if after_date is not None and tweet_date < after_date:
continue
if tweet_matches_patterns(row['text'], args.pattern):
matching_tweets.append(row)
# Begin doing tweet deletion
if not args.dry:
print('Beginning to delete tweets:\n\n')
username = auth.get_username()
for tweet in matching_tweets:
print(tweet['timestamp'], 'https://twitter.com/{}/status/{}'.format(username, tweet['tweet_id']),
tweet['text'], '\n')
if not args.dry:
try:
api.destroy_status(int(tweet['tweet_id']))
except TweepError as e:
# Tweet with ID not found
if e.api_code != 144:
raise e
# Make 1 request every 10 seconds
time.sleep(10)
def eprint(*args, **kwargs):
"""Helper function that prints to stderr"""
print(*args, file=sys.stderr, **kwargs)
def tweet_matches_patterns(tweet, patterns):
"""Checks if a tweet matches a pattern
:param tweet: tweet to search for the given pattern
:param patterns: patterns to check against
"""
for pattern in patterns:
# this could probably be optimized to use compiled patterns but that's not the bottleneck here
if re.search(pattern, tweet, re.IGNORECASE) is not None:
return True
return False
if __name__ == '__main__':
main()