forked from leonardr/sycorax
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpuppet_ebooks.py
executable file
·193 lines (177 loc) · 7.25 KB
/
puppet_ebooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python
"""
puppet_ebooks.py by BillSeitz Jul'2014 forked from firstWorldStoic.py which was building on LeonardRichardson's sycorax library
see http://webseitz.fluxent.com/wiki/TwitterBot for background
launch with `python puppet_ebooks.py`
Concept*****
* pick at random one of the lists for the puppet_ebooks Twitter account
* grab the most recent tweets from that list (up to num_tweets, currently set to 200)
* strip any URLs from the tweets and feed the rest of the content into the MarkovChain
* as the trigger for the new tweet, pick one of the incoming tweets at random (originally only its first word was used)
* generate the new tweet from that trigger
"""
import os
import sys
import json
from keys import TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET
import twitter
from random import randint
def load_config(directory):
    """Load the bot's settings from ``config.json`` inside *directory*.

    Args:
        directory: Path of the folder expected to contain ``config.json``.

    Returns:
        The decoded JSON document, i.e. whatever ``json.loads`` produces for
        the file's content.

    Raises:
        Exception: if ``config.json`` does not exist in *directory*.
    """
    filename = os.path.join(directory, "config.json")
    if not os.path.exists(filename):
        raise Exception("Could not find config.json file at %s" % (
            filename
        ))
    # Use a context manager so the handle is closed even if decoding fails.
    with open(filename) as config_file:
        return json.loads(config_file.read().strip())
def lists():  # get lists for account
    """Return the configured account's Twitter lists that have members.

    Credentials and the account screen name are read from the module-level
    ``config`` mapping (section ``user_credentials``). Lists whose
    ``member_count`` is not greater than zero are left out.
    """
    credentials = config["user_credentials"]
    api = twitter.Api(
        TWITTER_CONSUMER_KEY,
        TWITTER_CONSUMER_SECRET,
        credentials["twitter_token"],
        credentials["twitter_secret"],
    )
    candidates = api.GetLists(screen_name=credentials["account"])
    return [candidate for candidate in candidates if candidate.member_count > 0]
def list(lists):  # pick 1 list from the list of lists (at random)
    """Return one entry of *lists*, chosen at a random index.

    The index is drawn with ``randint`` over ``0 .. len(lists) - 1``, so an
    empty sequence makes ``randint`` fail.
    """
    from random import randint
    last_index = len(lists) - 1
    return lists[randint(0, last_index)]
def list_timeline(list):  # get the timeline for the selected list
    """Fetch the timeline of the given Twitter list.

    *list* must expose ``id`` and ``slug`` attributes. Credentials come from
    the module-level ``config`` and the number of tweets requested from the
    module-level ``num_tweets``.
    """
    credentials = config["user_credentials"]
    api = twitter.Api(
        TWITTER_CONSUMER_KEY,
        TWITTER_CONSUMER_SECRET,
        credentials["twitter_token"],
        credentials["twitter_secret"],
    )
    return api.GetListTimeline(list.id, list.slug, count=num_tweets)
def corpus_assemble(timeline, keep_handle=False):  # assemble trimmed lines of tweets to feed to engine
    """Turn a timeline into a list of cleaned-up tweet texts.

    For every tweet the text is taken from its ``_text`` attribute, then:

    * URLs are removed -- both ``http:`` and ``https:`` links (the previous
      pattern only matched ``http:``, so secure links leaked into the corpus);
    * ``|`` and ``"`` characters are dropped;
    * unless *keep_handle* is true, every ``@`` becomes ``#``
      (presumably so generated tweets do not mention real accounts --
      TODO confirm the intent).

    Args:
        timeline: Iterable of objects exposing a ``_text`` attribute.
        keep_handle: When true, ``@`` characters are left untouched.
            Defaults to False, which matches the previous fixed behaviour.

    Returns:
        A list with one cleaned string per tweet, in timeline order. Text
        around a removed URL is not re-spaced or stripped.
    """
    import re
    # Compiled once; \S+ also stops the match at newlines and tabs.
    url_patt = re.compile(r'https?:\S+')
    tweets = []
    for tweet in timeline:
        body = url_patt.sub('', tweet._text)
        body = body.replace('|', '').replace('"', '')
        if not keep_handle:
            body = body.replace('@', '#')
        tweets.append(body)
    return tweets
def spew(corpus):  # feed corpus (set of tweets) into engine, output single tweet
    """Train the StaticBotCore engine on *corpus* and generate one tweet.

    Every entry of *corpus* is passed to ``StaticBotCore.add_to_brain``; one
    entry picked at random is then handed to
    ``StaticBotCore.generate_sentence`` as the trigger. Output longer than
    140 characters is cut to 137 characters plus ``'...'``.

    Args:
        corpus: Non-empty sequence of tweet texts.

    Returns:
        The generated tweet text, at most 140 characters long.

    Raises:
        ValueError: if *corpus* is empty (previously this surfaced as an
            opaque error from ``randint``).
    """
    if not corpus:
        raise ValueError('cannot generate a tweet from an empty corpus')
    engin_dir = '/Users/billseitz/documents/djcode/st/wilson/'
    # Register the engine's directory only once, so repeated calls do not
    # keep growing sys.path.
    if engin_dir not in sys.path:
        sys.path.append(engin_dir)
    import StaticBotCore
    for tweet in corpus:
        StaticBotCore.add_to_brain(tweet)
    from random import randint
    tweet_trigger = corpus[randint(0, len(corpus) - 1)]
    # The whole tweet is the trigger. The old, unused "first word" lookup was
    # removed because it raised IndexError whenever the chosen tweet was
    # blank (for example a tweet that consisted only of a URL).
    tweet_out = StaticBotCore.generate_sentence(tweet_trigger)
    if len(tweet_out) > 140:
        tweet_out = tweet_out[0:137] + '...'
    return tweet_out
def search(text):  # get tweets matching string
    """Search Twitter for English-language tweets matching *text*.

    Credentials come from the module-level ``config``. The number of hits is
    printed, and each hit is returned as its attribute dictionary
    (``__dict__``) rather than as the API object itself.
    """
    credentials = config["user_credentials"]
    api = twitter.Api(
        TWITTER_CONSUMER_KEY,
        TWITTER_CONSUMER_SECRET,
        credentials["twitter_token"],
        credentials["twitter_secret"],
    )
    results = api.GetSearch(text, lang='en')
    # Single-argument print: emits the same bytes as the two-item print
    # statement (items separated by one space).
    print('%s %s' % ('num results: ', len(results)))
    return [result.__dict__ for result in results]
def trim_results(results):  # reduce down to tweets that can work, and strip each to quotable portion
    """Filter search results and trim each one to its quotable part.

    Reads the module-level ``search_text`` (it is not defined in the visible
    part of this file, so the caller must set it before calling).

    For every result dict the ``'_text'`` value is rewritten in place:

    1. If ``search_text`` occurs in the text (case-insensitively), only the
       part before the match is kept, minus the single character directly in
       front of the match. If it does not occur, the text is left alone
       (previously ``find()`` returning -1 silently chopped the last two
       characters; a match at index 0 chopped the last character instead of
       producing an empty prefix).
    2. A trailing ``#`` is removed.
    3. ``http:`` and ``https:`` URLs are removed and the text is stripped.
    4. A final ``.`` is appended unless the text already ends in ``.``,
       ``!`` or ``?``.

    Results shorter than 10 characters at step 1 or after step 3 are dropped,
    and only results whose final length is strictly between 10 and 80 are
    kept. The number of kept results is printed.

    Args:
        results: Iterable of dicts that each have a ``'_text'`` entry.

    Returns:
        A list containing the (mutated) dicts that survived the filtering.
    """
    import re
    url_patt = re.compile(r'https?:\S+')
    net_results = []
    for result in results:
        # drop if too old
        # ????******************************************************
        text = result['_text']
        # strip everything after match
        pos_match = text.lower().find(search_text.lower())
        if pos_match >= 0:
            # max() keeps a match at index 0 from turning into a [0:-1] slice.
            text = text[0:max(pos_match - 1, 0)]
        result['_text'] = text
        if len(text) < 10:
            continue
        if text[-1] == '#':
            text = text[0:-1]
        # strip any URLs
        text = url_patt.sub('', text).strip()
        result['_text'] = text
        if len(text) < 10:
            continue
        if text[-1] not in ['.', '!', '?']:
            text = text + '.'
            result['_text'] = text
        if 10 < len(text) < 80:
            net_results.append(result)
    # Single-argument print: same output as the old two-item print statement.
    print('%s %s' % ('num net results: ', len(net_results)))
    return net_results
def pick_result(results):  # just pick random for now
    """Return one element of *results* selected at a random index.

    The index comes from ``randint(0, len(results) - 1)``; an empty sequence
    therefore makes ``randint`` fail.
    """
    from random import randint
    chosen_index = randint(0, len(results) - 1)
    return results[chosen_index]
def pick_quote(result):  # pick from set of Stoic quotes to match - for now random
    """Return a randomly chosen line from ``quotes.txt``.

    The file is looked up inside the module-level ``directory``.

    Args:
        result: Currently unused; the quote is picked at random and is not
            matched against the result.

    Returns:
        One line of the file exactly as read, including its line ending.
    """
    f_path = os.path.join(directory, 'quotes.txt')
    # Context manager so the file handle is released right after reading.
    with open(f_path, 'r') as quotes_file:
        lines = quotes_file.readlines()
    return lines[randint(0, len(lines) - 1)]
def merge_pieces(result, quote):  # glom them together
    """Join the result's ``'_text'`` and *quote* with a single space.

    A combined text longer than 140 characters is shortened to its first
    137 characters followed by ``'...'``.
    """
    combined = ' '.join([result['_text'], quote])
    if len(combined) <= 140:
        return combined
    return combined[:137] + '...'
def post(line, reply_to=None):  # post tweet
    """Post *line* as a status update for the configured account.

    Credentials come from the module-level ``config``.

    Args:
        line: Text of the tweet.
        reply_to: Optional id of the status being replied to. It is now
            forwarded to ``PostUpdate`` as ``in_reply_to_status_id``;
            previously the argument was accepted but silently ignored.

    Raises:
        twitter.TwitterError: for any API error whose message is not
            ``"Status is a duplicate."`` (duplicates are ignored on purpose).
    """
    credentials = config["user_credentials"]
    api = twitter.Api(
        TWITTER_CONSUMER_KEY,
        TWITTER_CONSUMER_SECRET,
        credentials["twitter_token"],
        credentials["twitter_secret"],
    )
    # Post the tweet.
    try:
        api.PostUpdate(line, in_reply_to_status_id=reply_to)
    except twitter.TwitterError as e:
        if e.message != "Status is a duplicate.":
            # Bare raise keeps the original traceback intact.
            raise
if __name__ == "__main__":
    # Module-level settings read by the functions above: `directory`,
    # `config` and `num_tweets` must be bound before the pipeline runs.
    script_directory = 'puppet_ebooks'
    directory = os.path.join(
        '/Users/billseitz/documents/djcode/st/sycorax',
        script_directory,
    )
    config = load_config(directory)
    num_tweets = 200

    # Pipeline: account lists -> one random list -> its timeline ->
    # cleaned corpus -> generated tweet -> post.
    available_lists = lists()
    list_selected = list(available_lists)
    timeline = list_timeline(list_selected)
    corpus = corpus_assemble(timeline)
    tweet = spew(corpus)
    post(tweet)

    # Earlier search-and-quote flow, kept for reference:
    # NEXT STEP - feed to markov chain
    #results = search(search_text)
    #results = trim_results(results)
    #result = pick_result(results)
    #quote = pick_quote(result)
    #tweet = merge_pieces(result, quote)
    #print str(tweet)
    #post(tweet, result["_id"])