forked from nerdsfornature/changebrackets
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fireslurp.rb
290 lines (265 loc) · 9.82 KB
/
fireslurp.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
require 'rubygems'
require 'twitter'
require 'pp'
require 'active_support/inflector'
require 'active_support/core_ext/object'
require 'ostruct'
require 'flickraw'
require 'instagram'
require 'csv'
require 'google_drive'
require 'trollop'
require 'yaml'
# Command-line options for the script, declared with Trollop at the top level.
# The banner text doubles as the --help output, so its wording is user-facing.
OPTS = Trollop::options do
  banner <<-EOS
Harvest recent tagged photo metadata from social media services and store the
data in local CSV files or a Google Spreadsheet. Note that this only harvests
*recent* photos, not *all* photos available through the APIs, so it's designed
to be run regularly, depending on how much usage your tags get.
You MUST provide at least one tag and one API key and/or secret for
this script to do anything. It will write to local CSV files by default.
Since this is pretty configuration-heavy, you can also specify all the API
keys and secrets in a YAML file specified by the --config option.
Usage:
# Save info about Instagram photos for two tags in a local CSV file
bundle exec ruby fireslurp.rb --instagram-key=xxx morganfire01 morganfire02
# Save info about Twitter photos for two tags to a Google Spreadsheet
bundle exec ruby fireslurp.rb \
--twitter-key=xxx \
--twitter-secret=yyy \
--google-email=your.email@gmail.com \
--google-password=worstpasswordevar \
--google-spreadsheet-id=1234abcd \
morganfire01 morganfire02
# Save info about photos for two tags using config from a YAML file
bundle exec ruby fireslurp.rb -c fireslurp.yaml morganfire01 morganfire02
where [options] are:
EOS
  # Behaviour switches.
  opt :debug, "Print debug statements, don't save data", :type => :boolean, :short => "-d"
  opt :auto_approve, "
For a Google Spreadsheet, automatically approve all contributions by
filling the usable_tag column with the tag the contributor used
".strip, :type => :boolean
  opt :config, "Path to YAML configuration file", :type => :string, :short => "-c"
  # Per-service API credentials.
  opt :twitter_key, "Twitter API key", :type => :string
  opt :twitter_secret, "Twitter API secret", :type => :string
  opt :flickr_key, "Flickr API key", :type => :string
  opt :flickr_secret, "Flickr API secret", :type => :string
  opt :instagram_key, "Instagram API key", :type => :string
  # Google Spreadsheet output.
  opt :google_email, "Google email address, used to add data to a Google spreadsheet", :type => :string
  opt :google_password, "Google account password", :type => :string
  opt :google_spreadsheet_id, "
Write data to a Google Spreadsheet instead of CSV. The value should be the
Google Spreadsheet ID. If it's blank and the flag is used, the script will
look for an internal variable.
".strip, :type => :string, :short => '-g'
end
# Settings read from the YAML file named by --config, keyed by option name.
# Falls back to an empty Hash when no path was given, the path does not exist,
# or the file parses to nothing, so the config[...] lookups that follow are
# always made on a Hash.
# File.exist? is used because File.exists? is deprecated and absent from
# recent Ruby releases.
config = if OPTS[:config] && File.exist?(OPTS[:config])
  YAML.load_file(OPTS[:config]) || {}
else
  {}
end
## CONFIG ###############################################
# Each setting prefers the command-line value and otherwise uses the entry of
# the same name from the YAML config.
TAGS = ARGV
TWITTER_KEY = OPTS[:twitter_key] || config['twitter_key']
TWITTER_SECRET = OPTS[:twitter_secret] || config['twitter_secret']
FLICKR_KEY = OPTS[:flickr_key] || config['flickr_key']
FLICKR_SECRET = OPTS[:flickr_secret] || config['flickr_secret']
INSTAGRAM_KEY = OPTS[:instagram_key] || config['instagram_key']
GOOGLE_EMAIL = OPTS[:google_email] || config['google_email']
GOOGLE_PASSWORD = OPTS[:google_password] || config['google_password']
GOOGLE_SPREADSHEET_ID = OPTS[:google_spreadsheet_id] || config['google_spreadsheet_id']
###############################################################
# Column order shared by the CSV and Google Spreadsheet writers.
HEADERS = %w(provider tag datetime username usable_tag image_url url image_url_s image_url_m license title)

# Abort early on unusable invocations: no tags, or no API key at all.
if ARGV.empty?
  Trollop::die "you must specify at least one tag"
end
if [TWITTER_KEY, FLICKR_KEY, INSTAGRAM_KEY].all?(&:blank?)
  Trollop::die "you must specify at least one API key"
end

# The three Google settings are all-or-nothing.
num_google_opts = [GOOGLE_EMAIL, GOOGLE_PASSWORD, GOOGLE_SPREADSHEET_ID].count { |value| !value.blank? }
if num_google_opts.between?(1, 2)
  Trollop::die "you must specify a Google email, password, and spreadsheet ID if you specify any of those options"
end
# Searches Twitter for recent tweets that carry a photo.
#
# Class-level calls such as TwitterProvider.search are forwarded to a lazily
# created shared instance, so callers can treat the class itself as the
# provider object.
class TwitterProvider
  class << self
    # Forwards any unknown class-level call to the shared instance.
    def method_missing(method, *args, &block)
      shared_instance.send(method, *args, &block)
    end

    # Keeps respond_to? truthful for the calls method_missing forwards.
    def respond_to_missing?(method, include_private = false)
      shared_instance.respond_to?(method, include_private) || super
    end

    private

    # The single instance all forwarded calls run against.
    def shared_instance
      @shared_instance ||= new
    end
  end

  # Yields one photo per recent tweet matching +q+ that has media attached.
  # The yielded value is nil when the tweet's first media item has no known
  # size, so callers must be prepared to skip nil.
  # NOTE(review): the " -rt" suffix is presumably there to exclude retweets;
  # confirm against the Twitter search operators.
  def search(q, &block)
    client.search("#{q} -rt", :result_type => "recent").each do |tweet|
      next unless tweet.media.first
      block.yield(photo_from_api_response(tweet))
    end
  end

  # Memoized REST client configured with the consumer key and secret.
  def client
    @client ||= Twitter::REST::Client.new do |config|
      config.consumer_key = TWITTER_KEY
      config.consumer_secret = TWITTER_SECRET
    end
  end

  # Builds the photo record for +tweet+ from its first media item.
  # Returns nil when the tweet has no media or none of the large/medium/small
  # sizes is listed for it.
  def photo_from_api_response(tweet)
    media = tweet.media.first
    return nil unless media
    # Largest listed size wins.
    max_size = %w(large medium small).find { |size| media.sizes[size.to_sym] }
    return nil if max_size.nil? # skip if there's no reasonable image url
    OpenStruct.new(
      :response => tweet,
      :image_url => "#{media.media_url}:#{max_size}",
      :image_url_m => "#{media.media_url}:medium",
      :image_url_s => "#{media.media_url}:small",
      :username => tweet.user.name,
      :url => tweet.url,
      :datetime => tweet.created_at,
      :title => tweet.text,
      :license => "all rights reserved"
    )
  end
end
# Searches Flickr for photos carrying a tag.
#
# Class-level calls such as FlickrProvider.search are forwarded to a lazily
# created shared instance, so callers can treat the class itself as the
# provider object.
class FlickrProvider
  # Short labels for Flickr's numeric license ids. Ids 5 and 6 are the
  # Attribution-ShareAlike and Attribution-NoDerivs licenses; 9 and 10 are the
  # public-domain dedication (CC0) and the Public Domain Mark.
  LICENSES = {
    1 => "CC BY-NC-SA",
    2 => "CC BY-NC",
    3 => "CC BY-NC-ND",
    4 => "CC BY",
    5 => "CC BY-SA",
    6 => "CC BY-ND",
    7 => "PD",
    8 => "United States Government Work",
    9 => "CC0",
    10 => "PD"
  }.freeze

  # Label used for id 0 and for any id not listed above.
  DEFAULT_LICENSE = "all rights reserved"

  class << self
    # Forwards any unknown class-level call to the shared instance.
    def method_missing(method, *args, &block)
      shared_instance.send(method, *args, &block)
    end

    # Keeps respond_to? truthful for the calls method_missing forwards.
    def respond_to_missing?(method, include_private = false)
      shared_instance.respond_to?(method, include_private) || super
    end

    private

    # The single instance all forwarded calls run against.
    def shared_instance
      @shared_instance ||= new
    end
  end

  # Yields one photo record for each of up to 500 photos tagged +q+.
  def search(q, &block)
    client.photos.search(:tags => [q], :per_page => 500, :extras => 'url_o,url_l,url_m,url_c,owner_name,date_taken,license').each do |photo|
      attrs = photo.to_hash
      block.yield(OpenStruct.new(
        :response => photo,
        # The original-size URL is not always present in the response; fall
        # back to the next largest size that was requested in :extras.
        :image_url => attrs["url_o"] || attrs["url_l"] || attrs["url_c"] || attrs["url_m"],
        :image_url_m => attrs["url_c"],
        :image_url_s => attrs["url_m"],
        :title => photo.title,
        :license => decode_license(attrs["license"]),
        :username => photo.ownername,
        :url => "http://flickr.com/photos/#{photo.owner}/#{photo.id}",
        :datetime => Time.parse(photo.datetaken)
      ))
    end
  end

  # Translates a Flickr license id (Integer, numeric String or nil) into a
  # short label; unknown ids map to "all rights reserved".
  def decode_license(code)
    LICENSES.fetch(code.to_i, DEFAULT_LICENSE)
  end

  # Sets the FlickRaw credentials on first use and returns the `flickr`
  # object (presumably the helper FlickRaw defines — not visible here).
  def client
    if FlickRaw.api_key.nil?
      FlickRaw.api_key = FLICKR_KEY
      FlickRaw.shared_secret = FLICKR_SECRET
    end
    flickr
  end
end
# Searches Instagram for recent media carrying a tag.
#
# Class-level calls such as InstagramProvider.search are forwarded to a lazily
# created shared instance, so callers can treat the class itself as the
# provider object.
class InstagramProvider
  class << self
    # Forwards any unknown class-level call to the shared instance.
    def method_missing(method, *args, &block)
      shared_instance.send(method, *args, &block)
    end

    # Keeps respond_to? truthful for the calls method_missing forwards.
    def respond_to_missing?(method, include_private = false)
      shared_instance.respond_to?(method, include_private) || super
    end

    private

    # The single instance all forwarded calls run against.
    def shared_instance
      @shared_instance ||= new
    end
  end

  # Yields one photo record per recent media item tagged +q+.
  # A rejected request (Instagram::BadRequest) is reported on stdout and the
  # method returns nil instead of raising, so other providers and tags can
  # still be processed.
  def search(q, &block)
    client.tag_recent_media(q).each do |photo|
      caption = photo.caption
      block.yield(OpenStruct.new(
        :response => photo,
        :image_url => photo.images.standard_resolution.url,
        :image_url_m => photo.images.standard_resolution.url,
        :image_url_s => photo.images.low_resolution.url,
        :username => photo.user.username,
        :url => photo.link,
        :datetime => Time.at(photo.created_time.to_i),
        # Media without a caption gets a placeholder title.
        :title => caption ? caption.text : 'Untitled',
        :license => "all rights reserved"
      ))
    end
  rescue Instagram::BadRequest => e
    puts "Instagram request failed: #{e.message}"
    nil
  end

  # Configures the Instagram module with the client id on first use and
  # returns the module itself, which serves as the API client.
  def client
    return @client if @client
    Instagram.configure do |config|
      config.client_id = INSTAGRAM_KEY
    end
    @client = Instagram
  end
end
# Searches every provider for every tag and writes the photos found to a new
# CSV file named fireslurp-<date>-<epoch>.csv in the working directory.
#
# providers - objects responding to +name+ and +search(tag) { |photo| }+.
# tags      - tag strings to search for.
#
# Every photo is also echoed to stdout. With --debug no file is created.
# Returns nil.
def write_to_csv(providers, tags)
  now = Time.now
  path = "fireslurp-#{now.strftime('%Y-%m-%d')}-#{now.to_i}.csv"
  csv = OPTS[:debug] ? nil : CSV.open(path, 'w')
  csv << HEADERS if csv
  providers.each do |provider|
    # "TwitterProvider" -> "Twitter"
    provider_name = provider.name.underscore.split('_').first.capitalize
    tags.each do |tag|
      provider.search(tag) do |photo|
        next unless photo
        puts "#{provider}\t#{tag}\t#{photo.datetime}\t#{photo.username}\t\t#{photo.image_url}\t#{photo.url}"
        next unless csv
        # One value per HEADERS column, in HEADERS order.
        # NOTE(review): usable_tag is always written as 't' here, whereas
        # write_to_google stores a tag name in that column — confirm which
        # is intended.
        csv << [
          provider_name,
          tag,
          photo.datetime.iso8601,
          photo.username,
          't',
          photo.image_url,
          photo.url,
          photo.image_url_s,
          photo.image_url_m,
          photo.license,
          photo.title
        ]
      end
    end
  end
  nil
ensure
  # Close the file even when a provider raises part-way through.
  csv.close if csv
end
# Searches every provider for every tag and records the photos found in the
# first worksheet of the configured Google Spreadsheet.
#
# providers - objects responding to +name+ and +search(tag) { |photo| }+.
# tags      - tag strings to search for.
#
# A photo whose URL is already in the sheet has its row rewritten in place
# (keeping any existing usable_tag); other photos are appended. Every photo is
# also echoed to stdout. With --debug the worksheet is never saved.
#
# NOTE(review): GoogleDrive.login authenticates with an email and password;
# confirm the installed google_drive version still provides it.
def write_to_google(providers, tags)
  session = GoogleDrive.login(GOOGLE_EMAIL, GOOGLE_PASSWORD)
  ws = session.spreadsheet_by_key(GOOGLE_SPREADSHEET_ID).worksheets[0]
  # Seed the header row on an empty sheet.
  first_row = ws.rows[0]
  if first_row.nil? || first_row.size == 0
    HEADERS.each_with_index do |header, i|
      ws[1, i + 1] = header
    end
    ws.save unless OPTS[:debug]
  end
  usable_tag_column = HEADERS.index('usable_tag') + 1
  url_index = HEADERS.index('url')
  # URL of every row, indexed by (row number - 1); kept up to date below.
  urls = ws.rows.map { |r| r[url_index] }
  providers.each do |provider|
    # "TwitterProvider" -> "Twitter"
    provider_name = provider.name.underscore.split('_').first.capitalize
    puts
    tags.each do |tag|
      provider.search(tag) do |photo|
        next unless photo
        url = photo.url.to_s
        puts [
          provider_name.ljust(10),
          tag.ljust(15),
          photo.datetime.to_s.ljust(30),
          photo.username.to_s.ljust(30), # to_s: a missing username must not abort the run
          photo.image_url.to_s.ljust(70),
          url.ljust(70)
        ].join
        row = (urls.index(url) || ws.num_rows) + 1
        # Remember the row so the same photo found again in this run (e.g.
        # under another tag) updates it instead of being appended twice.
        urls[row - 1] = url
        existing_usable_tag = ws[row, usable_tag_column]
        usable_tag = existing_usable_tag
        usable_tag = tag if OPTS[:auto_approve] && existing_usable_tag.blank?
        # One value per HEADERS column, in HEADERS order.
        [
          provider_name,
          tag,
          photo.datetime.iso8601,
          photo.username,
          usable_tag,
          photo.image_url,
          photo.url,
          photo.image_url_s,
          photo.image_url_m,
          photo.license,
          photo.title,
        ].each_with_index do |value, i|
          ws[row, i + 1] = value
        end
      end
    end
    ws.save unless OPTS[:debug]
  end
end
# Enable only the providers whose API key was actually supplied. Blank values
# (for example empty placeholders in the YAML config) count as missing, in
# line with the blank? checks used by the option validation earlier in this
# script.
providers = []
providers << TwitterProvider unless TWITTER_KEY.blank?
providers << FlickrProvider unless FLICKR_KEY.blank?
providers << InstagramProvider unless INSTAGRAM_KEY.blank?

# Write to the Google Spreadsheet only when all three Google settings are
# present; otherwise fall back to a local CSV file.
if [GOOGLE_EMAIL, GOOGLE_PASSWORD, GOOGLE_SPREADSHEET_ID].none?(&:blank?)
  write_to_google(providers, TAGS)
else
  write_to_csv(providers, TAGS)
end