-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget-questions-for-arbitrary-time-period.rb
executable file
·133 lines (125 loc) · 4.14 KB
/
get-questions-for-arbitrary-time-period.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env ruby
require 'json'
require 'rubygems'
require 'typhoeus'
require 'awesome_print'
require 'json'
require 'time'
require 'date'
require 'csv'
require 'logger'
# Script-wide diagnostic logger: writes to standard error with the threshold
# set to DEBUG, so every logger.debug call below is emitted.
logger = Logger.new(STDERR).tap { |log| log.level = Logger::DEBUG }
# GET +url+ through Typhoeus with the given query +params+ and parse the
# response body as JSON.
#
# A body that cannot be parsed as JSON is treated as a transient failure: the
# request is issued again, up to +max_retries+ more times, sleeping
# +retry_delay+ seconds before each retry. Progress and final failure are
# reported on $stderr.
#
# @param url [String] address to request
# @param params [Hash, nil] query parameters, passed to Typhoeus as :params
# @param logger [Logger] receives +url+ and +params+ at debug level
# @param max_retries [Integer] retries allowed after the first failed attempt
# @param retry_delay [Numeric] seconds to sleep before each retry
# @return [Object, nil] the parsed JSON document, or nil when every attempt
#   produced an unparseable body
def getKitsuneResponse(url, params, logger, max_retries: 3, retry_delay: 10)
  logger.debug url
  logger.debug params
  try_count = 0
  begin
    result = Typhoeus::Request.get(url, :params => params)
    # to_s: should the body ever be nil, JSON.parse would raise TypeError and
    # bypass the rescue below; an empty string takes the normal retry path.
    JSON.parse(result.body.to_s)
  rescue JSON::ParserError
    try_count += 1
    if try_count <= max_retries
      $stderr.printf("JSON::ParserError exception, retry:%d\n", try_count)
      sleep(retry_delay)
      retry
    end
    # Only reached once the retry budget is spent (retry jumps back to begin).
    $stderr.printf("JSON::ParserError exception, retrying FAILED\n")
    nil
  end
end
# Six positional arguments are required: start year, month, day followed by
# end year, month, day. Print the usage line and stop when any are missing.
if ARGV.length < 6
  puts "usage: #{$0} yyyy mm dd end-yyyy end-mm end-dd"
  exit
end
# because of issue 3686, https://github.com/mozilla/kitsune/issues/3686,
# go back one day and forward one day when querying the API; the exact
# window is re-checked per question further down.
created_time = Time.gm(ARGV[0].to_i, ARGV[1].to_i, ARGV[2].to_i)
greater_than_time = (created_time - 3600 * 24).strftime("%Y-%-m-%-e")
less_than = Time.gm(ARGV[3].to_i, ARGV[4].to_i, ARGV[5].to_i)
less_than_time = (less_than + 3600 * 24).strftime("%Y-%-m-%-e")
# Exclusive upper bound of the window (midnight after the end date), kept in
# UTC like created_time. Re-parsing the zone-less less_than_time string with
# Time.parse would read it as local time and skew the window on any machine
# whose zone is not UTC.
less_than_time_parsed = less_than + 3600 * 24
logger.debug "created_time" + created_time.to_s
logger.debug "greater than time" + greater_than_time.to_s
logger.debug "less than" + less_than.to_s
logger.debug "less than time" + less_than_time.to_s
# Query parameters for the first request only.
url_params = {
  :format => "json",
  :product => "firefox",
  :created__gt => greater_than_time,
  :created__lt => less_than_time,
  :ordering => "+created",
}
url = "https://support.mozilla.org/api/2/question/"
end_program = false   # set once the last page or an out-of-window question is seen
question_number = 0   # running count of questions examined (debug output only)
issue_3686_offset = 7 * 3600 # 7 hours off; currently unused
csv = []              # one array per CSV row, written out at the end of the script
# Walk the paginated question list by following each page's "next" link, and
# collect one CSV row for every question whose creation time lies in
# [created_time, less_than_time_parsed).
until end_program
  sleep(1.0) # sleep 1 second between API calls
  questions = getKitsuneResponse(url, url_params, logger)
  # getKitsuneResponse returns nil once its retries are exhausted; stop with a
  # clear message instead of failing on nil["next"].
  abort "no parseable response from #{url}, giving up" if questions.nil?

  url = questions["next"]
  if url.nil?
    logger.debug "nil next url"
    # Last page: finish after ALL of its questions have been processed.
    end_program = true
  else
    logger.debug "next url:" + url
  end
  # Only the first request sends params; presumably the "next" link already
  # embeds the query string -- TODO confirm.
  url_params = nil

  questions["results"].each do |q|
    logger.debug "created:" + q["created"]
    created = Time.parse(q["created"])
    # kitsune issue 3686 has been fixed, so timestamps are used as returned.
    updated = q["updated"]
    logger.debug "updated:" + updated unless updated.nil?

    id = q["id"]
    logger.debug "QUESTION id:" + id.to_s
    question_number += 1
    logger.debug "QUESTION number:" + question_number.to_s

    # Tag slugs joined into one field, each followed by ";".
    tag_str = q["tags"].map { |t| t["slug"] + ";" }.join

    logger.debug 'createdtop:' + created.to_i.to_s
    logger.debug 'created_time:' + created_time.to_i.to_s
    logger.debug 'less_than_time_parsed:' + less_than_time_parsed.to_i.to_s
    if created.to_i >= created_time.to_i && created.to_i < less_than_time_parsed.to_i
      logger.debug "NOT skipping"
      csv.push(
        [
          id, created.to_s, q["updated"].to_s, q["title"], q["content"],
          tag_str, q["product"], q["topic"], q["locale"]
        ])
    else
      logger.debug "SKIPPING"
    end

    # Interpolation: url is nil on the last page and "+" would raise TypeError.
    logger.debug "url:#{url}"
    logger.debug 'created:' + created.to_i.to_s
    logger.debug 'created_time:' + created_time.to_i.to_s
    # NOTE(review): stopping at the first question older than the start date
    # assumes results arrive newest-first -- confirm against the API ordering.
    if created.to_i < created_time.to_i
      end_program = true
      break
    end
  end
end
# Write every collected row to a CSV file named after the requested date
# range (start date, then end date, each zero-padded).
column_names = %w[id created updated title content tags product topic locale]
date_parts = (0..5).map { |index| ARGV[index].to_i }
FILENAME = format("%4.4d-%2.2d-%2.2d-%4.4d-%2.2d-%2.2d-firefox-desktop-all-locales.csv",
                  *date_parts)
CSV.open(FILENAME, "w", write_headers: true, headers: column_names) do |out|
  csv.each { |row| out << row }
end