Skip to content

Commit 30e403c

Browse files
committed
SRCH-5154 Bulk delete zombie records from Elastic Search
1 parent 8523c6d commit 30e403c

12 files changed

+418
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# frozen_string_literal: true
2+
3+
module Admin
  # Admin screen for uploading a plain-text list of zombie URLs. The upload
  # is validated in the request and then handed to BulkZombieUrlUploaderJob.
  class BulkZombieUrlUploadController < AdminController
    # Renders the upload form.
    def index
      @page_title = 'Bulk Zombie URL Upload'
    end

    # Validates the uploaded file, enqueues the background job and redirects
    # back to the form with either a success or an error flash message.
    def upload
      begin
        # The form posts the file as 'bulk_zombie_upload_urls'; reading any
        # other key yields nil and every upload is rejected as "missing".
        @file = params[:bulk_zombie_upload_urls]
        BulkZombieUrlUploader::UrlFileValidator.new(@file).validate!
        enqueue_job
        flash[:success] = success_message(@file.original_filename)
      rescue BulkZombieUrlUploader::Error => e
        Rails.logger.error 'Url upload failed', e
        flash[:error] = e.message
      end

      redirect_to admin_bulk_zombie_url_upload_index_path
    end

    private

    # Flash text shown after the file has been accepted for processing.
    def success_message(filename)
      <<~SUCCESS_MESSAGE
        Successfully uploaded #{filename} for processing.
        The results will be emailed to you.
      SUCCESS_MESSAGE
    end

    # Passes the current user, the original filename and the file's lines
    # (read as UTF-8) to the job. The reindex checkbox value is cast to a
    # boolean and forwarded as a keyword argument.
    def enqueue_job
      BulkZombieUrlUploaderJob.perform_later(
        current_user,
        @file.original_filename,
        @file.tempfile.set_encoding('UTF-8').readlines,
        reindex: ActiveModel::Type::Boolean.new.cast(params[:reindex])
      )
    end
  end
end
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# frozen_string_literal: true
2+
3+
# Background job that runs a BulkZombieUrlUploader over a list of URLs,
# logs a summary and emails the results to the user who uploaded the file.
class BulkZombieUrlUploaderJob < ApplicationJob
  queue_as :searchgov

  delegate :upload, to: :@uploader

  # user     - recipient of the results email
  # filename - name handed to the uploader (it becomes the results' name)
  # urls     - enumerable of raw URL strings
  # reindex  - accepted from callers; not read anywhere in this job
  def perform(user, filename, urls, reindex: false)
    @user = user
    @uploader = BulkZombieUrlUploader.new(filename, urls)

    upload
    report_results
  end

  # Logs the summary first, then sends the email.
  def report_results
    log_results
    send_results_email
  end

  # Writes the run name, total URL count and error count to the Rails log.
  def log_results
    summary = @uploader.results
    Rails.logger.info "BulkZombieUrlUploaderJob: #{summary.name}"
    Rails.logger.info " #{summary.total_count} URLs"
    Rails.logger.info " #{summary.error_count} errors"
  end

  # Builds the results email for @user and delivers it immediately.
  def send_results_email
    BulkZombieUrlUploadResultsMailer
      .with(user: @user, results: @uploader.results)
      .results_email
      .deliver_now!
  end
end
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# frozen_string_literal: true
2+
3+
# Emails the outcome of a bulk zombie URL upload to the user who started it.
class BulkZombieUrlUploadResultsMailer < ApplicationMailer
  # Expects mailer params :user (must respond to #email) and :results
  # (must respond to #name). @results is exposed to the email templates.
  def results_email
    @results = params[:results]
    recipient = params[:user].email
    mail(to: recipient, subject: "Bulk Zombie URL upload results for #{@results.name}")
  end
end
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# frozen_string_literal: true
2+
3+
class BulkZombieUrlUploader
4+
MAXIMUM_FILE_SIZE = 4.megabytes
5+
VALID_CONTENT_TYPES = %w[text/plain].freeze
6+
7+
attr_reader :results
8+
9+
class Error < StandardError
10+
end
11+
12+
# Running tally for one bulk zombie URL upload: how many URLs were handled
# without raising, how many raised, and which URLs raised under each message.
class Results
  attr_accessor :searchgov_domains, :ok_count, :error_count, :name

  # name - label for this run; the uploader passes the name it was given.
  def initialize(name)
    @name = name
    @ok_count = 0
    @error_count = 0
    @searchgov_domains = Set.new
    # Block default so every message gets its own array of URLs.
    @errors = Hash.new { |hash, key| hash[key] = [] }
  end

  # Records one URL that was processed without raising.
  def delete_ok
    self.ok_count += 1
  end

  # Records one failed URL, grouped under its error message.
  def add_error(error_message, url)
    self.error_count += 1
    @errors[error_message] << url
  end

  # Number of URLs seen so far, successful or not.
  def total_count
    ok_count + error_count
  end

  # Error messages recorded so far, in the order they were first stored.
  # The results email templates iterate over this list and call #urls_with
  # for each entry.
  def error_messages
    @errors.keys
  end

  # URLs recorded under error_message. Looking up a message that was never
  # recorded stores (and returns) an empty array because of the hash's
  # default block.
  def urls_with(error_message)
    @errors[error_message]
  end
end
40+
41+
# Checks an uploaded file before it is queued for processing. Each check
# raises BulkZombieUrlUploader::Error with a user-facing message on failure.
class UrlFileValidator
  def initialize(uploaded_file)
    @uploaded_file = uploaded_file
  end

  # Runs the checks in order: presence, content type, size. Presence comes
  # first because the later checks call methods on the file.
  def validate!
    ensure_present
    ensure_valid_content_type
    ensure_not_too_big
  end

  # Rejects files whose content type is not in VALID_CONTENT_TYPES.
  def ensure_valid_content_type
    content_type = @uploaded_file.content_type
    return if BulkZombieUrlUploader::VALID_CONTENT_TYPES.include?(content_type)

    raise BulkZombieUrlUploader::Error, "Files of type #{content_type} are not supported."
  end

  # Rejects a missing upload.
  def ensure_present
    return if @uploaded_file.present?

    raise BulkZombieUrlUploader::Error, 'Please choose a file to upload.'
  end

  # Rejects files larger than MAXIMUM_FILE_SIZE.
  def ensure_not_too_big
    within_limit = @uploaded_file.size <= BulkZombieUrlUploader::MAXIMUM_FILE_SIZE
    return if within_limit

    raise BulkZombieUrlUploader::Error, "#{@uploaded_file.original_filename} is too big; please split it."
  end
end
73+
74+
# name - label stored on the Results of each run
# urls - enumerable of raw URL strings; each one is stripped before lookup
def initialize(name, urls)
  @name = name
  @urls = urls
end

# Starts a fresh Results object and processes every URL.
def upload
  @results = Results.new(@name)
  upload_urls
end

private

# Feeds each raw URL to process_url in turn.
def upload_urls
  @urls.each { |raw_url| process_url(raw_url) }
end

# Destroys the SearchgovUrl record for the stripped URL when one exists.
# The URL is counted as OK whether or not a record was found; any
# StandardError is recorded against the raw URL and logged.
def process_url(raw_url)
  record = SearchgovUrl.find_or_initialize_by(url: raw_url.strip)
  record.destroy if record.persisted?
  @results.delete_ok
rescue StandardError => error
  @results.add_error(error.message, raw_url)
  Rails.logger.error "Failed to process url: #{raw_url}", error
end
100+
end
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
%h2
2+
Bulk Zombie URL Upload
3+
%p
4+
To upload a list of URLs to be deleted, follow these instructions:
5+
%ul.bulk-upload-instructions
6+
%li
7+
Create a new text file containing one URL per line. An example of this format
8+
is shown below.
9+
%li
10+
Save the file on your computer; you can name the file whatever you like, as
11+
long as it's plain text and has a .txt extension.
12+
%li
13+
%b
14+
Do not use Microsoft Word files, or any other file type except plain text.
15+
%li
16+
The maximum file size is #{number_to_human_size(BulkZombieUrlUploader::MAXIMUM_FILE_SIZE)}.
17+
%li
18+
Browse for the file on your computer.
19+
%li
20+
Upload the file to Search.gov using the upload button below.
21+
%li
22+
You will receive an email when processing of your URLs is complete.
23+
%p
24+
%strong
25+
Sample file format:
26+
%code
27+
%pre
28+
http://www.sample.gov/1.html
29+
http://www.sample.gov/2.html
30+
http://www.sample.gov/3.html
31+
= form_tag upload_admin_bulk_zombie_url_upload_index_path, :multipart => true do
32+
= file_field_tag 'bulk_zombie_upload_urls', :accept => 'text/plain', :class => 'file'
33+
%p
34+
= check_box_tag :reindex
35+
= label_tag :reindex, 'Reindex existing URLs?'
36+
%p
37+
= submit_tag "Upload", :class => 'submit btn-upload'

app/views/admin/home/index.html.haml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
%li=link_to("Superfresh Urls", admin_superfresh_urls_path)
1616
%li=link_to("Superfresh Bulk Upload", admin_superfresh_urls_bulk_upload_index_path)
1717
%li=link_to("Bulk Search.gov URL Upload", admin_bulk_url_upload_index_path)
18+
%li=link_to("Bulk Zombie URL Upload", admin_bulk_zombie_url_upload_index_path)
1819
%li=link_to("ODIE URL Source Update", admin_odie_url_source_update_index_path)
1920
%li=link_to("Bulk Affiliate Styles Upload", admin_bulk_affiliate_styles_upload_index_path)
2021

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<p>
2+
The bulk zombie url upload job for <%= @results.name %> has
3+
finished.
4+
</p>
5+
6+
<p>
7+
There were <%= @results.total_count %> URLs.
8+
</p>
9+
10+
<% if @results.error_count == 0 %>
11+
<p>
12+
There were no errors.
13+
</p>
14+
<% else %>
15+
<p>
16+
<%= @results.ok_count %> URLs were deleted successfully.
17+
</p>
18+
19+
<p>
20+
<%= @results.error_count %> URLs failed validation.
21+
</p>
22+
23+
<% @results.error_messages.each do |error_message| %>
24+
<h3><%= error_message %></h3>
25+
<% @results.urls_with(error_message).each do |url| %>
26+
<%= url %>
27+
<br />
28+
<% end %>
29+
<% end %>
30+
<% end %>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
The bulk zombie url upload job for <%= @results.name %> has
2+
finished.
3+
4+
There were <%= @results.total_count %> URLs.
5+
6+
<% if @results.error_count == 0 %>
7+
There were no errors.
8+
<% else %>
9+
<%= @results.ok_count %> URLs were deleted successfully.
10+
<%= @results.error_count %> URLs failed validation.
11+
12+
<% @results.error_messages.each do |error_message| %>
13+
<%= error_message %>
14+
<% @results.urls_with(error_message).each do |url| %>
15+
<%= url %>
16+
<% end %>
17+
<% end %>
18+
<% end %>

config/routes.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@
185185
post :upload
186186
end
187187
end
188+
resources :bulk_zombie_url_upload, only: :index do
189+
collection do
190+
post :upload
191+
end
192+
end
188193
resources :bulk_affiliate_styles_upload, only: :index do
189194
collection do
190195
post :upload
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Feature: Bulk Zombie URL Upload
2+
In order to remove zombie URLs from the search index in bulk
3+
As an admin
4+
I want to upload a file containing URLs
5+
6+
Scenario: Bulk-uploading zombie URLs for deletion as an admin
7+
Given I am logged in with email "affiliate_admin@fixtures.org"
8+
When I go to the bulk zombie url upload admin page
9+
Then I should see "Bulk Zombie URL Upload"
10+
And I should see "The maximum file size is 4 MB"
11+
12+
When I attach the file "features/support/bulk_upload_urls.txt" to "bulk_zombie_upload_urls"
13+
And I press "Upload"
14+
Then I should be on the bulk zombie url upload admin page
15+
And I should see "Successfully uploaded bulk_upload_urls.txt"
16+
And I should see "The results will be emailed to you."
17+
18+
When I do not attach a file to "bulk_zombie_upload_urls"
19+
And I press "Upload"
20+
Then I should be on the bulk zombie url upload admin page
21+
And I should see "Please choose a file to upload"
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# frozen_string_literal: true
2+
3+
# Expectations shared by every results email, with or without errors.
# The including context must define `mail`, `mail_body`, `user`, `filename`
# and `results`.
RSpec.shared_examples 'a bulk zombie upload notification email' do
  it 'has the correct subject' do
    # Must match the mailer's subject exactly; `eq` is case-sensitive.
    expect(mail.subject).to eq("Bulk Zombie URL upload results for #{filename}")
  end

  it 'has the correct recipient' do
    expect(mail.to).to eq([user.email])
  end

  it 'has the correct from header' do
    expect(mail.from).to eq([DELIVER_FROM_EMAIL_ADDRESS])
  end

  it 'has the correct reply-to' do
    expect(mail.reply_to).to eq([SUPPORT_EMAIL_ADDRESS])
  end

  it 'has the correct total number of URLs' do
    expect(mail_body).to match(/There were #{results.total_count} URLs/)
  end
end
24+
25+
RSpec.describe BulkZombieUrlUploadResultsMailer, type: :mailer do
  describe '#results_email' do
    let(:user) { users(:affiliate_admin) }
    let(:filename) { 'test-file.txt' }
    # Two successful deletions; the 'with errors' context adds failures.
    let(:results) do
      results = BulkZombieUrlUploader::Results.new(filename)
      # Results records a success with #delete_ok; it has no #add_ok.
      results.delete_ok
      results.delete_ok
      results
    end
    let(:mail) do
      described_class.with(user: user, results: results).results_email
    end
    let(:mail_body) { mail.body.encoded }

    describe 'with no errors' do
      # Use the zombie-specific shared examples defined in this file.
      it_behaves_like 'a bulk zombie upload notification email'

      it 'reports that there were no URLs with problems' do
        expect(mail_body).to match(/There were no errors/)
      end
    end

    describe 'with errors' do
      let(:first_error_message) { 'First validation failure' }
      let(:first_bad_url) { 'https://agency.gov/first-bad-url' }
      let(:second_error_message) { 'Second validation failure' }
      let(:second_bad_url) { 'https://agency.gov/second-bad-url' }

      before do
        results.add_error(first_error_message, first_bad_url)
        results.add_error(second_error_message, second_bad_url)
      end

      it_behaves_like 'a bulk zombie upload notification email'

      it 'reports the correct number of OK URLs' do
        expect(mail_body).to match(
          /#{results.ok_count} URLs were deleted successfully/
        )
      end

      it 'reports the correct number of URLs with problems' do
        expect(mail_body).to match(/#{results.error_count} URLs failed validation/)
      end

      it 'shows the first URL validation failure' do
        expect(mail_body).to match(/#{first_error_message}\s+#{first_bad_url}/)
      end

      it 'shows the second URL validation failure' do
        expect(mail_body).to match(/#{second_error_message}\s+#{second_bad_url}/)
      end
    end
  end
end

0 commit comments

Comments
 (0)