From 8ce15d9604ace1a68236016dbbeb3ae0373e7de4 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 06:32:06 -0800 Subject: [PATCH 01/14] Denormalize Status Message to that Entry Look Up Can Be Fast (#913) * Thats it, Ive had enough. we need status_message as a column on entry. Today is the day * call the cops * call the cops * Add guard condition for column existence --------- Co-authored-by: Jeremy Friesen --- app/models/bulkrax/importer.rb | 12 ++++-------- app/models/bulkrax/status.rb | 6 +++++- app/models/concerns/bulkrax/status_info.rb | 3 +++ bulkrax.gemspec | 1 + .../20240208005801_denormalize_status_message.rb | 7 +++++++ lib/bulkrax.rb | 1 + lib/tasks/bulkrax_tasks.rake | 12 ++++++++++++ 7 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 db/migrate/20240208005801_denormalize_status_message.rb diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 76be42c07..94c400f31 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -123,16 +123,16 @@ def last_run @last_run ||= self.importer_runs.last end + def failed_entries? + entries.failed.any? + end + def failed_statuses @failed_statuses ||= Bulkrax::Status.latest_by_statusable .includes(:statusable) .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Failed') end - def failed_entries - @failed_entries ||= failed_statuses.map(&:statusable) - end - def failed_messages failed_statuses.each_with_object({}) do |e, i| i[e.error_message] ||= [] @@ -146,10 +146,6 @@ def completed_statuses .where('bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?', self.entries.pluck(:id), 'Bulkrax::Entry', 'Complete') end - def completed_entries - @completed_entries ||= completed_statuses.map(&:statusable) - end - def seen @seen ||= {} end diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index fbcb5a872..534f176f0 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -2,7 +2,7 @@ module Bulkrax class Status < ApplicationRecord - belongs_to :statusable, polymorphic: true + belongs_to :statusable, polymorphic: true, denormalize: { fields: %i[status_message], if: :latest? } belongs_to :runnable, polymorphic: true serialize :error_backtrace, Array @@ -21,5 +21,9 @@ def self.latest_by_statusable_subtable status_table.join(latest_status_query.as(latest_status_table.name.to_s), Arel::Nodes::InnerJoin) .on(status_table[:id].eq(latest_status_table[:latest_status_id])) end + + def latest? + self.id == self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) + end end end diff --git a/app/models/concerns/bulkrax/status_info.rb b/app/models/concerns/bulkrax/status_info.rb index 5edc22388..02ce18838 100644 --- a/app/models/concerns/bulkrax/status_info.rb +++ b/app/models/concerns/bulkrax/status_info.rb @@ -10,6 +10,9 @@ module StatusInfo as: :statusable, class_name: "Bulkrax::Status", inverse_of: :statusable + scope :failed, -> { where(status_message: 'Failed') } + scope :complete, -> { where(status_message: 'Complete') } + scope :pending, -> { where(status_message: 'Pending') } end def current_status diff --git a/bulkrax.gemspec b/bulkrax.gemspec index 3da1cb159..adf49b707 100644 --- a/bulkrax.gemspec +++ b/bulkrax.gemspec @@ -21,6 +21,7 @@ Gem::Specification.new do |s| s.add_dependency 'rails', '>= 5.1.6' s.add_dependency 'bagit', '~> 0.4' s.add_dependency 'coderay' + s.add_dependency 'denormalize_fields' s.add_dependency 'iso8601', '~> 0.9.0' s.add_dependency 'kaminari' s.add_dependency 'language_list', '~> 1.2', '>= 1.2.1' diff --git a/db/migrate/20240208005801_denormalize_status_message.rb b/db/migrate/20240208005801_denormalize_status_message.rb new file mode 100644 index 000000000..314f2e625 --- /dev/null +++ b/db/migrate/20240208005801_denormalize_status_message.rb @@ -0,0 +1,7 @@ +class DenormalizeStatusMessage < ActiveRecord::Migration[5.2] + def change + add_column :bulkrax_entries, :status_message, :string, default: 'Pending' unless column_exists?(:bulkrax_entries, :status_message) + add_column :bulkrax_importers, :status_message, :string, default: 'Pending' unless column_exists?(:bulkrax_importers, :status_message) + add_column :bulkrax_exporters, :status_message, :string, default: 'Pending' unless column_exists?(:bulkrax_exporters, :status_message) + end +end diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index e88f608bf..f485a5923 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -3,6 +3,7 @@ require "bulkrax/version" require "bulkrax/engine" require 'active_support/all' +require 'denormalize_fields' # rubocop:disable Metrics/ModuleLength module Bulkrax diff --git a/lib/tasks/bulkrax_tasks.rake b/lib/tasks/bulkrax_tasks.rake index f0a741cb7..f3da8ddac 100644 --- a/lib/tasks/bulkrax_tasks.rake +++ b/lib/tasks/bulkrax_tasks.rake @@ -1,6 +1,18 @@ # frozen_string_literal: true namespace :bulkrax do + desc 'Update all status messages from the latest status. This is to refresh the denormalized field' + task update_status_messages: :environment do + @progress = ProgressBar.create(total: Bulkrax::Status.latest_by_statusable.count, + format: "%a %b\u{15E7}%i %c/%C %p%% %t", + progress_mark: ' ', + remainder_mark: "\u{FF65}") + Bulkrax::Status.latest_by_statusable.includes(:statusable).find_each do |status| + status.statusable.update(status_message: status.status_message) + @progress.increment + end + end + # Usage example: rails bulkrax:generate_test_csvs['5','100','GenericWork'] desc 'Generate CSVs with fake data for testing purposes' task :generate_test_csvs, [:num_of_csvs, :csv_rows, :record_type] => :environment do |_t, args| From 7964466d4f63bb5f0de2ca9675a01e27beeb0db3 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 01:04:23 -0800 Subject: [PATCH 02/14] Found another place we need to include the monad lib. This time so that generators work --- spec/test_app/config/application.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/test_app/config/application.rb b/spec/test_app/config/application.rb index af0ed0959..bb6896ceb 100644 --- a/spec/test_app/config/application.rb +++ b/spec/test_app/config/application.rb @@ -11,6 +11,7 @@ require "action_cable/engine" # require "rails/test_unit/railtie" require "sprockets/railtie" +require 'dry/monads' Bundler.require(*Rails.groups) require "bulkrax" From 5c8b264a1cb95cea7b7cab3327768b0a3b18839d Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 20:28:09 -0800 Subject: [PATCH 03/14] Need to Include dry/monads in sample app to get generators working (#917) Found another place we need to include the monad lib. This time so that generators work --- spec/test_app/config/application.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/test_app/config/application.rb b/spec/test_app/config/application.rb index af0ed0959..bb6896ceb 100644 --- a/spec/test_app/config/application.rb +++ b/spec/test_app/config/application.rb @@ -11,6 +11,7 @@ require "action_cable/engine" # require "rails/test_unit/railtie" require "sprockets/railtie" +require 'dry/monads' Bundler.require(*Rails.groups) require "bulkrax" From 3571792df1357f4c31e859366c71c7bcfeed4f04 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Thu, 8 Feb 2024 22:12:42 -0800 Subject: [PATCH 04/14] Strip out all special characters from csv headers (#918) * strip special characters out of header names. excel likes to leave odd unicode items, including the unicode bom, laying around. This causes havic. By stopping it right from the start we should prevent saving invisible characters to raw_metadata and other places they get stuck --- app/models/bulkrax/csv_entry.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb index a106a74bf..f8573c586 100644 --- a/app/models/bulkrax/csv_entry.rb +++ b/app/models/bulkrax/csv_entry.rb @@ -16,11 +16,12 @@ def self.fields_from_data(data) class_attribute(:csv_read_data_options, default: {}) # there's a risk that this reads the whole file into memory and could cause a memory leak + # we strip any special characters out of the headers. looking at you Excel def self.read_data(path) raise StandardError, 'CSV path empty' if path.blank? options = { headers: true, - header_converters: ->(h) { h.to_s.strip.to_sym }, + header_converters: ->(h) { h.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym }, encoding: 'utf-8' }.merge(csv_read_data_options) From ab7a6ba16e58a1c7a0214d5a699b9574f479a2ef Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 00:16:34 -0800 Subject: [PATCH 05/14] Restore rails5 support (#919) * rails 5 does not have pick, fixes latest status check --- app/models/bulkrax/status.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/models/bulkrax/status.rb b/app/models/bulkrax/status.rb index 534f176f0..86c850b0f 100644 --- a/app/models/bulkrax/status.rb +++ b/app/models/bulkrax/status.rb @@ -23,7 +23,12 @@ def self.latest_by_statusable_subtable end def latest? - self.id == self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) + # TODO: remove if statment when we stop supporting Hyrax < 4 + self.id == if Gem::Version.new(Rails::VERSION::STRING) >= Gem::Version.new('6.0.0') + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pick(:id) + else + self.class.where(statusable_id: self.statusable_id, statusable_type: self.statusable_type).order('id desc').pluck(:id).first # rubocop:disable Rails/Pick + end end end end From 0e822f56262397e6cd9e0c4bb964bfe57b56fb58 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:31:30 -0800 Subject: [PATCH 06/14] Update Importer Index and Show Entries With Search, Filtering, Sort and More (#914) Use datatables for importer index and show pages --------- Co-authored-by: Jeremy Friesen Co-authored-by: Kirk Wang --- app/assets/javascripts/bulkrax/datatables.js | 115 +++++++++++++ .../bulkrax/importers_controller.rb | 27 ++- .../concerns/bulkrax/datatables_behavior.rb | 134 +++++++++++++++ app/views/bulkrax/importers/index.html.erb | 84 +++------- app/views/bulkrax/importers/show.html.erb | 22 +-- .../bulkrax/shared/_entries_tab.html.erb | 38 +---- config/routes.rb | 2 + .../bulkrax/importers_controller_spec.rb | 37 ++++ .../bulkrax/datatables_behavior_spec.rb | 158 ++++++++++++++++++ spec/test_app/db/schema.rb | 17 +- 10 files changed, 510 insertions(+), 124 deletions(-) create mode 100644 app/assets/javascripts/bulkrax/datatables.js create mode 100644 app/controllers/concerns/bulkrax/datatables_behavior.rb create mode 100644 spec/controllers/concerns/bulkrax/datatables_behavior_spec.rb diff --git a/app/assets/javascripts/bulkrax/datatables.js b/app/assets/javascripts/bulkrax/datatables.js new file mode 100644 index 000000000..65619e72c --- /dev/null +++ b/app/assets/javascripts/bulkrax/datatables.js @@ -0,0 +1,115 @@ +Blacklight.onLoad(function() { + if($('#importer-show-table').length) { + $('#importer-show-table').DataTable( { + 'processing': true, + 'serverSide': true, + "ajax": window.location.href.replace(/(\/importers\/\d+)/, "$1/entry_table.json"), + "pageLength": 30, + "lengthMenu": [[30, 100, 200], [30, 100, 200]], + "columns": [ + { "data": "identifier" }, + { "data": "id" }, + { "data": "status_message" }, + { "data": "type" }, + { "data": "updated_at" }, + { "data": "errors", "orderable": false }, + { "data": "actions", "orderable": false } + ], + initComplete: function () { + // Add entry class filter + entrySelect.bind(this)() + // Add status filter + statusSelect.bind(this)() + // Add refresh link + refreshLink.bind(this)() + } + } ); + } + + if($('#importers-table').length) { + $('#importers-table').DataTable( { + 'processing': true, + 'serverSide': true, + "ajax": window.location.href.replace(/(\/importers)/, "$1/importer_table.json"), + "pageLength": 30, + "lengthMenu": [[30, 100, 200], [30, 100, 200]], + "columns": [ + { "data": "name" }, + { "data": "status_message" }, + { "data": "last_imported_at" }, + { "data": "next_import_at" }, + { "data": "enqueued_records", "orderable": false }, + { "data": "processed_records", "orderable": false }, + { "data": "failed_records", "orderable": false }, + { "data": "deleted_records", "orderable": false }, + { "data": "total_collection_entries", "orderable": false }, + { "data": "total_work_entries", "orderable": false }, + { "data": "total_file_set_entries", "orderable": false }, + { "data": "actions", "orderable": false } + ], + initComplete: function () { + // Add status filter + statusSelect.bind(this)() + // Add refresh link + refreshLink.bind(this)() + } + } ); + } + +}) + +function entrySelect() { + let entrySelect = document.createElement('select') + entrySelect.id = 'entry-filter' + entrySelect.classList.value = 'form-control input-sm' + entrySelect.style.marginRight = '10px' + + entrySelect.add(new Option('Filter by Entry Class', '')) + // Read the options from the footer and add them to the entrySelect + $('#importer-entry-classes').text().split('|').forEach(function (col, i) { + entrySelect.add(new Option(col.trim())) + }) + document.querySelector('div#importer-show-table_filter').firstChild.prepend(entrySelect) + + // Apply listener for user change in value + entrySelect.addEventListener('change', function () { + var val = entrySelect.value; + this.api() + .search(val ? val : '', false, false) + .draw(); + }.bind(this)); +} + +function statusSelect() { + let statusSelect = document.createElement('select'); + statusSelect.id = 'status-filter' + statusSelect.classList.value = 'form-control input-sm' + statusSelect.style.marginRight = '10px' + + statusSelect.add(new Option('Filter by Status', '')); + statusSelect.add(new Option('Complete')) + statusSelect.add(new Option('Pending')) + statusSelect.add(new Option('Failed')) + statusSelect.add(new Option('Deleted')) + statusSelect.add(new Option('Complete (with failures)')) + + document.querySelector('div.dataTables_filter').firstChild.prepend(statusSelect) + + // Apply listener for user change in value + statusSelect.addEventListener('change', function () { + var val = statusSelect.value; + this.api() + .search(val ? val : '', false, false) + .draw(); + }.bind(this)); +} + +function refreshLink() { + let refreshLink = document.createElement('a'); + refreshLink.onclick = function() { + this.api().ajax.reload(null, false) + }.bind(this) + refreshLink.classList.value = 'glyphicon glyphicon-refresh' + refreshLink.style.marginLeft = '10px' + document.querySelector('div.dataTables_filter').firstChild.append(refreshLink) +} diff --git a/app/controllers/bulkrax/importers_controller.rb b/app/controllers/bulkrax/importers_controller.rb index 8248975cd..43c2c1efc 100644 --- a/app/controllers/bulkrax/importers_controller.rb +++ b/app/controllers/bulkrax/importers_controller.rb @@ -6,26 +6,35 @@ class ImportersController < ::Bulkrax::ApplicationController include Hyrax::ThemedLayoutController if defined?(::Hyrax) include Bulkrax::DownloadBehavior include Bulkrax::API + include Bulkrax::DatatablesBehavior include Bulkrax::ValidationHelper protect_from_forgery unless: -> { api_request? } before_action :token_authenticate!, if: -> { api_request? }, only: [:create, :update, :delete] before_action :authenticate_user!, unless: -> { api_request? } before_action :check_permissions - before_action :set_importer, only: [:show, :edit, :update, :destroy] + before_action :set_importer, only: [:show, :entry_table, :edit, :update, :destroy] with_themed_layout 'dashboard' if defined?(::Hyrax) # GET /importers def index # NOTE: We're paginating this in the browser. - @importers = Importer.order(created_at: :desc).all if api_request? + @importers = Importer.order(created_at: :desc).all json_response('index') elsif defined?(::Hyrax) add_importer_breadcrumbs end end + def importer_table + @importers = Importer.order(table_order).page(table_page).per(table_per_page) + @importers = @importers.where(importer_table_search) if importer_table_search.present? + respond_to do |format| + format.json { render json: format_importers(@importers) } + end + end + # GET /importers/1 def show if api_request? @@ -34,9 +43,15 @@ def show add_importer_breadcrumbs add_breadcrumb @importer.name end - @work_entries = @importer.entries.where(type: @importer.parser.entry_class.to_s).page(params[:work_entries_page]).per(30) - @collection_entries = @importer.entries.where(type: @importer.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30) - @file_set_entries = @importer.entries.where(type: @importer.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30) + @first_entry = @importer.entries.first + end + + def entry_table + @entries = @importer.entries.order(table_order).page(table_page).per(table_per_page) + @entries = @entries.where(entry_table_search) if entry_table_search.present? + respond_to do |format| + format.json { render json: format_entries(@entries, @importer) } + end end # GET /importers/new @@ -210,7 +225,7 @@ def files_for_import(file, cloud_files) # Use callbacks to share common setup or constraints between actions. def set_importer - @importer = Importer.find(params[:id]) + @importer = Importer.find(params[:id] || params[:importer_id]) end def importable_params diff --git a/app/controllers/concerns/bulkrax/datatables_behavior.rb b/app/controllers/concerns/bulkrax/datatables_behavior.rb new file mode 100644 index 000000000..8ffcdd5d5 --- /dev/null +++ b/app/controllers/concerns/bulkrax/datatables_behavior.rb @@ -0,0 +1,134 @@ +# frozen_string_literal: true + +module Bulkrax + module DatatablesBehavior + extend ActiveSupport::Concern + + def table_per_page + per_page = params[:length].to_i + per_page < 1 ? 30 : per_page + end + + def order_value(column) + params['columns']&.[](column)&.[]('data') + end + + def table_order + "#{order_value(params&.[]('order')&.[]('0')&.[]('column'))} #{params&.[]('order')&.[]('0')&.[]('dir')}" if params&.[]('order')&.[]('0')&.[]('column').present? + end + + # convert offset to page number + def table_page + params[:start].blank? ? 1 : (params[:start].to_i / params[:length].to_i) + 1 + end + + def entry_table_search + return @entry_table_search if @entry_table_search + return @entry_table_search = false if params['search']&.[]('value').blank? + + table_search_value = params['search']&.[]('value')&.downcase + + ['identifier', 'id', 'status_message', 'type', 'updated_at'].map do |col| + column = Bulkrax::Entry.arel_table[col] + column = Arel::Nodes::NamedFunction.new('CAST', [column.as('text')]) + column = Arel::Nodes::NamedFunction.new('LOWER', [column]) + @entry_table_search = if @entry_table_search + @entry_table_search.or(column.matches("%#{table_search_value}%")) + else + column.matches("%#{table_search_value}%") + end + end + + @entry_table_search + end + + def importer_table_search + return @importer_table_search if @importer_table_search + return @importer_table_search = false if params['search']&.[]('value').blank? + + table_search_value = params['search']&.[]('value')&.downcase + + ['name', 'id', 'status_message', 'last_error_at', 'last_succeeded_at', 'updated_at'].map do |col| + column = Bulkrax::Importer.arel_table[col] + column = Arel::Nodes::NamedFunction.new('CAST', [column.as('text')]) + column = Arel::Nodes::NamedFunction.new('LOWER', [column]) + @importer_table_search = if @importer_table_search + @importer_table_search.or(column.matches("%#{table_search_value}%")) + else + column.matches("%#{table_search_value}%") + end + end + + @importer_table_search + end + + def format_importers(importers) + result = importers.map do |i| + { + name: view_context.link_to(i.name, view_context.importer_path(i)), + status_message: status_message_for(i), + last_imported_at: i.last_imported_at&.strftime("%b %d, %Y"), + next_import_at: i.next_import_at&.strftime("%b %d, %Y"), + enqueued_records: i.last_run&.enqueued_records, + processed_records: i.last_run&.processed_records || 0, + failed_records: i.last_run&.failed_records || 0, + deleted_records: i.last_run&.deleted_records, + total_collection_entries: i.last_run&.total_collection_entries, + total_work_entries: i.last_run&.total_work_entries, + total_file_set_entries: i.last_run&.total_file_set_entries, + actions: importer_util_links(i) + } + end + { + data: result, + recordsTotal: Bulkrax::Importer.count, + recordsFiltered: importers.size + } + end + + def format_entries(entries, item) + result = entries.map do |e| + { + identifier: view_context.link_to(e.identifier, view_context.item_entry_path(item, e)), + id: e.id, + status_message: status_message_for(e), + type: e.type, + updated_at: e.updated_at, + errors: e.latest_status&.error_class&.present? ? view_context.link_to(e.latest_status.error_class, view_context.item_entry_path(item, e), title: e.latest_status.error_message) : "", + actions: entry_util_links(e, item) + } + end + { + data: result, + recordsTotal: item.entries.size, + recordsFiltered: item.entries.size + } + end + + def entry_util_links(e, item) + links = [] + links << view_context.link_to(view_context.raw(''), view_context.item_entry_path(item, e)) + links << "" if view_context.an_importer?(item) + links << view_context.link_to(view_context.raw(''), view_context.item_entry_path(item, e), method: :delete, data: { confirm: 'This will delete the entry and any work associated with it. Are you sure?' }) + links.join(" ") + end + + def status_message_for(e) + if e.status_message == "Complete" + " #{e.status_message}" + elsif e.status_message == "Pending" + " #{e.status_message}" + else + " #{e.status_message}" + end + end + + def importer_util_links(i) + links = [] + links << view_context.link_to(view_context.raw(''), importer_path(i)) + links << view_context.link_to(view_context.raw(''), edit_importer_path(i)) + links << view_context.link_to(view_context.raw(''), i, method: :delete, data: { confirm: 'Are you sure?' }) + links.join(" ") + end + end +end diff --git a/app/views/bulkrax/importers/index.html.erb b/app/views/bulkrax/importers/index.html.erb index ea37d80fe..d4ffa5e17 100644 --- a/app/views/bulkrax/importers/index.html.erb +++ b/app/views/bulkrax/importers/index.html.erb @@ -13,69 +13,25 @@
- <% if @importers.present? %> -
- - - - - - - - - - - - - - - - - - - - - <% @importers.each do |importer| %> - - - - - - - - - - - - - - - - - <% end %> - -
NameStatusLast RunNext RunEntries EnqueuedEntries ProcessedEntries FailedEntries Deleted UpstreamTotal Collection EntriesTotal Work EntriesTotal File Set Entries
<%= link_to importer.name, importer_path(importer) %><%= importer.status %><%= importer.last_imported_at.strftime("%b %d, %Y") if importer.last_imported_at %><%= importer.next_import_at.strftime("%b %d, %Y") if importer.next_import_at %><%= importer.last_run&.enqueued_records %><%= (importer.last_run&.processed_records || 0) %><%= (importer.last_run&.failed_records || 0) %><%= importer.last_run&.deleted_records %><%= importer.last_run&.total_collection_entries %><%= importer.last_run&.total_work_entries %><%= importer.last_run&.total_file_set_entries %><%= link_to raw(''), importer_path(importer) %><%= link_to raw(''), edit_importer_path(importer) %><%= link_to raw(''), importer, method: :delete, data: { confirm: 'Are you sure?' } %>
-
- <% else %> -

No importers have been created.

- <% end %> +
+ + + + + + + + + + + + + + + + + +
NameStatusLast RunNext RunEntries EnqueuedEntries ProcessedEntries FailedEntries Deleted UpstreamTotal Collection EntriesTotal Work EntriesTotal File Set EntriesActions
+
- - diff --git a/app/views/bulkrax/importers/show.html.erb b/app/views/bulkrax/importers/show.html.erb index b95a79807..183a9d08d 100644 --- a/app/views/bulkrax/importers/show.html.erb +++ b/app/views/bulkrax/importers/show.html.erb @@ -1,9 +1,10 @@

Importer: <%= @importer.name %>

- <% if @importer.parser_klass == 'Bulkrax::CsvParser' && @work_entries.map(&:failed?).any? %> + + <% if @importer.failed_entries? %>
- <%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary' %> - <%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary' %> + <%= link_to 'Export Errored Entries', importer_export_errors_path(@importer.id), class: 'btn btn-primary', data: { turbolinks: false }%> + <%= link_to 'Upload Corrected Entries', importer_upload_corrected_entries_path(@importer.id), class: 'btn btn-primary' if @importer.parser.is_a?(Bulkrax::CsvParser) %>
<% end %>
@@ -75,19 +76,10 @@
- - -
- <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @importer, entries: @work_entries, pagination_param_name: :work_entries_page, pagination_anchor: 'work-entries' } %> - <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @importer, entries: @collection_entries, pagination_param_name: :collection_entries_path, pagination_anchor: 'collection-entries' } %> - <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @importer, entries: @file_set_entries, pagination_param_name: :file_set_entries_path, pagination_anchor: 'file-set-entries' } %> +
+ <%= render partial: 'bulkrax/shared/entries_tab' %>
- <% all_entries = @work_entries + @collection_entries + @file_set_entries %> - <%= render partial: 'bulkrax/importers/edit_item_buttons', locals: { item: @importer, e: all_entries.first } if all_entries.present? %> + <%= render partial: 'bulkrax/importers/edit_item_buttons', locals: { item: @importer, e: @first_entry } if @first_entry.present? %>

diff --git a/app/views/bulkrax/shared/_entries_tab.html.erb b/app/views/bulkrax/shared/_entries_tab.html.erb index d2f1da520..7590fed02 100644 --- a/app/views/bulkrax/shared/_entries_tab.html.erb +++ b/app/views/bulkrax/shared/_entries_tab.html.erb @@ -1,44 +1,16 @@ -

- +
+
+ + - - - <% entries.each do |e| %> - - - - <% if e.status == "Complete" %> - - <% elsif e.status == "Pending" %> - - <% else %> - - <% end %> - <% if e.last_error.present? %> - - <% else %> - - <% end %> - - - - <% end %> -
<%= t('bulkrax.table_header.labels.identifier') %> <%= t('bulkrax.table_header.labels.entry_id') %> <%= t('bulkrax.table_header.labels.status') %><%= t('bulkrax.table_header.labels.type') %><%= t('bulkrax.table_header.labels.updated_at') %> <%= t('bulkrax.table_header.labels.errors') %><%= t('bulkrax.table_header.labels.status_set_at') %> <%= t('bulkrax.table_header.labels.actions') %>
<%= link_to e.identifier, item_entry_path(item, e) %><%= e.id %> <%= e.status %> <%= e.status %> <%= e.status %><%= link_to e.last_error.dig("error_class"), item_entry_path(item, e) %><%= e.status_at %> - <%= link_to raw(''), item_entry_path(item, e) %> - <% if an_importer?(item) %> - - <% end %> - <%= link_to raw(''), item_entry_path(item, e), method: :delete, data: { confirm: 'This will delete the entry and any work associated with it. Are you sure?' } %> -
- <%= page_entries_info(entries) %>
- <%= paginate(entries, theme: 'blacklight', param_name: pagination_param_name, params: { anchor: pagination_anchor }) %> +
diff --git a/config/routes.rb b/config/routes.rb index e4a709e27..d728f65d4 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -7,8 +7,10 @@ end resources :importers do put :continue + get :entry_table get :export_errors collection do + get :importer_table post :external_sets end resources :entries, only: %i[show update destroy] diff --git a/spec/controllers/bulkrax/importers_controller_spec.rb b/spec/controllers/bulkrax/importers_controller_spec.rb index 4fc512c5e..9855b0110 100644 --- a/spec/controllers/bulkrax/importers_controller_spec.rb +++ b/spec/controllers/bulkrax/importers_controller_spec.rb @@ -401,6 +401,43 @@ def current_user expect(response.status).to eq(200) end end + + describe 'GET #entry_table' do + let(:importer) { FactoryBot.create(:bulkrax_importer) } + let(:valid_session) { {} } + + before do + allow(controller).to receive(:table_order).and_return('created_at asc') + end + + it 'returns a success response' do + get :entry_table, params: { importer_id: importer.to_param, format: :json }, session: valid_session + expect(response).to be_successful + end + + it 'returns entries in the correct order' do + entry1 = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, created_at: 2.days.ago) + entry2 = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, created_at: 1.day.ago) + entry3 = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, created_at: 3.days.ago) + get :entry_table, params: { importer_id: importer.to_param, format: :json }, session: valid_session + parsed_response = JSON.parse(response.body) + entry_ids_in_response = parsed_response["data"].map { |e| e["id"] } + expect(entry_ids_in_response).to eq([entry3.id, entry1.id, entry2.id]) + end + + context 'when table_search is present' do + it 'filters entries based on table_search' do + identifier_value = 'test_identifier' + matching_entry = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, identifier: identifier_value) + non_matching_entry1 = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, identifier: "unrelated_identifier") + non_matching_entry2 = FactoryBot.create(:bulkrax_csv_entry, importerexporter: importer, identifier: 'another_unrelated_identifier') + get :entry_table, params: { importer_id: importer.to_param, search: { value: identifier_value }, format: :json }, session: valid_session + entry_ids_in_response = JSON.parse(response.body)["data"].map { |e| e["id"] } + expect(entry_ids_in_response).to include(matching_entry.id) + expect(entry_ids_in_response).not_to include(non_matching_entry1.id, non_matching_entry2.id) + end + end + end end end end diff --git a/spec/controllers/concerns/bulkrax/datatables_behavior_spec.rb b/spec/controllers/concerns/bulkrax/datatables_behavior_spec.rb new file mode 100644 index 000000000..41c83c147 --- /dev/null +++ b/spec/controllers/concerns/bulkrax/datatables_behavior_spec.rb @@ -0,0 +1,158 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Bulkrax::ImportersController, type: :controller do + before do + module Bulkrax::Auth + def authenticate_user! + @current_user = User.first + true + end + + def current_user + @current_user + end + end + described_class.prepend Bulkrax::Auth + allow(Bulkrax::ImporterJob).to receive(:perform_later).and_return(true) + end + + controller do + include Bulkrax::DatatablesBehavior + end + + describe '#table_per_page' do + it 'returns 30 when params[:length] is less than 1' do + get :index, params: { length: '0' } + expect(controller.table_per_page).to eq(30) + end + + it 'returns params[:length] when it is greater than 0' do + get :index, params: { length: '10' } + expect(controller.table_per_page).to eq(10) + end + end + + describe '#order_value' do + it 'returns the value of the specified column' do + get :index, params: { columns: { '0' => { data: 'some_data' } } } + expect(controller.order_value('0')).to eq('some_data') + end + end + + describe '#table_order' do + it 'returns the order value and direction' do + get :index, params: { order: { '0' => { column: '0', dir: 'asc' } }, columns: { '0' => { data: 'some_data' } } } + expect(controller.table_order).to eq('some_data asc') + end + end + + describe '#table_page' do + it 'returns 1 when params[:start] is blank' do + get :index, params: { start: '' } + expect(controller.table_page).to eq(1) + end + + it 'returns the page number when params[:start] is not blank' do + get :index, params: { start: '10', length: '5' } + expect(controller.table_page).to eq(3) + end + end + + describe '#entry_table_search' do + it 'returns false when params[:search][:value] is blank' do + get :index, params: { search: { value: '' } } + expect(controller.entry_table_search).to eq(false) + end + + it 'returns a Arel::Nodes::Grouping node when params[:search][:value] is not blank' do + get :index, params: { search: { value: 'some_value' } } + expect(controller.entry_table_search).to be_a(Arel::Nodes::Grouping) + end + end + + describe '#format_entries' do + let(:item) { FactoryBot.create(:bulkrax_importer) } + let(:entry_1) { FactoryBot.create(:bulkrax_entry, importerexporter: item) } + let(:entry_2) { FactoryBot.create(:bulkrax_entry, importerexporter: item) } + let(:entries) { [entry_1, entry_2] } + + it 'returns a hash with the correct structure' do + get :index + result = controller.format_entries(entries, item) + expect(result).to be_a(Hash) + expect(result.keys).to contain_exactly(:data, :recordsTotal, :recordsFiltered) + expect(result[:data]).to be_a(Array) + expect(result[:data].first.keys).to contain_exactly(:identifier, :id, :status_message, :type, :updated_at, :errors, :actions) + end + + it 'returns the correct number of entries' do + get :index + result = controller.format_entries(entries, item) + expect(result[:recordsTotal]).to eq(entries.size) + expect(result[:recordsFiltered]).to eq(entries.size) + end + end + + describe '#entry_util_links' do + include Bulkrax::Engine.routes.url_helpers + + let(:item) { FactoryBot.create(:bulkrax_importer) } + let(:entry) { FactoryBot.create(:bulkrax_entry, importerexporter: item) } + + it 'returns a string of HTML links' do + get :index + result = controller.entry_util_links(entry, item) + expect(result).to be_a(String) + expect(result).to include('glyphicon-info-sign') + expect(result).to include('glyphicon-repeat') + expect(result).to include('glyphicon-trash') + end + + it 'includes a link to the entry' do + get :index + result = controller.entry_util_links(entry, item) + expect(result).to include(importer_entry_path(item, entry)) + end + + it 'includes a delete link to the entry' do + get :index + result = controller.entry_util_links(entry, item) + expect(result).to include(importer_entry_path(item, entry)) + expect(result).to include('method="delete"') + end + end + + describe '#status_message_for' do + let(:item) { FactoryBot.create(:bulkrax_importer) } + + it 'returns a string of HTML with a green checkmark when status_message is "Complete"' do + entry = FactoryBot.create(:bulkrax_entry, importerexporter: item, status_message: 'Complete') + get :index + result = controller.status_message_for(entry) + expect(result).to include(' Complete') + end + + it 'returns a string of HTML with a blue "horizontal ellipsis" icon when status_message is "Pending"' do + entry = FactoryBot.create(:bulkrax_entry, importerexporter: item, status_message: 'Pending') + get :index + result = controller.status_message_for(entry) + expect(result).to include(' Pending') + end + + it 'returns a string of HTML with a red "remove" icon when status_message is neither "Complete" nor "Pending"' do + entry = FactoryBot.create(:bulkrax_entry, importerexporter: item, status_message: 'Error') + get :index + result = controller.status_message_for(entry) + expect(result).to include(' Error') + end + + it 'returns a string of HTML with a red "remove" icon when status_message is "Deleted"' do + entry = FactoryBot.create(:bulkrax_entry, importerexporter: item, status_message: 'Deleted') + get :index + result = controller.status_message_for(entry) + expect(result).to include(' Deleted') + end + end +end diff --git a/spec/test_app/db/schema.rb b/spec/test_app/db/schema.rb index 70580d6ec..c0b95bb21 100644 --- a/spec/test_app/db/schema.rb +++ b/spec/test_app/db/schema.rb @@ -2,15 +2,15 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2023_06_08_153601) do +ActiveRecord::Schema.define(version: 2024_02_08_005801) do create_table "accounts", force: :cascade do |t| t.string "name" @@ -41,6 +41,7 @@ t.datetime "last_succeeded_at" t.string "importerexporter_type", default: "Bulkrax::Importer" t.integer "import_attempts", default: 0 + t.string "status_message", default: "Pending" t.index ["identifier"], name: "index_bulkrax_entries_on_identifier" t.index ["importerexporter_id", "importerexporter_type"], name: "bulkrax_entries_importerexporter_idx" t.index ["type"], name: "index_bulkrax_entries_on_type" @@ -76,6 +77,7 @@ t.string "workflow_status" t.boolean "include_thumbnails", default: false t.boolean "generated_metadata", default: false + t.string "status_message", default: "Pending" t.index ["user_id"], name: "index_bulkrax_exporters_on_user_id" end @@ -99,6 +101,8 @@ t.integer "total_file_set_entries", default: 0 t.integer "processed_works", default: 0 t.integer "failed_works", default: 0 + t.integer "processed_children", default: 0 + t.integer "failed_children", default: 0 t.index ["importer_id"], name: "index_bulkrax_importer_runs_on_importer_id" end @@ -116,6 +120,7 @@ t.boolean "validate_only" t.datetime "last_error_at" t.datetime "last_succeeded_at" + t.string "status_message", default: "Pending" t.index ["user_id"], name: "index_bulkrax_importers_on_user_id" end From 40c808d8dbaebac3f4131fa0b7b787c08e2d2d35 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:35:50 -0800 Subject: [PATCH 07/14] Add Skip to the datatabels (#920) Skip status added to the datatables work --------- Co-authored-by: Jeremy Friesen Co-authored-by: Kirk Wang --- app/assets/javascripts/bulkrax/datatables.js | 1 + app/controllers/concerns/bulkrax/datatables_behavior.rb | 2 ++ 2 files changed, 3 insertions(+) diff --git a/app/assets/javascripts/bulkrax/datatables.js b/app/assets/javascripts/bulkrax/datatables.js index 65619e72c..a60d01880 100644 --- a/app/assets/javascripts/bulkrax/datatables.js +++ b/app/assets/javascripts/bulkrax/datatables.js @@ -90,6 +90,7 @@ function statusSelect() { statusSelect.add(new Option('Complete')) statusSelect.add(new Option('Pending')) statusSelect.add(new Option('Failed')) + statusSelect.add(new Option('Skipped')) statusSelect.add(new Option('Deleted')) statusSelect.add(new Option('Complete (with failures)')) diff --git a/app/controllers/concerns/bulkrax/datatables_behavior.rb b/app/controllers/concerns/bulkrax/datatables_behavior.rb index 8ffcdd5d5..1765c1f4e 100644 --- a/app/controllers/concerns/bulkrax/datatables_behavior.rb +++ b/app/controllers/concerns/bulkrax/datatables_behavior.rb @@ -118,6 +118,8 @@ def status_message_for(e) " #{e.status_message}" elsif e.status_message == "Pending" " #{e.status_message}" + elsif e.status_message == "Skipped" + " #{e.status_message}" else " #{e.status_message}" end From 06bde078690c3eaf9ce83faf7299e9fcabe5974f Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:37:32 -0800 Subject: [PATCH 08/14] For every importer / record combo there should be exactly one entry (#921) * for every importer record combo there should be an entry. no longer reuse entries from previous importers * entry identifier is no longer unique and needs a scoped index --- app/parsers/bulkrax/application_parser.rb | 8 ++++++-- db/migrate/20240209070952_update_identifier_index.rb | 6 ++++++ spec/test_app/db/schema.rb | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 db/migrate/20240209070952_update_identifier_index.rb diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index 2a46d124d..43d48e649 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -305,11 +305,15 @@ def new_entry(entryclass, type) end def find_or_create_entry(entryclass, identifier, type, raw_metadata = nil) - entry = entryclass.where( + # limit entry search to just this importer or exporter. Don't go moving them + entry = importerexporter.entries.where( + identifier: identifier + ).first + entry ||= entryclass.new( importerexporter_id: importerexporter.id, importerexporter_type: type, identifier: identifier - ).first_or_create! + ) entry.raw_metadata = raw_metadata entry.save! entry diff --git a/db/migrate/20240209070952_update_identifier_index.rb b/db/migrate/20240209070952_update_identifier_index.rb new file mode 100644 index 000000000..2b8aae12e --- /dev/null +++ b/db/migrate/20240209070952_update_identifier_index.rb @@ -0,0 +1,6 @@ +class UpdateIdentifierIndex < ActiveRecord::Migration[5.2] + def change + remove_index :bulkrax_entries, :identifier if index_exists?(:bulkrax_entries, :identifier ) + add_index :bulkrax_entries, [:identifier, :importerexporter_id, :importerexporter_type], name: 'bulkrax_identifier_idx' unless index_exists?(:bulkrax_entries, [:identifier, :importerexporter_id, :importerexporter_type], name: 'bulkrax_identifier_idx') + end +end diff --git a/spec/test_app/db/schema.rb b/spec/test_app/db/schema.rb index c0b95bb21..0b5d3734b 100644 --- a/spec/test_app/db/schema.rb +++ b/spec/test_app/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2024_02_08_005801) do +ActiveRecord::Schema.define(version: 2024_02_09_070952) do create_table "accounts", force: :cascade do |t| t.string "name" @@ -42,7 +42,7 @@ t.string "importerexporter_type", default: "Bulkrax::Importer" t.integer "import_attempts", default: 0 t.string "status_message", default: "Pending" - t.index ["identifier"], name: "index_bulkrax_entries_on_identifier" + t.index ["identifier", "importerexporter_id", "importerexporter_type"], name: "bulkrax_identifier_idx" t.index ["importerexporter_id", "importerexporter_type"], name: "bulkrax_entries_importerexporter_idx" t.index ["type"], name: "index_bulkrax_entries_on_type" end From 7bbb9b73ee1e1c4d26cd0e98b76661f8abd502ab Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:38:01 -0800 Subject: [PATCH 09/14] Mark records as skipped if we do not see them during an import run (#922) * mark entries as skipped if they have not been seen during an importer run --- app/models/bulkrax/importer.rb | 9 +++++++++ app/models/concerns/bulkrax/status_info.rb | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index 94c400f31..efa432954 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -195,10 +195,19 @@ def import_objects(types_array = nil) end end parser.create_objects(types) + mark_unseen_as_skipped rescue StandardError => e set_status_info(e) end + # After an import any entries we did not touch are skipped. + # They are not really pending, complete for the last run, or failed + def mark_unseen_as_skipped + entries.where.not(id. seen).find_each do |entry| + entry.set_status_info('Skipped') + end + end + # Prepend the base_url to ensure unique set identifiers # @todo - move to parser, as this is OAI specific def unique_collection_identifier(id) diff --git a/app/models/concerns/bulkrax/status_info.rb b/app/models/concerns/bulkrax/status_info.rb index 02ce18838..c48dc58ea 100644 --- a/app/models/concerns/bulkrax/status_info.rb +++ b/app/models/concerns/bulkrax/status_info.rb @@ -13,6 +13,7 @@ module StatusInfo scope :failed, -> { where(status_message: 'Failed') } scope :complete, -> { where(status_message: 'Complete') } scope :pending, -> { where(status_message: 'Pending') } + scope :skipped, -> { where(status_message: 'Skipped') } end def current_status @@ -28,6 +29,10 @@ def succeeded? current_status&.status_message&.match(/^Complete$/) end + def skipped? + current_status&.status_message&.match('Skipped') + end + def status current_status&.status_message || 'Pending' end From 2b649e9e175c3c7d78ad0ef7313d3bb273217e88 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 17:59:40 -0800 Subject: [PATCH 10/14] Redo records with the remove_and_rerun property in the data, move individual remove and rerun to a background job (#923) * Add ability to set a remove_and_rerun property. setting this will delete the record before recreating it * use the delet and import job to make remove and rerun work better * individual entry update should happen in the background --- .github/workflows/test.yml | 2 +- Rakefile | 14 +-- .../javascripts/bulkrax/importers.js.erb | 16 ++- .../stylesheets/bulkrax/import_export.scss | 7 +- app/controllers/bulkrax/entries_controller.rb | 24 ++++- .../bulkrax/importers_controller.rb | 2 +- .../delete_and_import_collection_job.rb | 8 ++ .../bulkrax/delete_and_import_file_set_job.rb | 8 ++ app/jobs/bulkrax/delete_and_import_job.rb | 20 ++++ .../bulkrax/delete_and_import_work_job.rb | 8 ++ app/jobs/bulkrax/delete_job.rb | 4 + app/jobs/bulkrax/import_job.rb | 7 ++ app/models/bulkrax/exporter.rb | 11 +- app/models/bulkrax/importer.rb | 17 ++- app/parsers/bulkrax/application_parser.rb | 101 +++++++++++++++--- app/parsers/bulkrax/bagit_parser.rb | 23 ---- app/parsers/bulkrax/csv_parser.rb | 52 --------- app/parsers/bulkrax/oai_dc_parser.rb | 42 +++++--- app/parsers/bulkrax/xml_parser.rb | 39 ++++--- .../bulkrax/importers/_csv_fields.html.erb | 8 +- lib/bulkrax.rb | 2 +- .../bulkrax/application_parser_spec.rb | 23 ++-- 22 files changed, 275 insertions(+), 163 deletions(-) create mode 100644 app/jobs/bulkrax/delete_and_import_collection_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_file_set_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_job.rb create mode 100644 app/jobs/bulkrax/delete_and_import_work_job.rb create mode 100644 app/jobs/bulkrax/import_job.rb diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 38896fc21..1eacd7b52 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,7 @@ jobs: run: bundle exec rake db:migrate db:test:prepare - name: Run rspec - run: bundle exec rake + run: bundle exec rake spec - name: Upload coverage results uses: actions/upload-artifact@v2 diff --git a/Rakefile b/Rakefile index 41f4f9cd1..6dc616de5 100644 --- a/Rakefile +++ b/Rakefile @@ -25,18 +25,18 @@ require 'bundler/gem_tasks' require 'solr_wrapper/rake_task' unless Rails.env.production? +require 'rubocop/rake_task' + +RuboCop::RakeTask.new(:rubocop) do |t| + t.options = ['--display-cop-names', '--ignore-parent-exclusion', '-a'] +end + begin require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) - task default: :spec + task default: [:rubocop, :spec] rescue LoadError # rubocop:disable Lint/HandleExceptions # no rspec available end - -require 'rubocop/rake_task' - -RuboCop::RakeTask.new(:rubocop) do |t| - t.options = ['--display-cop-names'] -end diff --git a/app/assets/javascripts/bulkrax/importers.js.erb b/app/assets/javascripts/bulkrax/importers.js.erb index 87c0f7b1d..e254c60e2 100644 --- a/app/assets/javascripts/bulkrax/importers.js.erb +++ b/app/assets/javascripts/bulkrax/importers.js.erb @@ -74,12 +74,14 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', null) } else { $('#file_path').show() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', 'required') $('#file_upload input').attr('required', null) $('#importer_parser_fields_file_style_specify_a_path_on_the_server').attr('checked', true) @@ -89,6 +91,7 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').show() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', 'required') }) @@ -96,6 +99,7 @@ function handleFileToggle(file_path) { $('#file_path').show() $('#file_upload').hide() $('#cloud').hide() + $('#existing_options').hide() $('#file_path input').attr('required', 'required') $('#file_upload input').attr('required', null) }) @@ -103,9 +107,19 @@ function handleFileToggle(file_path) { $('#file_path').hide() $('#file_upload').hide() $('#cloud').show() + $('#existing_options').hide() $('#file_path input').attr('required', null) $('#file_upload input').attr('required', null) }) + $('#importer_parser_fields_file_style_existing_entries').click(function(e){ + $('#file_path').hide() + $('#file_upload').hide() + $('#cloud').hide() + $('#existing_options').show() + $('#file_path input').attr('required', null) + $('#file_upload input').attr('required', null) + }) + } function handleParserKlass() { @@ -189,4 +203,4 @@ function setError(selector, error) { selector.attr('disabled', true) } -$(document).on({'ready': prepBulkrax, 'turbolinks:load': prepBulkrax}) \ No newline at end of file +$(document).on({'ready': prepBulkrax, 'turbolinks:load': prepBulkrax}) diff --git a/app/assets/stylesheets/bulkrax/import_export.scss b/app/assets/stylesheets/bulkrax/import_export.scss index 1834840ac..0e182842f 100644 --- a/app/assets/stylesheets/bulkrax/import_export.scss +++ b/app/assets/stylesheets/bulkrax/import_export.scss @@ -34,4 +34,9 @@ div#s2id_exporter_export_source_collection { .bulkrax-clear-toggles { clear: both; -} \ No newline at end of file +} + +#existing_options .collection_check_boxes { + margin-left: 10px; + margin-right: 10px; +} diff --git a/app/controllers/bulkrax/entries_controller.rb b/app/controllers/bulkrax/entries_controller.rb index 4328ed0bf..ea1e4feea 100644 --- a/app/controllers/bulkrax/entries_controller.rb +++ b/app/controllers/bulkrax/entries_controller.rb @@ -17,13 +17,29 @@ def show def update @entry = Entry.find(params[:id]) - @entry.factory&.find&.destroy if params[:destroy_first] - @entry.build - @entry.save + type = case @entry.type.downcase + when /fileset/ + 'file_set' + when /collection/ + 'collection' + else + 'work' + end item = @entry.importerexporter + # do not run counters as it loads the whole parser + current_run = item.current_run(skip_counts: true) + @entry.set_status_info('Pending', current_run) + ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: @entry.importer.id) + + if params[:destroy_first] + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.perform_later(@entry, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.perform_later(@entry.id, current_run.id) + end + entry_path = item.class.to_s.include?('Importer') ? bulkrax.importer_entry_path(item.id, @entry.id) : bulkrax.exporter_entry_path(item.id, @entry.id) - redirect_back fallback_location: entry_path, notice: "Entry update ran, new status is #{@entry.status}" + redirect_back fallback_location: entry_path, notice: "Entry #{@entry.id} update has been queued" end def destroy diff --git a/app/controllers/bulkrax/importers_controller.rb b/app/controllers/bulkrax/importers_controller.rb index 43c2c1efc..7fcbeff04 100644 --- a/app/controllers/bulkrax/importers_controller.rb +++ b/app/controllers/bulkrax/importers_controller.rb @@ -233,7 +233,7 @@ def importable_params end def importable_parser_fields - params&.[](:importer)&.[](:parser_fields)&.except(:file)&.keys + params&.[](:importer)&.[](:parser_fields)&.except(:file, :entry_statuses)&.keys&. + [{ "entry_statuses" => [] }] end # Only allow a trusted parameters through. diff --git a/app/jobs/bulkrax/delete_and_import_collection_job.rb b/app/jobs/bulkrax/delete_and_import_collection_job.rb new file mode 100644 index 000000000..2e434fb6b --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_collection_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportCollectionJob < DeleteAndImportJob + DELETE_CLASS = Bulkrax::DeleteCollectionJob + IMPORT_CLASS = Bulkrax::ImportCollectionJob + end +end diff --git a/app/jobs/bulkrax/delete_and_import_file_set_job.rb b/app/jobs/bulkrax/delete_and_import_file_set_job.rb new file mode 100644 index 000000000..8660a082e --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_file_set_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportFileSetJob < DeleteAndImportJob + DELETE_CLASS = Bulkrax::DeleteFileSetJob + IMPORT_CLASS = Bulkrax::ImportFileSetJob + end +end diff --git a/app/jobs/bulkrax/delete_and_import_job.rb b/app/jobs/bulkrax/delete_and_import_job.rb new file mode 100644 index 000000000..03be3f142 --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_job.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportJob < ApplicationJob + queue_as :import + + def perform(entry, importer_run) + status = self.class::DELETE_CLASS.perform_now(entry, importer_run) + if status.status_message == "Deleted" + entry = Bulkrax::Entry.find(entry.id) # maximum reload + self.class::IMPORT_CLASS.perform_now(entry.id, importer_run.id) + end + + rescue => e + entry.set_status_info(e) + # this causes caught exception to be reraised + raise + end + end +end diff --git a/app/jobs/bulkrax/delete_and_import_work_job.rb b/app/jobs/bulkrax/delete_and_import_work_job.rb new file mode 100644 index 000000000..318982cf3 --- /dev/null +++ b/app/jobs/bulkrax/delete_and_import_work_job.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax + class DeleteAndImportWorkJob < DeleteAndImportJob + DELETE_CLASS = Bulkrax::DeleteWorkJob + IMPORT_CLASS = Bulkrax::ImportWorkJob + end +end diff --git a/app/jobs/bulkrax/delete_job.rb b/app/jobs/bulkrax/delete_job.rb index 1fcd04cca..0b337261b 100644 --- a/app/jobs/bulkrax/delete_job.rb +++ b/app/jobs/bulkrax/delete_job.rb @@ -15,6 +15,10 @@ def perform(entry, importer_run) entry.importer.current_run = ImporterRun.find(importer_run.id) entry.importer.record_status entry.set_status_info("Deleted", ImporterRun.find(importer_run.id)) + rescue => e + entry.set_status_info(e) + # this causes caught exception to be reraised + raise end end end diff --git a/app/jobs/bulkrax/import_job.rb b/app/jobs/bulkrax/import_job.rb new file mode 100644 index 000000000..b8ff2d5dd --- /dev/null +++ b/app/jobs/bulkrax/import_job.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module Bulkrax + class ImportJob < ApplicationJob + queue_as :import + end +end diff --git a/app/models/bulkrax/exporter.rb b/app/models/bulkrax/exporter.rb index 42b62c13e..de054a593 100644 --- a/app/models/bulkrax/exporter.rb +++ b/app/models/bulkrax/exporter.rb @@ -23,6 +23,10 @@ def export set_status_info(e) end + def remove_and_rerun + self.parser_fields['remove_and_rerun'] + end + # #export_source accessors # Used in form to prevent it from getting confused as to which value to populate #export_source with. # Also, used to display the correct selected value when rendering edit form. @@ -102,9 +106,12 @@ def importers_list Importer.all.map { |i| [i.name, i.id] } end - def current_run + def current_run(skip_counts: false) + @current_run ||= self.exporter_runs.create! if skip_counts + return @current_run if @current_run + total = self.limit || parser.total - @current_run ||= self.exporter_runs.create!(total_work_entries: total, enqueued_records: total) + @current_run = self.exporter_runs.create!(total_work_entries: total, enqueued_records: total) end def last_run diff --git a/app/models/bulkrax/importer.rb b/app/models/bulkrax/importer.rb index efa432954..e24320380 100644 --- a/app/models/bulkrax/importer.rb +++ b/app/models/bulkrax/importer.rb @@ -103,11 +103,12 @@ def schedulable? frequency.to_seconds != 0 end - def current_run + def current_run(skip_counts: false) return @current_run if @current_run.present? @current_run = self.importer_runs.create! return @current_run if file? && zip? + return @current_run if skip_counts entry_counts = { total_work_entries: self.limit || parser.works_total, @@ -166,6 +167,10 @@ def metadata_only? parser.parser_fields['metadata_only'] == true end + def existing_entries? + parser.parser_fields['file_style']&.match(/Existing Entries/) + end + def import_works import_objects(['work']) end @@ -188,13 +193,7 @@ def import_objects(types_array = nil) self.only_updates ||= false self.save if self.new_record? # Object needs to be saved for statuses types = types_array || DEFAULT_OBJECT_TYPES - if remove_and_rerun - self.entries.find_each do |e| - e.factory.find&.destroy! - e.destroy! - end - end - parser.create_objects(types) + existing_entries? ? parser.rebuild_entries(types) : parser.create_objects(types) mark_unseen_as_skipped rescue StandardError => e set_status_info(e) @@ -203,7 +202,7 @@ def import_objects(types_array = nil) # After an import any entries we did not touch are skipped. # They are not really pending, complete for the last run, or failed def mark_unseen_as_skipped - entries.where.not(id. seen).find_each do |entry| + entries.where.not(identifier: seen.keys).find_each do |entry| entry.set_status_info('Skipped') end end diff --git a/app/parsers/bulkrax/application_parser.rb b/app/parsers/bulkrax/application_parser.rb index 43d48e649..d2b603ee3 100644 --- a/app/parsers/bulkrax/application_parser.rb +++ b/app/parsers/bulkrax/application_parser.rb @@ -14,7 +14,7 @@ class ApplicationParser # rubocop:disable Metrics/ClassLength :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers, :key_without_numbers, :status, :set_status_info, :status_info, :status_at, :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only, - :zip?, :file?, + :zip?, :file?, :remove_and_rerun, to: :importerexporter # @todo Convert to `class_attribute :parser_fiels, default: {}` @@ -47,6 +47,10 @@ def entry_class raise NotImplementedError, 'must be defined' end + def work_entry_class + entry_class + end + # @api public # @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser. def collection_entry_class @@ -157,6 +161,22 @@ def visibility @visibility ||= self.parser_fields['visibility'] || 'open' end + def create_collections + create_objects(['collection']) + end + + def create_works + create_objects(['work']) + end + + def create_file_sets + create_objects(['file_set']) + end + + def create_relationships + create_objects(['relationship']) + end + # @api public # # @param types [Array] the types of objects that we'll create. @@ -166,30 +186,77 @@ def visibility # @see #create_works # @see #create_file_sets # @see #create_relationships - def create_objects(types = []) - types.each do |object_type| - send("create_#{object_type.pluralize}") + def create_objects(types_array = nil) + index = 0 + (types_array || %w[collection work file_set relationship]).each do |type| + if type.eql?('relationship') + ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id) + next + end + send(type.pluralize).each do |current_record| + next unless record_has_source_identifier(current_record, index) + break if limit_reached?(limit, index) + seen[current_record[source_identifier]] = true + create_entry_and_job(current_record, type) + increment_counters(index, "#{type}": true) + index += 1 + end + importer.record_status + end + true + rescue StandardError => e + set_status_info(e) + end + + def rebuild_entries(types_array = nil) + index = 0 + (types_array || %w[collection work file_set relationship]).each do |type| + # works are not gurneteed to have Work in the type + + importer.entries.where(rebuild_entry_query(type, parser_fields['entry_statuses'])).find_each do |e| + seen[e.identifier] = true + e.status_info('Pending', importer.current_run) + if remove_and_rerun + delay = calculate_type_delay(type) + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, e, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, e.id, current_run.id) + end + increment_counters(index) + index += 1 + end end end - # @abstract Subclass and override {#create_collections} to implement behavior for the parser. - def create_collections - raise NotImplementedError, 'must be defined' if importer? - end + def rebuild_entry_query(type, statuses) + type_col = Bulkrax::Entry.arel_table['type'] + status_col = Bulkrax::Entry.arel_table['status_message'] - # @abstract Subclass and override {#create_works} to implement behavior for the parser. - def create_works - raise NotImplementedError, 'must be defined' if importer? + query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize)) + query.and(status_col.in(statuses)) end - # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser. - def create_file_sets - raise NotImplementedError, 'must be defined' if importer? + def calculate_type_delay(type) + return 2.minutes if type == 'file_set' + return 1.minute if type == 'work' + return 0 end - # @abstract Subclass and override {#create_relationships} to implement behavior for the parser. - def create_relationships - raise NotImplementedError, 'must be defined' if importer? + def create_entry_and_job(current_record, type, identifier = nil) + identifier ||= current_record[source_identifier] + new_entry = find_or_create_entry(send("#{type}_entry_class"), + identifier, + 'Bulkrax::Importer', + current_record.to_h) + new_entry.status_info('Pending', importer.current_run) + if current_record[:delete].present? + "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) + elsif current_record[:remove_and_rerun].present? || remove_and_rerun + delay = calculate_type_delay(type) + "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run) + else + "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) + end end # Optional, define if using browse everything for file upload diff --git a/app/parsers/bulkrax/bagit_parser.rb b/app/parsers/bulkrax/bagit_parser.rb index eccbee16f..8d93a1b54 100644 --- a/app/parsers/bulkrax/bagit_parser.rb +++ b/app/parsers/bulkrax/bagit_parser.rb @@ -63,29 +63,6 @@ def get_data(bag, data) data end - def create_works - entry_class == CsvEntry ? super : create_rdf_works - end - - def create_rdf_works - records.each_with_index do |record, index| - next unless record_has_source_identifier(record, index) - break if limit_reached?(limit, index) - - seen[record[source_identifier]] = true - new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record) - if record[:delete].present? - DeleteWorkJob.send(perform_method, new_entry, current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, current_run.id) - end - increment_counters(index, work: true) - end - importer.record_status - rescue StandardError => e - set_status_info(e) - end - # export methods # rubocop:disable Metrics/MethodLength, Metrics/AbcSize diff --git a/app/parsers/bulkrax/csv_parser.rb b/app/parsers/bulkrax/csv_parser.rb index f7e34ddac..653c15b94 100644 --- a/app/parsers/bulkrax/csv_parser.rb +++ b/app/parsers/bulkrax/csv_parser.rb @@ -113,57 +113,6 @@ def valid_import? false end - def create_collections - create_objects(['collection']) - end - - def create_works - create_objects(['work']) - end - - def create_file_sets - create_objects(['file_set']) - end - - def create_relationships - create_objects(['relationship']) - end - - def create_objects(types_array = nil) - index = 0 - (types_array || %w[collection work file_set relationship]).each do |type| - if type.eql?('relationship') - ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id) - next - end - send(type.pluralize).each do |current_record| - next unless record_has_source_identifier(current_record, index) - break if limit_reached?(limit, index) - - seen[current_record[source_identifier]] = true - create_entry_and_job(current_record, type) - increment_counters(index, "#{type}": true) - index += 1 - end - importer.record_status - end - true - rescue StandardError => e - set_status_info(e) - end - - def create_entry_and_job(current_record, type) - new_entry = find_or_create_entry(send("#{type}_entry_class"), - current_record[source_identifier], - 'Bulkrax::Importer', - current_record.to_h) - if current_record[:delete].present? - "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run) - else - "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id) - end - end - def write_partial_import_file(file) import_filename = import_file_path.split('/').last partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv" @@ -204,7 +153,6 @@ def create_new_entries def entry_class CsvEntry end - alias work_entry_class entry_class def collection_entry_class CsvCollectionEntry diff --git a/app/parsers/bulkrax/oai_dc_parser.rb b/app/parsers/bulkrax/oai_dc_parser.rb index 4319ab3f1..03a3a663d 100644 --- a/app/parsers/bulkrax/oai_dc_parser.rb +++ b/app/parsers/bulkrax/oai_dc_parser.rb @@ -63,6 +63,12 @@ def import_fields delegate :list_sets, to: :client + def create_objects(types = []) + types.each do |object_type| + send("create_#{object_type.pluralize}") + end + end + def create_collections metadata = { visibility: 'open' @@ -86,27 +92,31 @@ def create_works results = self.records(quick: true) return if results.blank? results.full.each_with_index do |record, index| - identifier = record.send(source_identifier) - if identifier.blank? - if Bulkrax.fill_in_blank_source_identifiers.present? - identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index) - else - invalid_record("Missing #{source_identifier} for #{record.to_h}\n") - next - end - end - + identifier = record_has_source_identifier(record, index) + next unless identifier break if limit_reached?(limit, index) + seen[identifier] = true - new_entry = entry_class.where(importerexporter: self.importerexporter, identifier: identifier).first_or_create! - if record.deleted? - DeleteWorkJob.send(perform_method, new_entry, importerexporter.current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, importerexporter.current_run.id) - end + create_entry_and_job(record, 'work', identifier) increment_counters(index, work: true) end importer.record_status + rescue StandardError => e + set_status_info(e) + end + + # oai records so not let us set the source identifier easily + def record_has_source_identifier(record, index) + identifier = record.send(source_identifier) + if identifier.blank? + if Bulkrax.fill_in_blank_source_identifiers.present? + identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, index) + else + invalid_record("Missing #{source_identifier} for #{record.to_h}\n") + return false + end + end + identifier end def collections diff --git a/app/parsers/bulkrax/xml_parser.rb b/app/parsers/bulkrax/xml_parser.rb index c76d9a485..a5854e3c2 100644 --- a/app/parsers/bulkrax/xml_parser.rb +++ b/app/parsers/bulkrax/xml_parser.rb @@ -11,13 +11,29 @@ def entry_class def collection_entry_class; end # @todo not yet supported - def create_collections; end + def create_collections + raise NotImplementedError + end # @todo not yet supported def file_set_entry_class; end # @todo not yet supported - def create_file_sets; end + def create_file_sets + raise NotImplementedError + end + + def file_sets + raise NotImplementedError + end + + def collections + raise NotImplementedError + end + + def works + records + end # TODO: change to differentiate between collection and work records when adding ability to import collection metadata def works_total @@ -92,25 +108,6 @@ def good_file_type?(path) %w[.xml .xls .xsd].include?(File.extname(path)) || ::Marcel::MimeType.for(path).include?('application/xml') end - def create_works - records.each_with_index do |record, index| - next unless record_has_source_identifier(record, index) - break if !limit.nil? && index >= limit - - seen[record[source_identifier]] = true - new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record) - if record[:delete].present? - DeleteWorkJob.send(perform_method, new_entry, current_run) - else - ImportWorkJob.send(perform_method, new_entry.id, current_run.id) - end - increment_counters(index, work: true) - end - importer.record_status - rescue StandardError => e - set_status_info(e) - end - def total records.size end diff --git a/app/views/bulkrax/importers/_csv_fields.html.erb b/app/views/bulkrax/importers/_csv_fields.html.erb index 758766e73..faf96d4be 100644 --- a/app/views/bulkrax/importers/_csv_fields.html.erb +++ b/app/views/bulkrax/importers/_csv_fields.html.erb @@ -25,13 +25,19 @@

Add CSV File to Import:

<%# accept a single file upload; data files and bags will need to be added another way %> - <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server'], as: :radio_buttons, label: false %> + <% file_style_list = ['Upload a File', 'Specify a Path on the Server'] %> + <% file_style_list << 'Existing Entries' unless importer.new_record? %> + <%= fi.input :file_style, collection: file_style_list, as: :radio_buttons, label: false %>
<%= fi.input 'file', as: :file, input_html: { accept: 'text/csv,application/zip' } %>
<%= fi.input :import_file_path, as: :string, input_html: { value: importer.parser_fields['import_file_path'] } %>
+
+ <%= fi.collection_check_boxes :entry_statuses, [['Failed'], ['Pending'], ['Skipped'], ['Deleted'], ['Complete']], :first, :first %> +
+ <% if defined?(::Hyrax) && Hyrax.config.browse_everything? %>

Add Files to Import:

Choose files to upload. The filenames must be unique, and the filenames must be referenced in a column called 'file' in the accompanying CSV file.

diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index f485a5923..07068879b 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -3,8 +3,8 @@ require "bulkrax/version" require "bulkrax/engine" require 'active_support/all' +require 'coderay' require 'denormalize_fields' - # rubocop:disable Metrics/ModuleLength module Bulkrax extend self # rubocop:disable Style/ModuleFunction diff --git a/spec/parsers/bulkrax/application_parser_spec.rb b/spec/parsers/bulkrax/application_parser_spec.rb index f8cc8af9e..2eefed7c9 100644 --- a/spec/parsers/bulkrax/application_parser_spec.rb +++ b/spec/parsers/bulkrax/application_parser_spec.rb @@ -17,13 +17,24 @@ module Bulkrax describe '#create_objects' do subject(:application_parser) { described_class.new(importer) } - it 'sends the create_* methods based on given types' do - expect(application_parser).to receive(:create_works) - expect(application_parser).to receive(:create_collections) - expect(application_parser).to receive(:create_file_sets) - expect(application_parser).to receive(:create_relationships) + it 'create_works calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['work']) + application_parser.create_works + end + + it 'create_collections calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['collection']) + application_parser.create_collections + end + + it 'create_file_sets calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['file_set']) + application_parser.create_file_sets + end - application_parser.create_objects(%w[collection work file_set relationship]) + it 'create_relationships calls create_objects' do + expect(application_parser).to receive(:create_objects).with(['relationship']) + application_parser.create_relationships end end From bf0628b360bb6e14fdd55c64e3c20b848b7cad04 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Fri, 9 Feb 2024 22:57:15 -0800 Subject: [PATCH 11/14] Fix exporters show page - 2 (#926) * Fix exporters show page Mimicking the importers show page, this change adds support for the datatables.js to the exporters show page. This commit will add the same style of datatables from the importer index page. --------- Co-authored-by: Kirk Wang --- app/assets/javascripts/bulkrax/datatables.js | 25 ++++++- .../bulkrax/exporters_controller.rb | 24 +++++-- .../concerns/bulkrax/datatables_behavior.rb | 65 +++++++++++++++++ app/views/bulkrax/exporters/index.html.erb | 70 ++++--------------- app/views/bulkrax/exporters/show.html.erb | 13 +--- app/views/bulkrax/importers/show.html.erb | 2 +- .../bulkrax/shared/_entries_tab.html.erb | 2 +- config/routes.rb | 4 ++ 8 files changed, 129 insertions(+), 76 deletions(-) diff --git a/app/assets/javascripts/bulkrax/datatables.js b/app/assets/javascripts/bulkrax/datatables.js index a60d01880..eabd2f716 100644 --- a/app/assets/javascripts/bulkrax/datatables.js +++ b/app/assets/javascripts/bulkrax/datatables.js @@ -3,7 +3,7 @@ Blacklight.onLoad(function() { $('#importer-show-table').DataTable( { 'processing': true, 'serverSide': true, - "ajax": window.location.href.replace(/(\/importers\/\d+)/, "$1/entry_table.json"), + "ajax": window.location.href.replace(/(\/(importers|exporters)\/\d+)/, "$1/entry_table.json"), "pageLength": 30, "lengthMenu": [[30, 100, 200], [30, 100, 200]], "columns": [ @@ -56,6 +56,29 @@ Blacklight.onLoad(function() { } ); } + if($('#exporters-table').length) { + $('#exporters-table').DataTable( { + 'processing': true, + 'serverSide': true, + "ajax": window.location.href.replace(/(\/exporters)/, "$1/exporter_table.json"), + "pageLength": 30, + "lengthMenu": [[30, 100, 200], [30, 100, 200]], + "columns": [ + { "data": "name" }, + { "data": "status_message" }, + { "data": "created_at" }, + { "data": "download" }, + { "data": "actions", "orderable": false } + ], + initComplete: function () { + // Add status filter + statusSelect.bind(this)() + // Add refresh link + refreshLink.bind(this)() + } + } ); + } + }) function entrySelect() { diff --git a/app/controllers/bulkrax/exporters_controller.rb b/app/controllers/bulkrax/exporters_controller.rb index 02ab2b95f..ea4ed99b1 100644 --- a/app/controllers/bulkrax/exporters_controller.rb +++ b/app/controllers/bulkrax/exporters_controller.rb @@ -4,9 +4,10 @@ module Bulkrax class ExportersController < ApplicationController include Hyrax::ThemedLayoutController if defined?(::Hyrax) include Bulkrax::DownloadBehavior + include Bulkrax::DatatablesBehavior before_action :authenticate_user! before_action :check_permissions - before_action :set_exporter, only: [:show, :edit, :update, :destroy] + before_action :set_exporter, only: [:show, :entry_table, :edit, :update, :destroy] with_themed_layout 'dashboard' if defined?(::Hyrax) # GET /exporters @@ -17,16 +18,29 @@ def index add_exporter_breadcrumbs if defined?(::Hyrax) end + def exporter_table + @exporters = Exporter.order(table_order).page(table_page).per(table_per_page) + @exporters = @exporters.where(exporter_table_search) if exporter_table_search.present? + respond_to do |format| + format.json { render json: format_exporters(@exporters) } + end + end + # GET /exporters/1 def show if defined?(::Hyrax) add_exporter_breadcrumbs add_breadcrumb @exporter.name end + @first_entry = @exporter.entries.first + end - @work_entries = @exporter.entries.where(type: @exporter.parser.entry_class.to_s).page(params[:work_entries_page]).per(30) - @collection_entries = @exporter.entries.where(type: @exporter.parser.collection_entry_class.to_s).page(params[:collections_entries_page]).per(30) - @file_set_entries = @exporter.entries.where(type: @exporter.parser.file_set_entry_class.to_s).page(params[:file_set_entries_page]).per(30) + def entry_table + @entries = @exporter.entries.order(table_order).page(table_page).per(table_per_page) + @entries = @entries.where(entry_table_search) if entry_table_search.present? + respond_to do |format| + format.json { render json: format_entries(@entries, @exporter) } + end end # GET /exporters/new @@ -100,7 +114,7 @@ def download # Use callbacks to share common setup or constraints between actions. def set_exporter - @exporter = Exporter.find(params[:id]) + @exporter = Exporter.find(params[:id] || params[:exporter_id]) end # Only allow a trusted parameters through. diff --git a/app/controllers/concerns/bulkrax/datatables_behavior.rb b/app/controllers/concerns/bulkrax/datatables_behavior.rb index 1765c1f4e..302839894 100644 --- a/app/controllers/concerns/bulkrax/datatables_behavior.rb +++ b/app/controllers/concerns/bulkrax/datatables_behavior.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true module Bulkrax + # rubocop:disable Metrics/ModuleLength module DatatablesBehavior extend ActiveSupport::Concern @@ -62,6 +63,26 @@ def importer_table_search @importer_table_search end + def exporter_table_search + return @exporter_table_search if @exporter_table_search + return @exporter_table_search = false if params['search']&.[]('value').blank? + + table_search_value = params['search']&.[]('value')&.downcase + + ['name', 'status_message', 'created_at'].map do |col| + column = Bulkrax::Exporter.arel_table[col] + column = Arel::Nodes::NamedFunction.new('CAST', [column.as('text')]) + column = Arel::Nodes::NamedFunction.new('LOWER', [column]) + @exporter_table_search = if @exporter_table_search + @exporter_table_search.or(column.matches("%#{table_search_value}%")) + else + column.matches("%#{table_search_value}%") + end + end + + @exporter_table_search + end + def format_importers(importers) result = importers.map do |i| { @@ -86,6 +107,23 @@ def format_importers(importers) } end + def format_exporters(exporters) + result = exporters.map do |e| + { + name: view_context.link_to(e.name, view_context.exporter_path(e)), + status_message: status_message_for(e), + created_at: e.created_at, + download: download_zip(e), + actions: exporter_util_links(e) + } + end + { + data: result, + recordsTotal: Bulkrax::Exporter.count, + recordsFiltered: exporters.size + } + end + def format_entries(entries, item) result = entries.map do |e| { @@ -132,5 +170,32 @@ def importer_util_links(i) links << view_context.link_to(view_context.raw(''), i, method: :delete, data: { confirm: 'Are you sure?' }) links.join(" ") end + + def exporter_util_links(i) + links = [] + links << view_context.link_to(view_context.raw(''), exporter_path(i)) + links << view_context.link_to(view_context.raw(''), edit_exporter_path(i)) + links << view_context.link_to(view_context.raw(''), i, method: :delete, data: { confirm: 'Are you sure?' }) + links.join(" ") + end + + def download_zip(e) + return unless File.exist?(e.exporter_export_zip_path) + + options_html = e.exporter_export_zip_files.flatten.map do |file_name| + "" + end.join + + form_html = "
" + form_html += "" + form_html += "\n" # add newline here to add a space between the dropdown and the download button + form_html += "" + form_html += "
" + + form_html + end end + # rubocop:enable Metrics/ModuleLength end diff --git a/app/views/bulkrax/exporters/index.html.erb b/app/views/bulkrax/exporters/index.html.erb index eb14be749..d3f4cb52a 100644 --- a/app/views/bulkrax/exporters/index.html.erb +++ b/app/views/bulkrax/exporters/index.html.erb @@ -13,62 +13,18 @@
- <% if @exporters.present? %> -
- - - - - - - - - - - - - - <% @exporters.each do |exporter| %> - - - - - - - - - - <% end %> - -
NameStatusDate ExportedDownloadable Files
<%= link_to exporter.name, exporter_path(exporter) %><%= exporter.status %><%= exporter.created_at %> - <% if File.exist?(exporter.exporter_export_zip_path) %> - <%= simple_form_for(exporter, method: :get, url: exporter_download_path(exporter)) do |form| %> - <%= render 'downloads', exporter: exporter, form: form %> - <%= form.button :submit, value: 'Download', data: { disable_with: false } %> - <% end %> - <% end%> - <%= link_to raw(''), exporter_path(exporter) %><%= link_to raw(''), edit_exporter_path(exporter), data: { turbolinks: false } %><%= link_to raw(''), exporter, method: :delete, data: { confirm: 'Are you sure?' } %>
-
- <% else %> -

No exporters have been created.

- <% end %> +
+ + + + + + + + + + +
NameStatusDate ExportedDownloadable FilesActions
+
- - diff --git a/app/views/bulkrax/exporters/show.html.erb b/app/views/bulkrax/exporters/show.html.erb index b0b42b5fe..50a78f66e 100644 --- a/app/views/bulkrax/exporters/show.html.erb +++ b/app/views/bulkrax/exporters/show.html.erb @@ -95,18 +95,9 @@
- - -
- <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @exporter, entries: @work_entries, pagination_param_name: :work_entries_page, pagination_anchor: 'work-entries' } %> - <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @exporter, entries: @collection_entries, pagination_param_name: :collection_entries_path, pagination_anchor: 'collection-entries' } %> - <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @exporter, entries: @file_set_entries, pagination_param_name: :file_set_entries_path, pagination_anchor: 'file-set-entries' } %> +
+ <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @exporter } %>
-
<%= link_to 'Edit', edit_exporter_path(@exporter) %> | diff --git a/app/views/bulkrax/importers/show.html.erb b/app/views/bulkrax/importers/show.html.erb index 183a9d08d..26f384153 100644 --- a/app/views/bulkrax/importers/show.html.erb +++ b/app/views/bulkrax/importers/show.html.erb @@ -77,7 +77,7 @@
- <%= render partial: 'bulkrax/shared/entries_tab' %> + <%= render partial: 'bulkrax/shared/entries_tab', locals: { item: @importer} %>
<%= render partial: 'bulkrax/importers/edit_item_buttons', locals: { item: @importer, e: @first_entry } if @first_entry.present? %>
diff --git a/app/views/bulkrax/shared/_entries_tab.html.erb b/app/views/bulkrax/shared/_entries_tab.html.erb index 7590fed02..1a93b12f8 100644 --- a/app/views/bulkrax/shared/_entries_tab.html.erb +++ b/app/views/bulkrax/shared/_entries_tab.html.erb @@ -12,5 +12,5 @@ - +
diff --git a/config/routes.rb b/config/routes.rb index d728f65d4..401511494 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -3,6 +3,10 @@ Bulkrax::Engine.routes.draw do resources :exporters do get :download + get :entry_table + collection do + get :exporter_table + end resources :entries, only: %i[show update destroy] end resources :importers do From 933b44d893c7e7a8cfccd96d4e5833212906f318 Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Wed, 14 Feb 2024 19:37:16 -0500 Subject: [PATCH 12/14] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Favor=20configuratio?= =?UTF-8?q?n=20over=20hard-coding=20(#927)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given that Hyrax provides a mechanism for specifying a queue name, we should echo that configuration, but also provide our own configuration as well as the same fallback. --- app/jobs/bulkrax/create_relationships_job.rb | 2 +- app/jobs/bulkrax/delete_job.rb | 2 +- app/jobs/bulkrax/download_cloud_file_job.rb | 2 +- app/jobs/bulkrax/import_collection_job.rb | 2 +- app/jobs/bulkrax/import_file_set_job.rb | 2 +- app/jobs/bulkrax/import_work_job.rb | 2 +- app/jobs/bulkrax/importer_job.rb | 2 +- lib/bulkrax.rb | 9 +++++++++ 8 files changed, 16 insertions(+), 7 deletions(-) diff --git a/app/jobs/bulkrax/create_relationships_job.rb b/app/jobs/bulkrax/create_relationships_job.rb index a0dfc44ca..9aa2d5cc3 100644 --- a/app/jobs/bulkrax/create_relationships_job.rb +++ b/app/jobs/bulkrax/create_relationships_job.rb @@ -40,7 +40,7 @@ class CreateRelationshipsJob < ApplicationJob include DynamicRecordLookup - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers # @param importer_run [Bulkrax::ImporterRun] current importer run (needed to properly update counters) diff --git a/app/jobs/bulkrax/delete_job.rb b/app/jobs/bulkrax/delete_job.rb index 0b337261b..b4c428cc3 100644 --- a/app/jobs/bulkrax/delete_job.rb +++ b/app/jobs/bulkrax/delete_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DeleteJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name def perform(entry, importer_run) obj = entry.factory.find diff --git a/app/jobs/bulkrax/download_cloud_file_job.rb b/app/jobs/bulkrax/download_cloud_file_job.rb index f56e81285..313c2f010 100644 --- a/app/jobs/bulkrax/download_cloud_file_job.rb +++ b/app/jobs/bulkrax/download_cloud_file_job.rb @@ -2,7 +2,7 @@ module Bulkrax class DownloadCloudFileJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # Retrieve cloud file and write to the imports directory # Note: if using the file system, the mounted directory in diff --git a/app/jobs/bulkrax/import_collection_job.rb b/app/jobs/bulkrax/import_collection_job.rb index 03405180c..8bbdfc430 100644 --- a/app/jobs/bulkrax/import_collection_job.rb +++ b/app/jobs/bulkrax/import_collection_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImportCollectionJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # rubocop:disable Rails/SkipsModelValidations def perform(*args) diff --git a/app/jobs/bulkrax/import_file_set_job.rb b/app/jobs/bulkrax/import_file_set_job.rb index 07fc6a388..b29c57bbb 100644 --- a/app/jobs/bulkrax/import_file_set_job.rb +++ b/app/jobs/bulkrax/import_file_set_job.rb @@ -6,7 +6,7 @@ class MissingParentError < ::StandardError; end class ImportFileSetJob < ApplicationJob include DynamicRecordLookup - queue_as :import + queue_as Bulkrax.config.ingest_queue_name attr_reader :importer_run_id diff --git a/app/jobs/bulkrax/import_work_job.rb b/app/jobs/bulkrax/import_work_job.rb index a3a22ea86..8374aca2a 100644 --- a/app/jobs/bulkrax/import_work_job.rb +++ b/app/jobs/bulkrax/import_work_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImportWorkJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name # rubocop:disable Rails/SkipsModelValidations # diff --git a/app/jobs/bulkrax/importer_job.rb b/app/jobs/bulkrax/importer_job.rb index 42691b4b1..9fb0f4456 100644 --- a/app/jobs/bulkrax/importer_job.rb +++ b/app/jobs/bulkrax/importer_job.rb @@ -2,7 +2,7 @@ module Bulkrax class ImporterJob < ApplicationJob - queue_as :import + queue_as Bulkrax.config.ingest_queue_name def perform(importer_id, only_updates_since_last_import = false) importer = Importer.find(importer_id) diff --git a/lib/bulkrax.rb b/lib/bulkrax.rb index 07068879b..9392fe0af 100644 --- a/lib/bulkrax.rb +++ b/lib/bulkrax.rb @@ -63,6 +63,15 @@ def factory_class_name_coercer @factory_class_name_coercer || Bulkrax::FactoryClassFinder::DefaultCoercer end + attr_writer :ingest_queue_name + ## + # @return [String, Proc] + def ingest_queue_name + return @ingest_queue_name if @ingest_queue_name.present? + return Hyrax.config.ingest_queue_name if defined?(Hyrax) + :import + end + ## # Configure the persistence adapter used for persisting imported data. # From a6b44d1ed77b29d7486652cc635fbdf1d678fb43 Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Mon, 19 Feb 2024 06:20:03 -0800 Subject: [PATCH 13/14] =?UTF-8?q?=F0=9F=90=9B=20Fix=20exporter=20page=20fo?= =?UTF-8?q?r=20Bootstrap=203=20applications=20(#928)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit will add the `hidden` class back in addition to the `d-none` class. This way older Hyku/Hyrax applications can still use it without unintended effects. Also, adding a turbolinks false to the edit exporter link because the buttons on the form page was not working without a refresh. --- app/assets/javascripts/bulkrax/exporters.js | 8 ++++---- .../concerns/bulkrax/datatables_behavior.rb | 2 +- app/views/bulkrax/exporters/_form.html.erb | 20 +++++++++---------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/app/assets/javascripts/bulkrax/exporters.js b/app/assets/javascripts/bulkrax/exporters.js index 67944d41a..eaf90b26a 100644 --- a/app/assets/javascripts/bulkrax/exporters.js +++ b/app/assets/javascripts/bulkrax/exporters.js @@ -26,14 +26,14 @@ function removeRequired(allSources) { // hide all export_source function hide(allSources) { - allSources.addClass('d-none'); - allSources.find('#exporter_export_source').addClass('.d-none').attr('type', 'd-none'); + allSources.addClass('d-none hidden'); + allSources.find('#exporter_export_source').addClass('.d-none hidden').attr('type', 'd-none hidden'); } // unhide selected export_source function unhideSelected(selectedSource) { - selectedSource.removeClass('d-none').removeAttr('type'); - selectedSource.parent().removeClass('d-none').removeAttr('type'); + selectedSource.removeClass('d-none hidden').removeAttr('type'); + selectedSource.parent().removeClass('d-none hidden').removeAttr('type'); }; // add the autocomplete javascript diff --git a/app/controllers/concerns/bulkrax/datatables_behavior.rb b/app/controllers/concerns/bulkrax/datatables_behavior.rb index 302839894..d130de6df 100644 --- a/app/controllers/concerns/bulkrax/datatables_behavior.rb +++ b/app/controllers/concerns/bulkrax/datatables_behavior.rb @@ -174,7 +174,7 @@ def importer_util_links(i) def exporter_util_links(i) links = [] links << view_context.link_to(view_context.raw(''), exporter_path(i)) - links << view_context.link_to(view_context.raw(''), edit_exporter_path(i)) + links << view_context.link_to(view_context.raw(''), edit_exporter_path(i), data: { turbolinks: false }) links << view_context.link_to(view_context.raw(''), i, method: :delete, data: { confirm: 'Are you sure?' }) links.join(" ") end diff --git a/app/views/bulkrax/exporters/_form.html.erb b/app/views/bulkrax/exporters/_form.html.erb index df3af0b38..72477b19f 100644 --- a/app/views/bulkrax/exporters/_form.html.erb +++ b/app/views/bulkrax/exporters/_form.html.erb @@ -33,8 +33,8 @@ label: t('bulkrax.exporter.labels.importer'), required: true, prompt: 'Select from the list', - label_html: { class: 'importer export-source-option d-none' }, - input_html: { class: 'importer export-source-option d-none form-control' }, + label_html: { class: 'importer export-source-option d-none hidden' }, + input_html: { class: 'importer export-source-option d-none hidden form-control' }, collection: form.object.importers_list.sort %> <%= form.input :export_source_collection, @@ -42,9 +42,9 @@ label: t('bulkrax.exporter.labels.collection'), required: true, placeholder: @collection&.title&.first, - label_html: { class: 'collection export-source-option d-none' }, + label_html: { class: 'collection export-source-option d-none hidden' }, input_html: { - class: 'collection export-source-option d-none form-control', + class: 'collection export-source-option d-none hidden form-control', data: { 'autocomplete-url' => '/authorities/search/collections', 'autocomplete' => 'collection' @@ -56,8 +56,8 @@ label: t('bulkrax.exporter.labels.worktype'), required: true, prompt: 'Select from the list', - label_html: { class: 'worktype export-source-option d-none' }, - input_html: { class: 'worktype export-source-option d-none form-control' }, + label_html: { class: 'worktype export-source-option d-none hidden' }, + input_html: { class: 'worktype export-source-option d-none hidden form-control' }, collection: Bulkrax.curation_concerns.map { |cc| [cc.to_s, cc.to_s] } %> <%= form.input :limit, @@ -80,7 +80,7 @@ as: :boolean, label: t('bulkrax.exporter.labels.filter_by_date') %> -
+