Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up patient imports #1946

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 16 additions & 22 deletions app/models/concerns/csv_importable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module CSVImportable
scope :csv_not_removed, -> { where(csv_removed_at: nil) }

enum :status,
%i[pending_import rows_are_invalid processed recorded],
%i[pending_import rows_are_invalid recorded],
default: :pending_import,
validate: true

Expand All @@ -37,7 +37,7 @@ module CSVImportable
validate :headers_are_valid
validate :rows_are_valid

before_save :ensure_processed_with_count_statistics
before_save :ensure_recorded_with_count_statistics
end

def csv=(file)
Expand Down Expand Up @@ -80,8 +80,8 @@ def parse_rows!
end
end

def process!
return if processed?
def record!
return if recorded?

parse_rows! if rows.nil?
return if invalid?
Expand All @@ -94,21 +94,17 @@ def process!
rows.each do |row|
count_column_to_increment = process_row(row)
counts[count_column_to_increment] += 1
end

update!(processed_at: Time.zone.now, status: :processed, **counts)
end
end

def record!
return if recorded?
# Perform bulk import every 100 rows
bulk_import_records if (@patients_to_import&.size || 0) >= 1000
end

process! unless processed?
return if invalid?
# Import any remaining records
bulk_import_records if @patients_to_import&.any?

ActiveRecord::Base.transaction do
record_rows
update!(recorded_at: Time.zone.now, status: :recorded)

update!(recorded_at: Time.zone.now, status: :recorded, **counts)
end
end

Expand Down Expand Up @@ -154,13 +150,11 @@ def rows_are_valid
end
end

def ensure_processed_with_count_statistics
return if recorded?

if processed? && count_columns.any? { |column| send(column).nil? }
raise "Count statistics must be set for a processed import."
elsif !processed? && count_columns.any? { |column| !send(column).nil? }
raise "Count statistics must not be set for a non-processed import."
def ensure_recorded_with_count_statistics
if recorded? && count_columns.any? { |column| send(column).nil? }
raise "Count statistics must be set for a recorded import."
elsif !recorded? && count_columns.any? { |column| !send(column).nil? }
raise "Count statistics must not be set for a non-recorded import."
end
end
end
54 changes: 39 additions & 15 deletions app/models/patient_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,31 +37,55 @@ def process_row(row)
parent_relationships = row.to_parent_relationships(parents, patient)

count_column_to_increment =
(
if parents.any?(&:new_record?) || patient.new_record? ||
parent_relationships.any?(&:new_record?)
:new_record_count
elsif parents.any?(&:changed?) || patient.changed? ||
parent_relationships.any?(&:changed?)
:changed_record_count
else
:exact_duplicate_record_count
end
)
count_column(parents, patient, parent_relationships)

parents.each(&:save!)
patient.save!
parent_relationships.each(&:save!)
# Instead of saving individually, we'll collect the records
@parents_to_import ||= []
@patients_to_import ||= []
@parent_relationships_to_import ||= []

link_records(*parents, *parent_relationships, patient)
@parents_to_import.concat(parents)
@patients_to_import << patient
@parent_relationships_to_import.concat(parent_relationships)

# We'll handle linking records after bulk import
@records_to_link ||= []
@records_to_link.concat([*parents, *parent_relationships, patient])

count_column_to_increment
end

def count_column(parents, patient, parent_relationships)
if parents.any?(&:new_record?) || patient.new_record? ||
parent_relationships.any?(&:new_record?)
:new_record_count
elsif parents.any?(&:changed?) || patient.changed? ||
parent_relationships.any?(&:changed?)
:changed_record_count
else
:exact_duplicate_record_count
end
end

def record_rows
Time.zone.now.tap do |recorded_at|
patients.draft.update_all(recorded_at:)
parents.draft.update_all(recorded_at:)
end
end

def bulk_import_records
Parent.import(@parents_to_import, validate: false)
Patient.import(@patients_to_import, validate: false)
ParentRelationship.import(@parent_relationships_to_import, validate: false)

# Link records after bulk import
@records_to_link.each_slice(4) { |records| link_records(*records) }

# Clear the arrays for the next batch
@parents_to_import.clear
@patients_to_import.clear
@parent_relationships_to_import.clear
@records_to_link.clear
end
end
80 changes: 80 additions & 0 deletions spec/features/import_child_records_slow_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

describe "Import child records" do
around { |example| travel_to(Date.new(2023, 5, 20)) { example.run } }

scenario "User uploads a large file" do
given_the_app_is_setup
and_an_hpv_programme_is_underway

when_i_visit_the_cohort_page_for_the_hpv_programme
and_i_start_adding_children_to_the_cohort
then_i_should_see_the_import_page

when_i_upload_a_valid_file
then_i_should_see_the_imports_page_with_the_processing_flash

when_i_wait_for_the_background_job_to_complete
when_i_go_to_the_import_page
then_i_should_see_the_upload
and_i_should_see_the_patients
end

def given_the_app_is_setup
@team = create(:team, :with_one_nurse)
create(:location, :school, urn: "141939")
@user = @team.users.first
end

def and_an_hpv_programme_is_underway
programme = create(:programme, :hpv)
create(:team_programme, team: @team, programme:)
end

def when_i_visit_the_cohort_page_for_the_hpv_programme
sign_in @user
visit "/dashboard"
click_on "Programmes", match: :first
click_on "HPV"
click_on "Cohort"
end

def and_i_start_adding_children_to_the_cohort
click_on "Import child records"
end

def then_i_should_see_the_import_page
expect(page).to have_content("Import child records")
end

def when_i_upload_a_valid_file
attach_file(
"cohort_import[csv]",
"spec/fixtures/cohort_import/valid_1000_rows.csv"
)
click_on "Continue"
end

def and_i_should_see_the_patients
expect(page).to have_content("Full nameNHS numberDate of birthPostcode")
expect(page).to have_content("Roxanna Mayer")
end

def then_i_should_see_the_upload
expect(page).to have_content("Uploaded on")
expect(page).to have_content("Uploaded byTest User")
expect(page).to have_content("ProgrammeHPV")
end

def then_i_should_see_the_imports_page_with_the_processing_flash
expect(page).to have_content("Import processing started")
end

def when_i_wait_for_the_background_job_to_complete
perform_enqueued_jobs
end

def when_i_go_to_the_import_page
click_link CohortImport.last.created_at.to_fs(:long), match: :first
end
end
Loading
Loading