Skip to content

Commit

Permalink
Speed up patient imports
Browse files Browse the repository at this point in the history
Use .import to import patient and parent records in bulk, while skipping
Rails validations.
  • Loading branch information
tvararu committed Oct 8, 2024
1 parent 069c7dc commit 27e8dbf
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 40 deletions.
38 changes: 16 additions & 22 deletions app/models/concerns/csv_importable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module CSVImportable
scope :csv_not_removed, -> { where(csv_removed_at: nil) }

enum :status,
%i[pending_import rows_are_invalid processed recorded],
%i[pending_import rows_are_invalid recorded],
default: :pending_import,
validate: true

Expand All @@ -37,7 +37,7 @@ module CSVImportable
validate :headers_are_valid
validate :rows_are_valid

before_save :ensure_processed_with_count_statistics
before_save :ensure_recorded_with_count_statistics
end

def csv=(file)
Expand Down Expand Up @@ -80,8 +80,8 @@ def parse_rows!
end
end

def process!
return if processed?
def record!
return if recorded?

parse_rows! if rows.nil?
return if invalid?
Expand All @@ -94,21 +94,17 @@ def process!
rows.each do |row|
count_column_to_increment = process_row(row)
counts[count_column_to_increment] += 1
end

update!(processed_at: Time.zone.now, status: :processed, **counts)
end
end

def record!
return if recorded?
# Perform bulk import every 100 rows
bulk_import_records if (@patients_to_import&.size || 0) >= 1000
end

process! unless processed?
return if invalid?
# Import any remaining records
bulk_import_records if @patients_to_import&.any?

ActiveRecord::Base.transaction do
record_rows
update!(recorded_at: Time.zone.now, status: :recorded)

update!(recorded_at: Time.zone.now, status: :recorded, **counts)
end
end

Expand Down Expand Up @@ -154,13 +150,11 @@ def rows_are_valid
end
end

def ensure_processed_with_count_statistics
return if recorded?

if processed? && count_columns.any? { |column| send(column).nil? }
raise "Count statistics must be set for a processed import."
elsif !processed? && count_columns.any? { |column| !send(column).nil? }
raise "Count statistics must not be set for a non-processed import."
def ensure_recorded_with_count_statistics
if recorded? && count_columns.any? { |column| send(column).nil? }
raise "Count statistics must be set for a recorded import."
elsif !recorded? && count_columns.any? { |column| !send(column).nil? }
raise "Count statistics must not be set for a non-recorded import."
end
end
end
54 changes: 39 additions & 15 deletions app/models/patient_import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,31 +37,55 @@ def process_row(row)
parent_relationships = row.to_parent_relationships(parents, patient)

count_column_to_increment =
(
if parents.any?(&:new_record?) || patient.new_record? ||
parent_relationships.any?(&:new_record?)
:new_record_count
elsif parents.any?(&:changed?) || patient.changed? ||
parent_relationships.any?(&:changed?)
:changed_record_count
else
:exact_duplicate_record_count
end
)
count_column(parents, patient, parent_relationships)

parents.each(&:save!)
patient.save!
parent_relationships.each(&:save!)
# Instead of saving individually, we'll collect the records
@parents_to_import ||= []
@patients_to_import ||= []
@parent_relationships_to_import ||= []

link_records(*parents, *parent_relationships, patient)
@parents_to_import.concat(parents)
@patients_to_import << patient
@parent_relationships_to_import.concat(parent_relationships)

# We'll handle linking records after bulk import
@records_to_link ||= []
@records_to_link.concat([*parents, *parent_relationships, patient])

count_column_to_increment
end

def count_column(parents, patient, parent_relationships)
if parents.any?(&:new_record?) || patient.new_record? ||
parent_relationships.any?(&:new_record?)
:new_record_count
elsif parents.any?(&:changed?) || patient.changed? ||
parent_relationships.any?(&:changed?)
:changed_record_count
else
:exact_duplicate_record_count
end
end

def record_rows
Time.zone.now.tap do |recorded_at|
patients.draft.update_all(recorded_at:)
parents.draft.update_all(recorded_at:)
end
end

def bulk_import_records
Parent.import(@parents_to_import, validate: false)
Patient.import(@patients_to_import, validate: false)
ParentRelationship.import(@parent_relationships_to_import, validate: false)

# Link records after bulk import
@records_to_link.each_slice(4) { |records| link_records(*records) }

# Clear the arrays for the next batch
@parents_to_import.clear
@patients_to_import.clear
@parent_relationships_to_import.clear
@records_to_link.clear
end
end
6 changes: 3 additions & 3 deletions spec/jobs/process_import_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

it "parses and processes the rows" do
expect(import).to receive(:parse_rows!)
expect(import).to receive(:process!)
expect(import).to receive(:record!)
end
end

Expand All @@ -20,7 +20,7 @@

it "parses and processes the rows" do
expect(import).to receive(:parse_rows!)
expect(import).to receive(:process!)
expect(import).to receive(:record!)
end
end

Expand All @@ -29,7 +29,7 @@

it "parses and processes the rows" do
expect(import).to receive(:parse_rows!)
expect(import).to receive(:process!)
expect(import).to receive(:record!)
end
end
end
Expand Down

0 comments on commit 27e8dbf

Please sign in to comment.