Skip to content

Commit

Permalink
Merge pull request #3659 from mlibrary/HELIO-4674/scholarlyiq_counter…
Browse files Browse the repository at this point in the history
…_data

HELIO-4674 - Scholarly iQ tasks: add COUNTER, remove local file
  • Loading branch information
sethaj authored Jun 25, 2024
2 parents c177937 + 860a300 commit 6ee05b4
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 3 deletions.
52 changes: 52 additions & 0 deletions lib/tasks/scholarlyiq/scholarlyiq_upload_counter_reports.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

########################################
## NOTE: THIS IS RUN FROM A CRON JOB! ##
########################################

desc 'Upload COUNTER Reports Data to S3 Bucket For ScholarlyIQ'
namespace :heliotrope do
  # Dumps CounterReport rows to a tab-separated file in output_directory, passes that file to
  # scholarlyiq_s3_deposit and, if the deposit reports success, deletes the local copy.
  #
  # Usage: bundle exec rails "heliotrope:scholarlyiq_upload_counter_reports[output_directory, <all_rows>]"
  #
  # Arguments:
  #   output_directory - writable directory in which the temporary TSV file is created
  #   all_rows         - optional; pass the literal string 'all_rows' to export every CounterReport row
  #                      instead of only the rows created during the previous calendar day
  #
  # NOTE(review): scholarlyiq_s3_deposit is not defined in this file; it is presumably provided by a
  # sibling ScholarlyIQ rake file — confirm it is loaded whenever this task runs.
  task :scholarlyiq_upload_counter_reports, [:output_directory, :all_rows] => :environment do |_t, args|
    # File.writable?(nil) raises TypeError, so a missing argument is reported the same way as an
    # unwritable directory rather than crashing with an unrelated error.
    unless args.output_directory && File.writable?(args.output_directory)
      puts "Provided directory (#{args.output_directory}) is not writable. Exiting."
      exit
    end

    # this optional parameter will very rarely be used, maybe twice as we start up the Scholarly iQ feeds
    all_rows = args.all_rows == 'all_rows'

    # For now let's assume these will be tidied up manually, or by a separate cron
    output_file = if all_rows
                    File.join(args.output_directory, "counter_reports_all_rows-#{Time.now.getlocal.strftime("%Y-%m-%d")}.tsv")
                  else
                    # note this is named to indicate the dates of the actual records
                    File.join(args.output_directory, "counter_reports-#{Time.now.days_ago(1).getlocal.strftime("%Y-%m-%d")}.tsv")
                  end

    # find_each should default to 1000 rows stored at a time, not gobbling up RAM for the entire resultset
    rows = if all_rows
             CounterReport.find_each
           else
             CounterReport.where("created_at >= CURDATE() - INTERVAL 1 DAY AND created_at < CURDATE()").find_each
           end

    CSV.open(output_file, "w", col_sep: "\t", write_headers: true) do |tsv|
      rows.with_index do |row, index|
        attributes = row.attributes
        # The header comes from the first row's attribute names. That row's values must still be
        # written afterwards, otherwise the first record is lost from every export.
        tsv << attributes.keys if index.zero?
        tsv << attributes.values
      end
    end
    # puts "COUNTER report data for ScholarlyIQ saved to #{output_file}"

    fail unless scholarlyiq_s3_deposit(output_file)

    # No real purpose keeping this, the DB records are sticking around anyways!
    # Deleting it means the crons can use system /tmp for these. No chance of trying to save to a missing/broken mount.
    File.delete(output_file)
  end
end
6 changes: 5 additions & 1 deletion lib/tasks/scholarlyiq/scholarlyiq_upload_institutions.rake
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@ namespace :heliotrope do
tsv << [institution.identifier, institution.name, institution.display_name, institution.entity_id]
end
end
# puts "Institution data for ScholarlyIQ saved to #{output_file}"

puts "Institution data for ScholarlyIQ saved to #{output_file}"
fail unless scholarlyiq_s3_deposit(output_file)

# No real purpose keeping this, the DB records are sticking around anyways!
# Deleting it means the crons can use system /tmp for these. No chance of trying to save to a missing/broken mount.
File.delete(output_file)
end

# Because of the way task namespacing works, this should be usable by the other ScholarlyIQ tasks
Expand Down
6 changes: 5 additions & 1 deletion lib/tasks/scholarlyiq/scholarlyiq_upload_items.rake
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,12 @@ namespace :heliotrope do
doc['resource_type_tesim']&.map(&:strip)&.reject(&:blank?)&.join('; ')]
end
end
# puts "Item data for ScholarlyIQ saved to #{output_file}"

puts "Item data for ScholarlyIQ saved to #{output_file}"
fail unless scholarlyiq_s3_deposit(output_file)

# No real purpose keeping this, the DB records are sticking around anyways!
# Deleting it means the crons can use system /tmp for these. No chance of trying to save to a missing/broken mount.
File.delete(output_file)
end
end
6 changes: 5 additions & 1 deletion lib/tasks/scholarlyiq/scholarlyiq_upload_presses.rake
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@ namespace :heliotrope do
]
end
end
# puts "Press data for ScholarlyIQ saved to #{output_file}"

puts "Press data for ScholarlyIQ saved to #{output_file}"
fail unless scholarlyiq_s3_deposit(output_file)

# No real purpose keeping this, the DB records are sticking around anyways!
# Deleting it means the crons can use system /tmp for these. No chance of trying to save to a missing/broken mount.
File.delete(output_file)
end
end

0 comments on commit 6ee05b4

Please sign in to comment.