From f10f8f1a75e27dcb056fed114ae2386dad95161c Mon Sep 17 00:00:00 2001 From: Eric O Date: Tue, 28 May 2024 14:06:29 -0400 Subject: [PATCH 1/7] Add AWS checksum verification logic --- .rubocop.yml | 8 ++ config/deploy/dev.rb | 2 +- .../aws/object_fixity_verifier.rb | 53 +++++++++++++ lib/check_please/exceptions.rb | 3 + lib/tasks/check_please/verification.rake | 23 ++++++ .../aws/object_fixity_verifier_spec.rb | 74 +++++++++++++++++++ 6 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 lib/check_please/aws/object_fixity_verifier.rb create mode 100644 lib/tasks/check_please/verification.rake create mode 100644 spec/check_please/aws/object_fixity_verifier_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 386444b..3cbfc81 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -19,3 +19,11 @@ AllCops: Lint/MissingCopEnableDirective: Enabled: false + +Metrics/MethodLength: + Exclude: + - lib/check_please/aws/object_fixity_verifier.rb + +RSpec/VerifiedDoubles: + Exclude: + - spec/check_please/aws/object_fixity_verifier_spec.rb diff --git a/config/deploy/dev.rb b/config/deploy/dev.rb index 197aa87..6f327d1 100644 --- a/config/deploy/dev.rb +++ b/config/deploy/dev.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -server 'check-please-dev.library.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] +server 'ec2-3-230-115-99.compute-1.amazonaws.com', user: fetch(:remote_user), roles: %w[app db web] # Current branch is suggested by default in development ask :branch, `git rev-parse --abbrev-ref HEAD`.chomp diff --git a/lib/check_please/aws/object_fixity_verifier.rb b/lib/check_please/aws/object_fixity_verifier.rb new file mode 100644 index 0000000..f001e63 --- /dev/null +++ b/lib/check_please/aws/object_fixity_verifier.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module CheckPlease::Aws::ObjectFixityVerifier + def self.digester_for_checksum_algorithm!(checksum_algorithm_name) + case checksum_algorithm_name + when 'sha256' + Digest::SHA256.new 
+ when 'sha512' + Digest::SHA512.new + when 'md5' + Digest::MD5.new + when 'crc32c' + Digest::CRC32c.new + else + raise ArgumentError, "Unsupported checksum algorithm: #{checksum_algorithm_name}" + end + end + + def self.verify(bucket_name, object_path, checksum_algorithm_name, print_memory_stats: false) + digester_for_checksum_algorithm = digester_for_checksum_algorithm!(checksum_algorithm_name) + bytes_read = 0 + memory_monitoring_counter = 0 + + obj = S3_CLIENT.get_object({ bucket: bucket_name, key: object_path }) do |chunk, _headers| + digester_for_checksum_algorithm.update(chunk) + bytes_read += chunk.bytesize + + memory_monitoring_counter += 1 + collect_and_print_memory_stats(bytes_read) if print_memory_stats && (memory_monitoring_counter % 100).zero? + end + + # The bytes_read sum should equal the AWS-reported obj.content_length, + # but we'll add a check here just in case there's ever a mismatch. + verify_read_byte_count!(bytes_read, obj.content_length) + + [digester_for_checksum_algorithm.hexdigest, bytes_read] + rescue Aws::S3::Errors::NoSuchKey + raise CheckPlease::Exceptions::ObjectNotFoundError, + "Could not find AWS object: bucket=#{bucket_name}, path=#{object_path}" + end + + def self.verify_read_byte_count!(bytes_read, expected_total_byte_count) + return if bytes_read == expected_total_byte_count + + raise CheckPlease::Exceptions::ReportedFileSizeMismatchError, + "S3 reported an object size of #{expected_total_byte_count} bytes, but we only received #{bytes_read} bytes" + end + + def self.collect_and_print_memory_stats(bytes_read) + pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$PROCESS_ID}"`.strip.split.map(&:to_i) + puts "Read: #{bytes_read / 1.megabyte} MB. Memory usage for pid #{pid}: #{size.to_f / 1.kilobyte} MB." 
# rubocop:disable Rails/Output + end +end diff --git a/lib/check_please/exceptions.rb b/lib/check_please/exceptions.rb index 3189aad..f700e81 100644 --- a/lib/check_please/exceptions.rb +++ b/lib/check_please/exceptions.rb @@ -2,4 +2,7 @@ module CheckPlease::Exceptions class CheckPleaseError < StandardError; end + + class ObjectNotFoundError < CheckPleaseError; end + class ReportedFileSizeMismatchError < CheckPleaseError; end end diff --git a/lib/tasks/check_please/verification.rake b/lib/tasks/check_please/verification.rake new file mode 100644 index 0000000..fc7c81c --- /dev/null +++ b/lib/tasks/check_please/verification.rake @@ -0,0 +1,23 @@ +BUFFER_SIZE = 5.megabytes + +namespace :check_please do + namespace :verification do + desc 'Verify the checksum for the file at the given bucket_name and object_path' + task verify_s3_object: :environment do + bucket_name = ENV['bucket_name'] + object_path = ENV['object_path'] + checksum_algorithm_name = ENV['checksum_algorithm_name'] + print_memory_stats = ENV['print_memory_stats'] == 'true' + + checksum, object_size = CheckPlease::Aws::ObjectFixityVerifier.verify( + bucket_name, + object_path, + checksum_algorithm_name, + print_memory_stats: print_memory_stats + ) + puts "#{bucket_name}: #{object_path}" + puts "#{checksum_algorithm_name} checksum is: #{checksum}" + puts "object_size is: #{object_size}" + end + end +end diff --git a/spec/check_please/aws/object_fixity_verifier_spec.rb b/spec/check_please/aws/object_fixity_verifier_spec.rb new file mode 100644 index 0000000..791425e --- /dev/null +++ b/spec/check_please/aws/object_fixity_verifier_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe CheckPlease::Aws::ObjectFixityVerifier do + describe '.digester_for_checksum_algorithm' do + { + 'sha256' => Digest::SHA256, + 'sha512' => Digest::SHA512, + 'md5' => Digest::MD5, + 'crc32c' => Digest::CRC32c + }.each do |checksum_algorithm_name, digester_class| + it "returns a 
#{digester_class.name} instance when the string \"#{checksum_algorithm_name}\" is given" do + expect(described_class.digester_for_checksum_algorithm!(checksum_algorithm_name)).to be_a(digester_class) + end + end + + it 'raises an exception when an unhandled checksum algorithm name is provided' do + expect { described_class.digester_for_checksum_algorithm!('nope') }.to raise_error(ArgumentError) + end + end + + describe '.verify' do + let(:bucket_name) { 'example-bucket' } + let(:object_path) { 'a/b/c.txt' } + let(:checksum_algorithm_name) { 'sha256' } + let(:print_memory_stats) { false } + let(:chunk1) { 'aaaaa' } + let(:chunk2) { 'bbbbb' } + let(:chunk3) { 'c' } + let(:expected_content_length) { chunk1.bytesize + chunk2.bytesize + chunk3.bytesize } + let(:get_object_response) do + headers = double(Seahorse::Client::Response) + allow(headers).to receive(:content_length).and_return(expected_content_length) + headers + end + let(:get_object_response_headers) { double(Seahorse::Client::Http::Headers) } + let(:expected_sha256_checksum_hexdigest) { Digest::SHA256.hexdigest(chunk1 + chunk2 + chunk3) } + + before do + allow(S3_CLIENT).to receive(:get_object).with( + { bucket: bucket_name, key: object_path } + ).and_return(get_object_response).and_yield( + chunk1, get_object_response_headers + ).and_yield( + chunk2, get_object_response_headers + ).and_yield( + chunk3, get_object_response_headers + ) + end + + it 'returns the expected value' do + expect( + described_class.verify( + bucket_name, object_path, checksum_algorithm_name, print_memory_stats: print_memory_stats + ) + ).to eq([expected_sha256_checksum_hexdigest, expected_content_length]) + end + + context 'when expected content length does not equal the number of bytes read' do + let(:expected_content_length) { 1 } + + it 'raises an exception' do + expect { + described_class.verify( + bucket_name, object_path, checksum_algorithm_name, print_memory_stats: print_memory_stats + ) + }.to raise_error( + 
CheckPlease::Exceptions::ReportedFileSizeMismatchError + ) + end + end + end +end From ede59d4d468e1db9d58c74656ba6658066662aa0 Mon Sep 17 00:00:00 2001 From: Eric O Date: Mon, 3 Jun 2024 15:23:17 -0400 Subject: [PATCH 2/7] Add Action Cable implementation, FixityCheckChannel, and tests --- .rubocop.yml | 4 +- Gemfile | 2 + Gemfile.lock | 5 + app/channels/application_cable/connection.rb | 14 ++ app/channels/fixity_check_channel.rb | 46 +++++ app/jobs/aws_check_fixity_job.rb | 66 +++++++ config/cable.yml | 12 ++ config/environments/deployed.rb | 2 + config/environments/development.rb | 2 +- config/environments/test.rb | 4 + config/routes.rb | 3 + config/templates/check_please.template.yml | 7 +- ...y_verifier.rb => object_fixity_checker.rb} | 24 +-- lib/check_please/queues.rb | 1 + lib/tasks/check_please/rubocop.rake | 1 + lib/tasks/check_please/verification.rake | 12 +- .../application_cable/connection_spec.rb | 24 +++ spec/channels/fixity_check_channel_spec.rb | 74 ++++++++ ..._spec.rb => object_fixity_checker_spec.rb} | 26 ++- spec/factories/users.rb | 2 + .../run_fixity_check_for_s3_object_spec.rb | 170 ++++++++++++++++++ spec/rails_helper.rb | 3 + 22 files changed, 481 insertions(+), 23 deletions(-) create mode 100644 app/channels/fixity_check_channel.rb create mode 100644 app/jobs/aws_check_fixity_job.rb create mode 100644 config/cable.yml rename lib/check_please/aws/{object_fixity_verifier.rb => object_fixity_checker.rb} (64%) create mode 100644 spec/channels/application_cable/connection_spec.rb create mode 100644 spec/channels/fixity_check_channel_spec.rb rename spec/check_please/aws/{object_fixity_verifier_spec.rb => object_fixity_checker_spec.rb} (70%) create mode 100644 spec/features/channel/fixity_check/run_fixity_check_for_s3_object_spec.rb diff --git a/.rubocop.yml b/.rubocop.yml index 3cbfc81..23efe87 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -22,8 +22,8 @@ Lint/MissingCopEnableDirective: Metrics/MethodLength: Exclude: - - 
lib/check_please/aws/object_fixity_verifier.rb + - lib/check_please/aws/object_fixity_checker.rb RSpec/VerifiedDoubles: Exclude: - - spec/check_please/aws/object_fixity_verifier_spec.rb + - spec/check_please/aws/object_fixity_checker_spec.rb diff --git a/Gemfile b/Gemfile index 512c151..56276fb 100644 --- a/Gemfile +++ b/Gemfile @@ -16,6 +16,8 @@ gem 'best_type', '~> 1.0' gem 'bootsnap', require: false # Add CRC32C support to the Ruby Digest module gem 'digest-crc', '~> 0.6.5' +# Client library for connecting to a websocket endpoint +gem 'faye-websocket', '~> 0.11.3' # Google Cloud Storage SDK gem 'google-cloud-storage', '~> 1.49' # Use JavaScript with ESM import maps [https://github.com/rails/importmap-rails] diff --git a/Gemfile.lock b/Gemfile.lock index 793c20e..f221ced 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -164,6 +164,7 @@ GEM drb (2.2.1) ed25519 (1.3.0) erubi (1.12.0) + eventmachine (1.2.7) factory_bot (6.4.6) activesupport (>= 5.0.0) factory_bot_rails (6.4.3) @@ -173,6 +174,9 @@ GEM faraday-net_http (>= 2.0, < 3.2) faraday-net_http (3.1.0) net-http + faye-websocket (0.11.3) + eventmachine (>= 0.12.0) + websocket-driver (>= 0.5.1) ffi (1.16.3) globalid (1.2.1) activesupport (>= 6.1) @@ -518,6 +522,7 @@ DEPENDENCIES devise digest-crc (~> 0.6.5) factory_bot_rails + faye-websocket (~> 0.11.3) google-cloud-storage (~> 1.49) importmap-rails jbuilder diff --git a/app/channels/application_cable/connection.rb b/app/channels/application_cable/connection.rb index 8d6c2a1..150fd08 100644 --- a/app/channels/application_cable/connection.rb +++ b/app/channels/application_cable/connection.rb @@ -2,5 +2,19 @@ module ApplicationCable class Connection < ActionCable::Connection::Base + identified_by :uuid + + def connect + authenticate! # reject connections that do not successfully authenticate + self.uuid = SecureRandom.uuid # assign a random uuid value when a user connects + end + + private + + def authenticate! 
+ return if request.authorization&.split(' ')&.at(1) == CHECK_PLEASE['remote_request_api_key'] + + reject_unauthorized_connection + end end end diff --git a/app/channels/fixity_check_channel.rb b/app/channels/fixity_check_channel.rb new file mode 100644 index 0000000..ae847ce --- /dev/null +++ b/app/channels/fixity_check_channel.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +class FixityCheckChannel < ApplicationCable::Channel + FIXITY_CHECK_STREAM_PREFIX = "#{CHECK_PLEASE['action_cable_stream_prefix']}fixity_check:".freeze + + # A websocket client subscribes by sending this message: + # { + # "command" => "subscribe", + # "identifier" => { "channel" => "FixityCheckChannel", "job_identifier" => "cool-job-id1" }.to_json + # } + def subscribed + return if params[:job_identifier].blank? + + stream_name = "#{FIXITY_CHECK_STREAM_PREFIX}#{params[:job_identifier]}" + Rails.logger.debug "A client has started streaming from: #{stream_name}" + stream_from stream_name + end + + def unsubscribed + # Any cleanup needed when channel is unsubscribed + return if params[:job_identifier].blank? 
+ + stream_name = "#{FIXITY_CHECK_STREAM_PREFIX}#{params[:job_identifier]}" + Rails.logger.debug "A client has stopped streaming from: #{stream_name}" + stop_stream_from stream_name + end + + # A websocket client runs this command by sending this message: + # { + # "command" => "run_fixity_check_for_s3_object", + # "identifier" => { "channel" => "FixityCheckChannel", "job_identifier" => "cool-job-id1" }.to_json, + # "data" => { + # "action" => "run_fixity_check_for_s3_object", "bucket_name" => "some-bucket", + # "object_path" => "path/to/object.png", "checksum_algorithm_name" => "sha256" + # }.to_json + # } + def run_fixity_check_for_s3_object(data) + Rails.logger.debug("run_fixity_check_for_s3_object action received with job_identifier: #{params[:job_identifier]}") + job_identifier = params[:job_identifier] + bucket_name = data['bucket_name'] + object_path = data['object_path'] + checksum_algorithm_name = data['checksum_algorithm_name'] + + AwsCheckFixityJob.perform_later(job_identifier, bucket_name, object_path, checksum_algorithm_name) + end +end diff --git a/app/jobs/aws_check_fixity_job.rb b/app/jobs/aws_check_fixity_job.rb new file mode 100644 index 0000000..ea9d798 --- /dev/null +++ b/app/jobs/aws_check_fixity_job.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +# rubocop:disable Metrics/MethodLength + +class AwsCheckFixityJob < ApplicationJob + queue_as CheckPlease::Queues::CHECK_FIXITY + + def perform(job_identifier, bucket_name, object_path, checksum_algorithm_name) + response_stream_name = "#{FixityCheckChannel::FIXITY_CHECK_STREAM_PREFIX}#{job_identifier}" + progress_report_lambda = lambda { |_chunk, _bytes_read, chunk_counter| + return unless (chunk_counter % 100).zero? + + # TODO: Broadcast a message to indicate that the processing is still happening. + # This way, clients will know if a job has stalled and will not wait indefinitely for results. 
+ ActionCable.server.broadcast( + response_stream_name, + { type: 'fixity_check_in_progress' }.to_json + ) + } + + checksum_hexdigest, object_size = CheckPlease::Aws::ObjectFixityChecker.check( + bucket_name, + object_path, + checksum_algorithm_name, + on_chunk: progress_report_lambda + ) + + # Broadcast message when job is complete + broadcast_fixity_check_complete( + response_stream_name, bucket_name, object_path, checksum_algorithm_name, checksum_hexdigest, object_size + ) + rescue StandardError => e + broadcast_fixity_check_error(response_stream_name, e.message, bucket_name, object_path, checksum_algorithm_name) + end + + def broadcast_fixity_check_complete( + response_stream_name, bucket_name, object_path, checksum_algorithm_name, checksum_hexdigest, object_size + ) + ActionCable.server.broadcast( + response_stream_name, + { + type: 'fixity_check_complete', + data: { + bucket_name: bucket_name, object_path: object_path, + checksum_algorithm_name: checksum_algorithm_name, + checksum_hexdigest: checksum_hexdigest, object_size: object_size + } + }.to_json + ) + end + + def broadcast_fixity_check_error( + response_stream_name, error_message, bucket_name, object_path, checksum_algorithm_name + ) + ActionCable.server.broadcast( + response_stream_name, + { + type: 'fixity_check_error', + data: { + error_message: error_message, bucket_name: bucket_name, + object_path: object_path, checksum_algorithm_name: checksum_algorithm_name + } + }.to_json + ) + end +end diff --git a/config/cable.yml b/config/cable.yml new file mode 100644 index 0000000..d6e4447 --- /dev/null +++ b/config/cable.yml @@ -0,0 +1,12 @@ +development: + # adapter: async + adapter: redis + url: redis://localhost:6379/1 + +test: + adapter: test + +production: + adapter: redis + url: <%= ENV.fetch("REDIS_URL") { "redis://localhost:6379/1" } %> + channel_prefix: check_please_production diff --git a/config/environments/deployed.rb b/config/environments/deployed.rb index 2209a91..bcc0133 100644 --- 
a/config/environments/deployed.rb +++ b/config/environments/deployed.rb @@ -44,6 +44,8 @@ # config.action_cable.mount_path = nil # config.action_cable.url = 'wss://example.com/cable' # config.action_cable.allowed_request_origins = [ 'http://example.com', /http:\/\/example.*/ ] + # Allow Action Cable access from any origin. + config.action_cable.disable_request_forgery_protection = true # Force all access to the app over SSL, use Strict-Transport-Security, and use secure cookies. # config.force_ssl = true diff --git a/config/environments/development.rb b/config/environments/development.rb index 80e0587..cca1101 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -71,7 +71,7 @@ # config.action_view.annotate_rendered_view_with_filenames = true # Uncomment if you wish to allow Action Cable access from any origin. - # config.action_cable.disable_request_forgery_protection = true + config.action_cable.disable_request_forgery_protection = true # Raise error when a before_action's only/except options reference missing actions config.action_controller.raise_on_missing_callback_actions = true diff --git a/config/environments/test.rb b/config/environments/test.rb index f1d2fb5..8c03e95 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -63,4 +63,8 @@ # Raise error when a before_action's only/except options reference missing actions config.action_controller.raise_on_missing_callback_actions = true + + # Allow Action Cable access from any origin (so that it works in Capybara tests) + config.action_cable.disable_request_forgery_protection = true + # config.action_cable.allowed_request_origins = ['https://rubyonrails.com', %r{http://ruby.*}] end diff --git a/config/routes.rb b/config/routes.rb index d62f286..e506bdb 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -25,4 +25,7 @@ # Defines the root path route ("/") root 'pages#home' + + # Mount ActionCable Websocket route + mount ActionCable.server => 
'/cable' end diff --git a/config/templates/check_please.template.yml b/config/templates/check_please.template.yml index c825006..a6230e9 100644 --- a/config/templates/check_please.template.yml +++ b/config/templates/check_please.template.yml @@ -1,4 +1,9 @@ development: run_queued_jobs_inline: true + remote_request_api_key: changethis + action_cable_stream_prefix: '<%= "#{Rails.application.class.module_parent_name}:#{Rails.env}:" %>' + test: - run_queued_jobs_inline: true \ No newline at end of file + run_queued_jobs_inline: true + remote_request_api_key: changethis + action_cable_stream_prefix: '<%= "#{Rails.application.class.module_parent_name}:#{Rails.env}:" %>' diff --git a/lib/check_please/aws/object_fixity_verifier.rb b/lib/check_please/aws/object_fixity_checker.rb similarity index 64% rename from lib/check_please/aws/object_fixity_verifier.rb rename to lib/check_please/aws/object_fixity_checker.rb index f001e63..20c5691 100644 --- a/lib/check_please/aws/object_fixity_verifier.rb +++ b/lib/check_please/aws/object_fixity_checker.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -module CheckPlease::Aws::ObjectFixityVerifier +module CheckPlease::Aws::ObjectFixityChecker def self.digester_for_checksum_algorithm!(checksum_algorithm_name) case checksum_algorithm_name when 'sha256' @@ -16,17 +16,24 @@ def self.digester_for_checksum_algorithm!(checksum_algorithm_name) end end - def self.verify(bucket_name, object_path, checksum_algorithm_name, print_memory_stats: false) + # Checks the specified object and returns its checksum and size. + # @param bucket_name [String] The name of the S3 bucket + # @param object_path [String] The object path in the S3 bucket + # @param checksum_algorithm_name [String] A checksum algorithm name. + # Allowed values include: sha256, sha512, md5, crc32c + # @param on_chunk [lambda] A lambda that is called once per data chunk read, during the fixity check. 
+ # @return [Array] An array with two elements, the first being a hex digest of the object's bytes and the second + # being the object size in bytes. + def self.check(bucket_name, object_path, checksum_algorithm_name, on_chunk: nil) digester_for_checksum_algorithm = digester_for_checksum_algorithm!(checksum_algorithm_name) bytes_read = 0 - memory_monitoring_counter = 0 + chunk_counter = 0 obj = S3_CLIENT.get_object({ bucket: bucket_name, key: object_path }) do |chunk, _headers| digester_for_checksum_algorithm.update(chunk) bytes_read += chunk.bytesize - - memory_monitoring_counter += 1 - collect_and_print_memory_stats(bytes_read) if print_memory_stats && (memory_monitoring_counter % 100).zero? + chunk_counter += 1 + on_chunk&.call(chunk, bytes_read, chunk_counter) end # The bytes_read sum should equal the AWS-reported obj.content_length, @@ -45,9 +52,4 @@ def self.verify_read_byte_count!(bytes_read, expected_total_byte_count) raise CheckPlease::Exceptions::ReportedFileSizeMismatchError, "S3 reported an object size of #{expected_total_byte_count} bytes, but we only received #{bytes_read} bytes" end - - def self.collect_and_print_memory_stats(bytes_read) - pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$PROCESS_ID}"`.strip.split.map(&:to_i) - puts "Read: #{bytes_read / 1.megabyte} MB. Memory usage for pid #{pid}: #{size.to_f / 1.kilobyte} MB." 
# rubocop:disable Rails/Output - end end diff --git a/lib/check_please/queues.rb b/lib/check_please/queues.rb index cae7816..4e24a69 100644 --- a/lib/check_please/queues.rb +++ b/lib/check_please/queues.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true module CheckPlease::Queues + CHECK_FIXITY = 'check_fixity' end diff --git a/lib/tasks/check_please/rubocop.rake b/lib/tasks/check_please/rubocop.rake index 4fa7b1c..75eca9b 100644 --- a/lib/tasks/check_please/rubocop.rake +++ b/lib/tasks/check_please/rubocop.rake @@ -19,6 +19,7 @@ if ['development', 'test'].include?(Rails.env) 'Layout/SpaceAroundKeyword', 'Layout/SpaceAroundOperators', 'Layout/SpaceBeforeBlockBraces', + 'Layout/SpaceBeforeFirstArg', 'Layout/SpaceInsideArrayLiteralBrackets', 'Layout/SpaceInsideBlockBraces', 'Layout/SpaceInsideHashLiteralBraces', diff --git a/lib/tasks/check_please/verification.rake b/lib/tasks/check_please/verification.rake index fc7c81c..0b65932 100644 --- a/lib/tasks/check_please/verification.rake +++ b/lib/tasks/check_please/verification.rake @@ -1,5 +1,11 @@ BUFFER_SIZE = 5.megabytes +memory_stat_lambda = lambda { |_chunk, bytes_read, chunk_counter| + return unless (chunk_counter % 100).zero? + pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$PROCESS_ID}"`.strip.split.map(&:to_i) + puts "Read: #{bytes_read / 1.megabyte} MB. Memory usage for pid #{pid}: #{size.to_f / 1.kilobyte} MB." 
# rubocop:disable Rails/Output +} + namespace :check_please do namespace :verification do desc 'Verify the checksum for the file at the given bucket_name and object_path' @@ -9,12 +15,14 @@ namespace :check_please do checksum_algorithm_name = ENV['checksum_algorithm_name'] print_memory_stats = ENV['print_memory_stats'] == 'true' - checksum, object_size = CheckPlease::Aws::ObjectFixityVerifier.verify( + memory_monitoring_counter = 0 + checksum, object_size = CheckPlease::Aws::ObjectFixityChecker.check( bucket_name, object_path, checksum_algorithm_name, - print_memory_stats: print_memory_stats + on_chunk: print_memory_stats ? memory_stat_lambda : nil ) + puts "#{bucket_name}: #{object_path}" puts "#{checksum_algorithm_name} checksum is: #{checksum}" puts "object_size is: #{object_size}" diff --git a/spec/channels/application_cable/connection_spec.rb b/spec/channels/application_cable/connection_spec.rb new file mode 100644 index 0000000..f897795 --- /dev/null +++ b/spec/channels/application_cable/connection_spec.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe ApplicationCable::Connection, type: :channel do + let(:uuid_regex) { /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/ } + let(:invalid_authorization_header_value) { "Bearer: invalid-#{CHECK_PLEASE['remote_request_api_key']}" } + let(:valid_authorization_header_value) { "Bearer: #{CHECK_PLEASE['remote_request_api_key']}" } + + it 'rejects a connection when no authorization header is given' do + expect { connect '/cable' }.to have_rejected_connection + end + + it 'rejects a connection when an invalid authorization header value is given' do + expect { + connect '/cable', headers: { 'Authorization' => invalid_authorization_header_value } + }.to have_rejected_connection + end + + it "successfully connects and assigns a uuid value to the connection's uuid field" do + connect '/cable', headers: { 'Authorization' => valid_authorization_header_value } + 
expect(connection.uuid).to match(uuid_regex) + end +end diff --git a/spec/channels/fixity_check_channel_spec.rb b/spec/channels/fixity_check_channel_spec.rb new file mode 100644 index 0000000..c4c4eef --- /dev/null +++ b/spec/channels/fixity_check_channel_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +# rubocop:disable RSpec/ExampleLength + +require 'rails_helper' + +RSpec.describe FixityCheckChannel, type: :channel do + let(:connection_uuid) { 'ef8416ee-9a10-43e8-9e8a-a84465ef1dea' } + let(:job_identifier) { 'great-job-identifier' } + + before do + # This is a convenient way to create a connection with a specific, known uuid (rather than a random one) + stub_connection(uuid: connection_uuid) + end + + it 'does not connect to a stream if subscription job_identifier param is absent' do + subscribe + expect(subscription).to be_confirmed + expect(subscription.streams.length).to eq(0) + end + + context 'with a successful subscription' do + before do + subscribe(job_identifier: job_identifier) + end + + it 'connects to the expected stream if subscription job_identifier param is present' do + expect(subscription).to be_confirmed + expect(subscription.streams.length).to eq(1) + expect(subscription.streams).to include("CheckPlease:test:fixity_check:#{job_identifier}") + end + + context 'when a client sends a run_fixity_check_for_s3_object message' do + let(:bucket_name) { 'example-bucket' } + let(:object_path) { 'path/to/object.png' } + let(:checksum_algorithm_name) { 'sha256' } + let(:file_content) { 'A' * 1024 } + let(:checksum_hexdigest) { Digest::SHA256.hexdigest(file_content) } + let(:object_size) { file_content.bytesize } + + before do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).with( + bucket_name, + object_path, + checksum_algorithm_name, + on_chunk: Proc # any Proc + ).and_return([checksum_hexdigest, object_size]) + end + + it 'initiates a checksum calculation, which queues a background job' do + expect(AwsCheckFixityJob).to 
receive(:perform_later).with( + job_identifier, bucket_name, object_path, checksum_algorithm_name + ).and_call_original + + expect { + perform :run_fixity_check_for_s3_object, + job_identifier: job_identifier, + bucket_name: bucket_name, + object_path: object_path, + checksum_algorithm_name: checksum_algorithm_name + }.to have_broadcasted_to("#{FixityCheckChannel::FIXITY_CHECK_STREAM_PREFIX}#{job_identifier}").with( + { + type: 'fixity_check_complete', + data: { + bucket_name: bucket_name, object_path: object_path, + checksum_algorithm_name: checksum_algorithm_name, + checksum_hexdigest: checksum_hexdigest, object_size: object_size + } + }.to_json + ) + end + end + end +end diff --git a/spec/check_please/aws/object_fixity_verifier_spec.rb b/spec/check_please/aws/object_fixity_checker_spec.rb similarity index 70% rename from spec/check_please/aws/object_fixity_verifier_spec.rb rename to spec/check_please/aws/object_fixity_checker_spec.rb index 791425e..e7547d6 100644 --- a/spec/check_please/aws/object_fixity_verifier_spec.rb +++ b/spec/check_please/aws/object_fixity_checker_spec.rb @@ -1,8 +1,10 @@ # frozen_string_literal: true +# rubocop:disable RSpec/MultipleExpectations + require 'rails_helper' -describe CheckPlease::Aws::ObjectFixityVerifier do +describe CheckPlease::Aws::ObjectFixityChecker do describe '.digester_for_checksum_algorithm' do { 'sha256' => Digest::SHA256, @@ -20,7 +22,7 @@ end end - describe '.verify' do + describe '.check' do let(:bucket_name) { 'example-bucket' } let(:object_path) { 'a/b/c.txt' } let(:checksum_algorithm_name) { 'sha256' } @@ -51,8 +53,20 @@ it 'returns the expected value' do expect( - described_class.verify( - bucket_name, object_path, checksum_algorithm_name, print_memory_stats: print_memory_stats + described_class.check( + bucket_name, object_path, checksum_algorithm_name + ) + ).to eq([expected_sha256_checksum_hexdigest, expected_content_length]) + end + + it 'invokes the on_chunk lambda when provided' do + on_chunk_lambda = 
->(_chunk, _bytes_read, _chunk_counter) { next } + expect(on_chunk_lambda).to receive(:call).with(chunk1, chunk1.bytesize, 1) + expect(on_chunk_lambda).to receive(:call).with(chunk2, chunk1.bytesize + chunk2.bytesize, 2) + expect(on_chunk_lambda).to receive(:call).with(chunk3, chunk1.bytesize + chunk2.bytesize + chunk3.bytesize, 3) + expect( + described_class.check( + bucket_name, object_path, checksum_algorithm_name, on_chunk: on_chunk_lambda ) ).to eq([expected_sha256_checksum_hexdigest, expected_content_length]) end @@ -62,8 +76,8 @@ it 'raises an exception' do expect { - described_class.verify( - bucket_name, object_path, checksum_algorithm_name, print_memory_stats: print_memory_stats + described_class.check( + bucket_name, object_path, checksum_algorithm_name ) }.to raise_error( CheckPlease::Exceptions::ReportedFileSizeMismatchError diff --git a/spec/factories/users.rb b/spec/factories/users.rb index 32033ed..df3063b 100644 --- a/spec/factories/users.rb +++ b/spec/factories/users.rb @@ -2,5 +2,7 @@ FactoryBot.define do factory :user do + email { 'example@example.com' } + password { 'greatpassword' } end end diff --git a/spec/features/channel/fixity_check/run_fixity_check_for_s3_object_spec.rb b/spec/features/channel/fixity_check/run_fixity_check_for_s3_object_spec.rb new file mode 100644 index 0000000..d020660 --- /dev/null +++ b/spec/features/channel/fixity_check/run_fixity_check_for_s3_object_spec.rb @@ -0,0 +1,170 @@ +# frozen_string_literal: true + +# rubocop:disable RSpec/ExampleLength + +# NOTE: When troubleshooting websocket feature tests, make sure to tail +# logs/test.log while testing so that you can see errors that are raised on a separate thread. + +require 'rails_helper' + +# Runs the given block in a new thread, but stops the thread +# after the given max_run_time (in seconds) has elapsed. +# @return [Thread] A reference to the thread. 
+def with_separate_thread(max_run_time, &block) + Thread.new do + Timeout.timeout(max_run_time, &block) + rescue Timeout::Error + # Capture and ignore Timeout::Error + end +end + +def authorized_websocket_connection + ws_url = "#{Capybara.current_session.server_url.gsub('http:', 'ws:')}/cable" + # ws_url = 'ws://localhost:4000/cable' + Faye::WebSocket::Client.new(ws_url, nil, { + headers: { 'Authorization' => "Bearer: #{CHECK_PLEASE['remote_request_api_key']}" } + }) +end + +# NOTE: For an overview of the Action Cable API, this is a useful article: +# https://stanko.io/deconstructing-action-cable-DC7F33OsjGmK + +# NOTE: `js: true` is required for websocket feature tests so that we can +# get the server url from Capybara.current_session.server_url. +RSpec.describe 'run_fixity_check_for_s3_object action', type: :feature, js: true do + let(:received_messages) { [] } + let(:job_identifier) { 'job-123' } + let(:bucket_name) { 'example-bucket' } + let(:valid_object_path) { 'valid/object/path.png' } + let(:invalid_object_path) { 'invalid/object/path.png' } + let(:checksum_algorithm_name) { 'sha256' } + let(:file_content) { 'A' * 1024 } + let(:checksum_hexdigest) { Digest::SHA256.hexdigest(file_content) } + let(:object_size) { file_content.bytesize } + + before do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).with( + bucket_name, + valid_object_path, + checksum_algorithm_name, + on_chunk: Proc # any Proc + ).and_return([checksum_hexdigest, object_size]) + + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).with( + bucket_name, + invalid_object_path, + checksum_algorithm_name, + on_chunk: Proc # any Proc + ).and_raise(StandardError, 'This is an error') + + # NOTE: We run EM inside a thread with a timeout just in case EventMachine::stop_event_loop + # is never called because of an error. 
+ t = with_separate_thread(10) do + EM.run do + ws = authorized_websocket_connection + + ws.on :open do |event| + # p [:open] + end + + ws.on :message do |event| + data = JSON.parse(event.data) + # p [:message, data] + received_messages << data unless data['type'] == 'ping' # We're ignoring ping messages + + if data['type'] == 'welcome' + # After welcome message is received, subscribe to the FixityCheckChannel + ws.send( + { + 'command': 'subscribe', + 'identifier': { 'channel': 'FixityCheckChannel', 'job_identifier': job_identifier }.to_json + }.to_json + ) + elsif data['type'] == 'confirm_subscription' + # After receiving a subscription confirmation, send a run_fixity_check_for_s3_object message on the channel + ws.send( + { + 'command': 'message', + 'identifier': { 'channel': 'FixityCheckChannel', 'job_identifier': job_identifier }.to_json, + 'data': { + 'action': 'run_fixity_check_for_s3_object', + 'bucket_name': bucket_name, + 'object_path': object_path, + 'checksum_algorithm_name': checksum_algorithm_name + }.to_json + }.to_json + ) + elsif data['type'].nil? && data['message'].present? 
+ if JSON.parse(data['identifier']) == { + 'channel' => 'FixityCheckChannel', 'job_identifier' => job_identifier + } + ws.close + end + end + end + + ws.on :close do |_event| + # p [:close, event.code, event.reason] + ws = nil + EventMachine.stop_event_loop + end + end + end + t.join + end + + context 'when a client subscribes to a FixityCheckChannel stream based on job_identifier and sends a '\ + 'run_fixity_check_for_s3_object message for a valid object' do + let(:object_path) { valid_object_path } + + it 'completes successfully and broadcasts the expected response' do + expect(received_messages[0]).to eq({ 'type' => 'welcome' }) + expect(received_messages[1]).to eq({ + 'type' => 'confirm_subscription', + 'identifier' => { 'channel' => 'FixityCheckChannel', 'job_identifier' => job_identifier }.to_json + }) + expect(received_messages[2]).to eq( + { + 'identifier' => { 'channel' => 'FixityCheckChannel', 'job_identifier' => job_identifier }.to_json, + 'message' => { + 'type' => 'fixity_check_complete', + 'data' => { + 'bucket_name' => bucket_name, + 'object_path' => object_path, + 'checksum_algorithm_name' => checksum_algorithm_name, + 'checksum_hexdigest' => checksum_hexdigest, + 'object_size' => object_size + } + }.to_json + } + ) + end + end + + context 'when a client subscribes to a FixityCheckChannel stream based on job_identifier and sends a '\ + 'run_fixity_check_for_s3_object message for an invalid object' do + let(:object_path) { invalid_object_path } + + it 'fails to calculate a checksum and broadcasts the expected error response' do + expect(received_messages[0]).to eq({ 'type' => 'welcome' }) + expect(received_messages[1]).to eq({ + 'type' => 'confirm_subscription', + 'identifier' => { 'channel' => 'FixityCheckChannel', 'job_identifier' => job_identifier }.to_json + }) + expect(received_messages[2]).to eq( + { + 'identifier' => { 'channel' => 'FixityCheckChannel', 'job_identifier' => job_identifier }.to_json, + 'message' => { + 'type' => 
'fixity_check_error', + 'data' => { + 'error_message' => 'This is an error', + 'bucket_name' => bucket_name, + 'object_path' => object_path, + 'checksum_algorithm_name' => checksum_algorithm_name + } + }.to_json + } + ) + end + end +end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index 9758637..7b88b4b 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -60,6 +60,9 @@ # https://rspec.info/features/6-0/rspec-rails config.infer_spec_type_from_file_location! + # Enable Action Cable testing + config.include ActionCable::TestHelper + # Filter lines from Rails gems in backtraces. config.filter_rails_from_backtrace! # arbitrary gems may also be filtered via: From 4059199d4a3cb59840640d3f2fd1d24560c3bc62 Mon Sep 17 00:00:00 2001 From: Eric O Date: Tue, 4 Jun 2024 15:01:25 -0400 Subject: [PATCH 3/7] No longer tracking config/cable.yml because we want a config that's specific to each deployed environment; Added config/cable.yml to deployment symlinks --- config/deploy.rb | 3 ++- config/{cable.yml => templates/cable.template.yml} | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename config/{cable.yml => templates/cable.template.yml} (100%) diff --git a/config/deploy.rb b/config/deploy.rb index 965978e..cccd762 100644 --- a/config/deploy.rb +++ b/config/deploy.rb @@ -26,7 +26,8 @@ 'config/gcp.yml', 'config/permissions.yml', 'config/redis.yml', - 'config/resque.yml' + 'config/resque.yml', + 'config/cable.yml' # Default value for linked_dirs is [] append :linked_dirs, diff --git a/config/cable.yml b/config/templates/cable.template.yml similarity index 100% rename from config/cable.yml rename to config/templates/cable.template.yml From 836bad6e210154b12b7454736d583783a93d84d6 Mon Sep 17 00:00:00 2001 From: Eric O Date: Thu, 13 Jun 2024 09:39:13 -0400 Subject: [PATCH 4/7] Test updates --- app/jobs/aws_check_fixity_job.rb | 27 +++--- spec/aws_check_fixity_job_spec.rb | 101 +++++++++++++++++++++ spec/channels/fixity_check_channel_spec.rb | 3 +- 3 files 
changed, 117 insertions(+), 14 deletions(-) create mode 100644 spec/aws_check_fixity_job_spec.rb diff --git a/app/jobs/aws_check_fixity_job.rb b/app/jobs/aws_check_fixity_job.rb index ea9d798..5b31887 100644 --- a/app/jobs/aws_check_fixity_job.rb +++ b/app/jobs/aws_check_fixity_job.rb @@ -1,28 +1,16 @@ # frozen_string_literal: true -# rubocop:disable Metrics/MethodLength - class AwsCheckFixityJob < ApplicationJob queue_as CheckPlease::Queues::CHECK_FIXITY def perform(job_identifier, bucket_name, object_path, checksum_algorithm_name) response_stream_name = "#{FixityCheckChannel::FIXITY_CHECK_STREAM_PREFIX}#{job_identifier}" - progress_report_lambda = lambda { |_chunk, _bytes_read, chunk_counter| - return unless (chunk_counter % 100).zero? - - # TODO: Broadcast a message to indicate that the processing is still happening. - # This way, clients will know if a job has stalled and will not wait indefinitely for results. - ActionCable.server.broadcast( - response_stream_name, - { type: 'fixity_check_in_progress' }.to_json - ) - } checksum_hexdigest, object_size = CheckPlease::Aws::ObjectFixityChecker.check( bucket_name, object_path, checksum_algorithm_name, - on_chunk: progress_report_lambda + on_chunk: progress_report_lambda(response_stream_name) ) # Broadcast message when job is complete @@ -63,4 +51,17 @@ def broadcast_fixity_check_error( }.to_json ) end + + def progress_report_lambda(response_stream_name) + lambda do |_chunk, _bytes_read, chunk_counter| + return unless (chunk_counter % 100).zero? + + # We periodically broadcast a message to indicate that the processing is still happening. + # This is so that a client can check whether a job has stalled. 
+ ActionCable.server.broadcast( + response_stream_name, + { type: 'fixity_check_in_progress' }.to_json + ) + end + end end diff --git a/spec/aws_check_fixity_job_spec.rb b/spec/aws_check_fixity_job_spec.rb new file mode 100644 index 0000000..621c0c9 --- /dev/null +++ b/spec/aws_check_fixity_job_spec.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +# rubocop:disable RSpec/ExampleLength + +require 'rails_helper' + +describe AwsCheckFixityJob do + let(:aws_check_fixity_job) { described_class.new } + let(:job_identifier) { 'great-job' } + let(:bucket_name) { 'example-bucket' } + let(:object_path) { 'path/to/object.png' } + let(:checksum_algorithm_name) { 'sha256' } + let(:example_content) { 'example' } + let(:checksum_hexdigest) { Digest::SHA256.hexdigest(example_content) } + let(:object_size) { example_content.bytesize } + let(:stream_name) { "#{FixityCheckChannel::FIXITY_CHECK_STREAM_PREFIX}#{job_identifier}" } + let(:error_message) { 'oh no!' } + + describe '#perform' do + it 'works as expected' do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).with( + bucket_name, + object_path, + checksum_algorithm_name, + on_chunk: Proc + ).and_return([checksum_hexdigest, object_size]) + expect(aws_check_fixity_job).to receive(:broadcast_fixity_check_complete).with( + stream_name, + bucket_name, + object_path, + checksum_algorithm_name, + checksum_hexdigest, + object_size + ) + aws_check_fixity_job.perform(job_identifier, bucket_name, object_path, checksum_algorithm_name) + end + + it 'broadcasts a fixity check error message when an error occurs during processing' do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).and_raise(StandardError, error_message) + + expect(aws_check_fixity_job).to receive(:broadcast_fixity_check_error).with( + stream_name, + error_message, + bucket_name, + object_path, + checksum_algorithm_name + ) + aws_check_fixity_job.perform(job_identifier, bucket_name, object_path, checksum_algorithm_name) + end + end + + 
describe '#progress_report_lambda' do + let(:chunk) { 'a chunk of content' } + let(:bytes_read) { 12_345 } + + it 'broadcasts an Action Cable message at the expected interval' do + progress_report_lambda = aws_check_fixity_job.progress_report_lambda(stream_name) + expect(ActionCable.server).to receive(:broadcast).exactly(10).times + (1..1000).each do |i| + progress_report_lambda.call(chunk, bytes_read, i) + end + end + end + + describe '#broadcast_fixity_check_complete' do + it 'results in the expected broadcast' do + expect(ActionCable.server).to receive(:broadcast).with( + stream_name, + { + type: 'fixity_check_complete', + data: { + bucket_name: bucket_name, object_path: object_path, + checksum_algorithm_name: checksum_algorithm_name, + checksum_hexdigest: checksum_hexdigest, object_size: object_size + } + }.to_json + ) + aws_check_fixity_job.broadcast_fixity_check_complete( + stream_name, bucket_name, object_path, checksum_algorithm_name, checksum_hexdigest, object_size + ) + end + end + + describe '#broadcast_fixity_check_error' do + it 'results in the expected broadcast' do + expect(ActionCable.server).to receive(:broadcast).with( + stream_name, + { + type: 'fixity_check_error', + data: { + error_message: error_message, bucket_name: bucket_name, + object_path: object_path, checksum_algorithm_name: checksum_algorithm_name + } + }.to_json + ) + aws_check_fixity_job.broadcast_fixity_check_error( + stream_name, error_message, bucket_name, object_path, checksum_algorithm_name + ) + end + end +end diff --git a/spec/channels/fixity_check_channel_spec.rb b/spec/channels/fixity_check_channel_spec.rb index c4c4eef..fcd4619 100644 --- a/spec/channels/fixity_check_channel_spec.rb +++ b/spec/channels/fixity_check_channel_spec.rb @@ -47,7 +47,8 @@ ).and_return([checksum_hexdigest, object_size]) end - it 'initiates a checksum calculation, which queues a background job' do + it 'initiates a checksum calculation, which queues a background job and '\ + 'responds with a 
fixity_check_complete broadcast' do expect(AwsCheckFixityJob).to receive(:perform_later).with( job_identifier, bucket_name, object_path, checksum_algorithm_name ).and_call_original From 3596146610e1c4d72ead46d5a88c58d79c476e2f Mon Sep 17 00:00:00 2001 From: Eric O Date: Thu, 13 Jun 2024 09:42:27 -0400 Subject: [PATCH 5/7] Deployment server hostname updates --- config/deploy/dev.rb | 2 +- config/deploy/prod.rb | 2 +- config/deploy/test.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/deploy/dev.rb b/config/deploy/dev.rb index 6f327d1..4c86982 100644 --- a/config/deploy/dev.rb +++ b/config/deploy/dev.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -server 'ec2-3-230-115-99.compute-1.amazonaws.com', user: fetch(:remote_user), roles: %w[app db web] +server 'fixity-test-1.svc.cul.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] # Current branch is suggested by default in development ask :branch, `git rev-parse --abbrev-ref HEAD`.chomp diff --git a/config/deploy/prod.rb b/config/deploy/prod.rb index 77d0c75..b3924aa 100644 --- a/config/deploy/prod.rb +++ b/config/deploy/prod.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -server 'check-please.library.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] +server 'not-available-yet.library.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] # In test/prod, suggest latest tag as default version to deploy ask :branch, proc { `git tag --sort=version:refname`.split("\n").last } diff --git a/config/deploy/test.rb b/config/deploy/test.rb index 1ba287c..b3924aa 100644 --- a/config/deploy/test.rb +++ b/config/deploy/test.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -server 'check-please-test.library.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] +server 'not-available-yet.library.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] # In test/prod, suggest latest tag as default version to deploy ask :branch, proc { `git tag 
--sort=version:refname`.split("\n").last } From 55b870c3a018b96c7229d2a6d3affdf76b747f60 Mon Sep 17 00:00:00 2001 From: Eric O Date: Thu, 13 Jun 2024 12:10:20 -0400 Subject: [PATCH 6/7] README updates --- README.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9b1c955..92f8afb 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,10 @@ DLST app for performing fixity checks on cloud storage files. +## Development + +### First-Time Setup -**First-Time Setup (for developers)** Clone the repository. `git clone git@github.com:cul/check_please.git` @@ -16,8 +18,15 @@ Set up config files. Run database migrations. `bundle exec rake db:migrate` -Seed the database with necessary values for operation. -`rails db:seed` +Start the application using `bundle exec rails server`. +`bundle exec rails s -p 3000` + +## Testing + +Run: `bundle exec rspec` + +## Deployment + +Run: `bundle exec cap [env] deploy` -Start the application using `rails server`. -`rails s -p 3000` +NOTE: Only the `dev` environment deploy target is fully set up at this time. 
From 46a6110a040d35d56c0d2a6d2f8af485e3cfd288 Mon Sep 17 00:00:00 2001 From: Eric O Date: Thu, 18 Jul 2024 15:39:24 -0400 Subject: [PATCH 7/7] Add http fixity check endpoint --- .rubocop.yml | 1 + Gemfile | 2 + Gemfile.lock | 8 ++ app/controllers/api_controller.rb | 36 +++++ app/controllers/fixity_checks_controller.rb | 44 ++++++ config/deploy/dev.rb | 2 +- config/routes.rb | 2 + spec/rails_helper.rb | 6 +- .../run_fixity_check_for_s3_object_spec.rb | 136 ++++++++++++++++++ spec/support/authenticated_requests.rb | 15 ++ 10 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 app/controllers/api_controller.rb create mode 100644 app/controllers/fixity_checks_controller.rb create mode 100644 spec/requests/fixity_checks/run_fixity_check_for_s3_object_spec.rb create mode 100644 spec/support/authenticated_requests.rb diff --git a/.rubocop.yml b/.rubocop.yml index 23efe87..7f122ca 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -23,6 +23,7 @@ Lint/MissingCopEnableDirective: Metrics/MethodLength: Exclude: - lib/check_please/aws/object_fixity_checker.rb + - app/controllers/fixity_checks_controller.rb RSpec/VerifiedDoubles: Exclude: diff --git a/Gemfile b/Gemfile index 56276fb..4cac3fd 100644 --- a/Gemfile +++ b/Gemfile @@ -72,6 +72,8 @@ gem 'omniauth-cul', '~> 0.2.0' group :development, :test do # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem gem 'debug', platforms: %i[mri windows] + # json_spec for easier json comparison in tests + gem 'json_spec' # Rubocul for linting gem 'rubocul', '~> 4.0.11' # gem 'rubocul', path: '../rubocul' diff --git a/Gemfile.lock b/Gemfile.lock index f221ced..a7ed602 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -231,6 +231,9 @@ GEM activesupport (>= 5.0.0) jmespath (1.6.2) json (2.7.1) + json_spec (1.1.5) + multi_json (~> 1.0) + rspec (>= 2.0, < 4.0) jwt (2.8.1) base64 language_server-protocol (3.17.0.3) @@ -365,6 +368,10 @@ GEM sinatra (>= 0.9.2) retriable (3.1.2) rexml 
(3.2.6) + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) rspec-core (3.13.0) rspec-support (~> 3.13.0) rspec-expectations (3.13.0) @@ -526,6 +533,7 @@ DEPENDENCIES google-cloud-storage (~> 1.49) importmap-rails jbuilder + json_spec omniauth omniauth-cul (~> 0.2.0) puma (~> 6.0) diff --git a/app/controllers/api_controller.rb b/app/controllers/api_controller.rb new file mode 100644 index 0000000..cf745a4 --- /dev/null +++ b/app/controllers/api_controller.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +class ApiController < ActionController::API + include ActionController::HttpAuthentication::Token::ControllerMethods + + rescue_from ActiveRecord::RecordNotFound do + render json: errors('Not Found'), status: :not_found + end + + private + + # Returns 406 status if format requested is not json. This method can be + # used as a before_action callback for any controllers that only respond + # to json. + def ensure_json_request + return if request.format.blank? || request.format == :json + + head :not_acceptable + end + + # Renders with an :unauthorized status if no request token is provided, or renders with a + # :forbidden status if the request uses an invalid request token. This method should be + # used as a before_action callback for any controller actions that require authorization. 
+ def authenticate_request_token + authenticate_or_request_with_http_token do |token, _options| + ActiveSupport::SecurityUtils.secure_compare(CHECK_PLEASE['remote_request_api_key'], token) + end + end + + # Generates JSON with errors + # + # @param String|Array json response describing errors + def errors(errors) + { errors: Array.wrap(errors).map { |e| { message: e } } } + end +end diff --git a/app/controllers/fixity_checks_controller.rb b/app/controllers/fixity_checks_controller.rb new file mode 100644 index 0000000..0c98cbc --- /dev/null +++ b/app/controllers/fixity_checks_controller.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +class FixityChecksController < ApiController + before_action :authenticate_request_token + + # POST /fixity_checks/run_fixity_check_for_s3_object + def run_fixity_check_for_s3_object + bucket_name = fixity_check_params['bucket_name'] + object_path = fixity_check_params['object_path'] + checksum_algorithm_name = fixity_check_params['checksum_algorithm_name'] + + checksum_hexdigest, object_size = CheckPlease::Aws::ObjectFixityChecker.check( + bucket_name, object_path, checksum_algorithm_name + ) + + render plain: { + bucket_name: bucket_name, object_path: object_path, checksum_algorithm_name: checksum_algorithm_name, + checksum_hexdigest: checksum_hexdigest, object_size: object_size + }.to_json + rescue StandardError => e + render plain: { + error_message: e.message, + bucket_name: bucket_name, object_path: object_path, checksum_algorithm_name: checksum_algorithm_name + }.to_json, status: :bad_request + end + + private + + def fixity_check_response(bucket_name, object_path, checksum_algorithm_name, checksum_hexdigest, object_size) + run_fixity_check_for_s3_object + { + bucket_name: bucket_name, object_path: object_path, checksum_algorithm_name: checksum_algorithm_name, + checksum_hexdigest: checksum_hexdigest, object_size: object_size + }.to_json + end + + def fixity_check_params + params.require(:fixity_check).tap do 
|fixity_check_params| + fixity_check_params.require(:bucket_name) + fixity_check_params.require(:object_path) + fixity_check_params.require(:checksum_algorithm_name) + end + end +end diff --git a/config/deploy/dev.rb b/config/deploy/dev.rb index 4c86982..cc6f90f 100644 --- a/config/deploy/dev.rb +++ b/config/deploy/dev.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -server 'fixity-test-1.svc.cul.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] +server 'fixity-test-2.svc.cul.columbia.edu', user: fetch(:remote_user), roles: %w[app db web] # Current branch is suggested by default in development ask :branch, `git rev-parse --abbrev-ref HEAD`.chomp diff --git a/config/routes.rb b/config/routes.rb index e506bdb..7830e8d 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -26,6 +26,8 @@ # Defines the root path route ("/") root 'pages#home' + post '/fixity_checks/run_fixity_check_for_s3_object', to: 'fixity_checks#run_fixity_check_for_s3_object' + # Mount ActionCable Websocket route mount ActionCable.server => '/cable' end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index 7b88b4b..dcb265d 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -22,7 +22,7 @@ # directory. Alternatively, in the individual `*_spec.rb` files, manually # require only the support files necessary. # -# Rails.root.glob('spec/support/**/*.rb').sort.each { |f| require f } +Rails.root.glob('spec/support/**/*.rb').sort.each { |f| require f } # Checks for pending migrations and applies them before tests are run. # If you are not using ActiveRecord, you can remove these lines. 
@@ -70,4 +70,8 @@ config.before(:suite) do Rails.application.load_seed # loading seeds end + + # Include helpers + config.include AuthenticatedRequests, type: :request + config.include JsonSpec::Helpers end diff --git a/spec/requests/fixity_checks/run_fixity_check_for_s3_object_spec.rb b/spec/requests/fixity_checks/run_fixity_check_for_s3_object_spec.rb new file mode 100644 index 0000000..714f99e --- /dev/null +++ b/spec/requests/fixity_checks/run_fixity_check_for_s3_object_spec.rb @@ -0,0 +1,136 @@ +# frozen_string_literal: true + +require 'rails_helper' + +endpoint = '/fixity_checks/run_fixity_check_for_s3_object' + +RSpec.describe endpoint, type: :request do + describe "POST #{endpoint}" do + context 'when unauthenticated request' do + it 'returns a 401 (unauthorized) status when no auth token is provided' do + post endpoint + expect(response.status).to eq(401) + end + + it 'returns a 401 (unauthorized) status when an incorrect auth token is provided' do + post endpoint, headers: { 'Authorization' => 'Token NOTVALID' } + expect(response.status).to eq(401) + end + end + + context 'when authenticated request' do + let(:bucket_name) { 'cul-dlstor-digital-testing1' } + let(:object_path) { 'test-909kb-file.jpg' } + let(:checksum_algorithm_name) { 'sha256' } + + let(:example_content) { 'example' } + let(:checksum_hexdigest) { Digest::SHA256.hexdigest(example_content) } + let(:object_size) { example_content.bytesize } + + let(:fixity_check_params) do + { + fixity_check: { + bucket_name: bucket_name, + object_path: object_path, + checksum_algorithm_name: checksum_algorithm_name + } + } + end + + context 'when valid params are given' do + before do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).with( + bucket_name, + object_path, + checksum_algorithm_name + ).and_return([checksum_hexdigest, object_size]) + end + + it 'returns a 200 (ok) status ' do + post_with_auth endpoint, params: fixity_check_params + expect(response.status).to eq(200) + end + + it 
'returns the expected response body' do + post_with_auth endpoint, params: fixity_check_params + expect(response.body).to be_json_eql(%( + { + "bucket_name": "#{bucket_name}", + "checksum_algorithm_name": "#{checksum_algorithm_name}", + "checksum_hexdigest": "#{checksum_hexdigest}", + "object_path": "#{object_path}", + "object_size": #{object_size} + } + )) + end + end + + context 'when a required param is missing' do + [:bucket_name, :object_path, :checksum_algorithm_name].each do |required_param| + context "when required param #{required_param} is missing" do + before do + fixity_check_params[:fixity_check].delete(required_param) + post_with_auth endpoint, params: fixity_check_params + end + + it 'returns a 400 (bad request) status ' do + expect(response.status).to eq(400) + end + + it 'returns the expected error' do + expect(response.body).to be_json_eql(%({ + "error_message" : "param is missing or the value is empty: #{required_param}", + "bucket_name": null, + "checksum_algorithm_name": null, + "object_path": null + })) + end + end + end + end + + context 'when an unsupported checksum algorithm is supplied' do + let(:checksum_algorithm_name) { 'sha256-and-a-half' } + + before do + post_with_auth endpoint, params: fixity_check_params + end + + it 'returns a 400 (bad request) status' do + expect(response.status).to eq(400) + end + + it 'returns the expected error' do + expect(response.body).to be_json_eql(%({ + "error_message" : "Unsupported checksum algorithm: #{checksum_algorithm_name}", + "bucket_name": "#{bucket_name}", + "checksum_algorithm_name": "#{checksum_algorithm_name}", + "object_path": "#{object_path}" + })) + end + end + + context 'when any StandardError is raised during processing' do + let(:error_message) { 'oh no!' 
} + + before do + allow(CheckPlease::Aws::ObjectFixityChecker).to receive(:check).and_raise(StandardError, error_message) + post_with_auth endpoint, params: fixity_check_params + end + + it 'returns a 400 (bad request) status' do + expect(response.status).to eq(400) + end + + it 'returns the expected error' do + expect(response.body).to be_json_eql(%({ + "error_message" : "#{error_message}", + "bucket_name": "#{bucket_name}", + "checksum_algorithm_name": "#{checksum_algorithm_name}", + "object_path": "#{object_path}" + })) + end + end + end + end +end diff --git a/spec/support/authenticated_requests.rb b/spec/support/authenticated_requests.rb new file mode 100644 index 0000000..35dbf30 --- /dev/null +++ b/spec/support/authenticated_requests.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module AuthenticatedRequests + # Generates custom http request methods that end in `_with_auth`. These methods + # add authentication to each request. + [:get, :post, :put, :patch, :delete].each do |http_method| + define_method "#{http_method}_with_auth" do |path, **args| + args[:headers] = args.fetch(:headers, {}).merge( + 'Authorization' => "Token #{CHECK_PLEASE['remote_request_api_key']}" + ) + + send(http_method, path, **args) + end + end +end