From 9424e623906acdc3c509beeae86a11d88b7607e4 Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Tue, 10 Dec 2024 15:13:16 -0600 Subject: [PATCH 1/3] port gene summaries tsv exporter to feature exporter --- server/app/jobs/generate_tsvs.rb | 11 +- .../tsv_formatters/feature_tsv_formatter.rb | 111 ++++++++++++++++++ .../app/tsv_formatters/gene_tsv_formatter.rb | 37 ------ .../tsv_formatters/variant_tsv_formatter.rb | 2 +- 4 files changed, 122 insertions(+), 39 deletions(-) create mode 100644 server/app/tsv_formatters/feature_tsv_formatter.rb delete mode 100644 server/app/tsv_formatters/gene_tsv_formatter.rb diff --git a/server/app/jobs/generate_tsvs.rb b/server/app/jobs/generate_tsvs.rb index 4970fc41d..ed1298ff1 100644 --- a/server/app/jobs/generate_tsvs.rb +++ b/server/app/jobs/generate_tsvs.rb @@ -14,6 +14,15 @@ def perform public_path = public_file_path(e.file_name) FileUtils.cp(tmp_file.path, public_path) File.chmod(0644, public_path) + + #symlink in legacy TSV names for backwards compatability + if e.respond_to?(:file_aliases) + e.file_aliases.each do |fa| + link_path = public_file_path(fa) + FileUtils.ln_s(public_path, link_path, force: true) + File.chmod(0644, link_path) + end + end ensure tmp_file.unlink end @@ -22,7 +31,7 @@ def perform def tsvs_to_generate [ - GeneTsvFormatter, + FeatureTsvFormatter, VariantTsvFormatter, EvidenceItemTsvFormatter, VariantGroupTsvFormatter, diff --git a/server/app/tsv_formatters/feature_tsv_formatter.rb b/server/app/tsv_formatters/feature_tsv_formatter.rb new file mode 100644 index 000000000..d6002f0d6 --- /dev/null +++ b/server/app/tsv_formatters/feature_tsv_formatter.rb @@ -0,0 +1,111 @@ +class FeatureTsvFormatter + def self.objects + Feature.joins(variants: { molecular_profiles: [:evidence_items]}) + .includes(:feature_instance) + .distinct + end + + def self.headers + shared_headers = [ + 'feature_id', + 'feature_civic_url', + 'feature_type', + 'name', + 'feature_aliases', + 'description', + 'last_review_date', + 'is_flagged' + ] + shared_headers + gene_headers + factor_headers + fusion_headers + end + + def self.gene_headers + ['entrez_id'] + end + + def self.factor_headers + ['ncit_id'] + end + + def self.fusion_headers + [ + 'five_prime_partner_status', + 'three_prime_partner_status', + 'five_prime_gene_id', + 'five_prime_gene_name', + 'five_prime_gene_entrez_id', + 'three_prime_gene_id', + 'three_prime_gene_name', + 'three_prime_gene_entrez_id', + ] + end + + def self.create_gene_row(feature) + row = [ + feature.feature_instance.entrez_id + ] + row += Array.new(factor_headers.size) + row += Array.new(fusion_headers.size) + return row + end + + def self.create_factor_row(feature) + row = Array.new(gene_headers.size) + row += [ + feature.feature_instance.ncit_id + ] + row += Array.new(fusion_headers.size) + end + + def self.create_fusion_row(feature) + feature_instance = feature.feature_instance + row = Array.new(gene_headers.size) + row += Array.new(factor_headers.size) + row += [ + feature_instance.five_prime_partner_status, + feature_instance.three_prime_partner_status, + feature_instance.five_prime_gene_id, + feature_instance.five_prime_gene&.name, + feature_instance.five_prime_gene&.entrez_id, + feature_instance.three_prime_gene_id, + feature_instance.three_prime_gene&.name, + feature_instance.three_prime_gene&.entrez_id, + ] + end + + def self.row_from_object(feature) + shared_cols = [ + feature.id, + LinkAdaptors::Feature.new(feature).permalink_path(include_domain: true), + feature.feature_instance_type.demodulize, + feature.name, + feature.feature_aliases.map(&:name).join(","), + feature.description&.squish, + feature.updated_at, + feature.flagged + ] + + feature_cols = case feature.feature_instance + when Features::Gene + create_gene_row(feature) + when Features::Factor + create_factor_row(feature) + when Features::Fusion + create_fusion_row(feature) + else + raise StandardError.new("Unknown feature type for TSV export: #{feature.feature_instance_type}") + end + shared_cols + feature_cols + end + + def self.file_name + 'FeatureSummaries.tsv' + end + + def self.file_aliases + [ + 'GeneSummaries.tsv' + ] + end +end + diff --git a/server/app/tsv_formatters/gene_tsv_formatter.rb b/server/app/tsv_formatters/gene_tsv_formatter.rb deleted file mode 100644 index 256c244b3..000000000 --- a/server/app/tsv_formatters/gene_tsv_formatter.rb +++ /dev/null @@ -1,37 +0,0 @@ -class GeneTsvFormatter - def self.objects - Feature.joins(variants: { molecular_profiles: [:evidence_items]}) - .where(feature_instance_type: 'Features::Gene') - .includes(:feature_instance) - .distinct - end - - def self.headers - [ - 'gene_id', - 'gene_civic_url', - 'name', - 'entrez_id', - 'description', - 'last_review_date', - 'is_flagged' - ] - end - - def self.row_from_object(feature) - [ - feature.feature_instance.id, - LinkAdaptors::Gene.new(feature.feature_instance).permalink_path(include_domain: true), - feature.name, - feature.feature_instance.entrez_id, - feature.description.squish, - feature.updated_at, - feature.flagged - ] - end - - def self.file_name - 'GeneSummaries.tsv' - end -end - diff --git a/server/app/tsv_formatters/variant_tsv_formatter.rb b/server/app/tsv_formatters/variant_tsv_formatter.rb index e88290f8c..29792527a 100644 --- a/server/app/tsv_formatters/variant_tsv_formatter.rb +++ b/server/app/tsv_formatters/variant_tsv_formatter.rb @@ -44,7 +44,7 @@ def self.gene_headers ] end - def self.factor_headers + def self.factor_headers [ 'ncit_id' ] From beca0a664497befdab472d54ddbd089ebcf6d214 Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Tue, 10 Dec 2024 15:20:44 -0600 Subject: [PATCH 2/3] fix spelling error --- server/app/jobs/generate_tsvs.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/app/jobs/generate_tsvs.rb b/server/app/jobs/generate_tsvs.rb index ed1298ff1..885aa1a87 100644 --- a/server/app/jobs/generate_tsvs.rb +++ b/server/app/jobs/generate_tsvs.rb @@ -15,7 +15,7 @@ def perform FileUtils.cp(tmp_file.path, public_path) File.chmod(0644, public_path) - #symlink in legacy TSV names for backwards compatability + #symlink in legacy TSV names for backwards compatibility if e.respond_to?(:file_aliases) e.file_aliases.each do |fa| link_path = public_file_path(fa) From cb21e0dec3287d80a0507d0af87c3975fada2166 Mon Sep 17 00:00:00 2001 From: Adam Coffman Date: Tue, 10 Dec 2024 15:23:07 -0600 Subject: [PATCH 3/3] update AWS upload job with new filename --- server/app/jobs/upload_tsvs_to_aws.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/app/jobs/upload_tsvs_to_aws.rb b/server/app/jobs/upload_tsvs_to_aws.rb index ceaeff6ff..957b8b78c 100644 --- a/server/app/jobs/upload_tsvs_to_aws.rb +++ b/server/app/jobs/upload_tsvs_to_aws.rb @@ -26,7 +26,7 @@ def perform def absolute_local_path(file) File.join(TsvRelease.downloads_path, file) end - + def bucket_name 'civic-aws-opendata' end @@ -61,7 +61,7 @@ def expected_files { "AssertionSummaries/date=#{date_string}/AssertionSummaries.tsv" => "#{date_string}/#{date_string}-AssertionSummaries.tsv", "ClinicalEvidenceSummaries/date=#{date_string}/ClinicalEvidenceSummaries.tsv" => "#{date_string}/#{date_string}-ClinicalEvidenceSummaries.tsv", - "GeneSummaries/date=#{date_string}/GeneSummaries.tsv" => "#{date_string}/#{date_string}-GeneSummaries.tsv", + "FeatureSummaries/date=#{date_string}/FeatureSummaries.tsv" => "#{date_string}/#{date_string}-FeatureSummaries.tsv", "VariantGroupSummaries/date=#{date_string}/VariantGroupSummaries.tsv"=> "#{date_string}/#{date_string}-VariantGroupSummaries.tsv", "VariantSummaries/date=#{date_string}/VariantSummaries.tsv" => "#{date_string}/#{date_string}-VariantSummaries.tsv", "MolecularProfileSummaries/date=#{date_string}/MolecularProfileSummaries.tsv" => "#{date_string}/#{date_string}-MolecularProfileSummaries.tsv",