From 8bf397f8fd2764c48cc77d695c1c53e1dd98345f Mon Sep 17 00:00:00 2001 From: PlushZ Date: Wed, 4 Sep 2024 19:15:23 +0200 Subject: [PATCH 1/4] fix fasta.gz viewer issue --- lib/galaxy/datatypes/sequence.py | 66 ++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index 7a721d8df465..8b73ea5689f2 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -336,8 +336,43 @@ def split(cls, input_datasets: List, subdir_generator_function: Callable, split_ raise NotImplementedError("Can't split generic alignment files") +class BaseSequence(Sequence): + """ + Common base class provides common methods used by FASTQ and FASTA sequence format classes. + It includes functionality for displaying data shared among FASTQ and FASTA formats. + """ + + def display_data( + self, + trans, + dataset: DatasetHasHidProtocol, + preview: bool = False, + filename: Optional[str] = None, + to_ext: Optional[str] = None, + **kwd, + ): + headers = kwd.get("headers", {}) + if preview: + with compression_utils.get_fileobj(dataset.get_file_name()) as fh: + max_peek_size = 100000 + try: + chunk = fh.read(max_peek_size + 1) + except UnicodeDecodeError: + raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.") + if len(chunk) <= max_peek_size: + mime = "text/plain" + self._clean_and_set_mime_type(trans, mime, headers) + return chunk[:-1], headers + return ( + trans.fill_template_mako("/dataset/large_file.mako", truncated_data=chunk[:-1], data=dataset), + headers, + ) + else: + return Sequence.display_data(self, trans, dataset, preview, filename, to_ext, **kwd) + + @build_sniff_from_prefix -class Fasta(Sequence): +class Fasta(BaseSequence): """Class representing a FASTA sequence""" edam_format = "format_1929" @@ -692,7 +727,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: @build_sniff_from_prefix -class BaseFastq(Sequence): +class BaseFastq(BaseSequence): """Base class for FastQ sequences""" edam_format = "format_1930" @@ -764,33 +799,6 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: return False return self.check_first_block(file_prefix) - def display_data( - self, - trans, - dataset: DatasetHasHidProtocol, - preview: bool = False, - filename: Optional[str] = None, - to_ext: Optional[str] = None, - **kwd, - ): - headers = kwd.get("headers", {}) - if preview: - with compression_utils.get_fileobj(dataset.get_file_name()) as fh: - max_peek_size = 100000 - try: - chunk = fh.read(max_peek_size + 1) - except UnicodeDecodeError: - raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.") - if len(chunk) <= max_peek_size: - mime = "text/plain" - self._clean_and_set_mime_type(trans, mime, headers) - return chunk[:-1], headers - return ( - trans.fill_template_mako("/dataset/large_file.mako", truncated_data=chunk[:-1], data=dataset), - headers, - ) - else: - return Sequence.display_data(self, trans, dataset, preview, filename, to_ext, **kwd) @classmethod def split(cls, input_datasets: List, subdir_generator_function: Callable, split_params: Optional[Dict]) -> None: From 622793e4657996c22049c093615b7f02069fad4c Mon Sep 17 00:00:00 2001 From: PlushZ Date: Thu, 5 Sep 2024 16:09:28 +0200 Subject: [PATCH 2/4] fix linting --- lib/galaxy/datatypes/sequence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index 8b73ea5689f2..5b1aba82c7db 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -799,7 +799,6 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: return False return self.check_first_block(file_prefix) - @classmethod def split(cls, input_datasets: List, subdir_generator_function: Callable, split_params: Optional[Dict]) -> None: """ From c6d30f9fd36aaa57e5c2cd0628c2bbeda78b0274 Mon Sep 17 00:00:00 2001 From: PlushZ Date: Thu, 5 Sep 2024 19:01:05 +0200 Subject: [PATCH 3/4] move display_data to Sequence --- lib/galaxy/datatypes/sequence.py | 47 ++++++++++++++------------------ 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index 5b1aba82c7db..b3a5baa97acc 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -318,30 +318,6 @@ def get_split_commands_sequential( return [cmd] - -class Alignment(data.Text): - """Class describing an alignment""" - - edam_data = "data_0863" - - MetadataElement( - name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True - ) - - @classmethod - def split(cls, input_datasets: List, subdir_generator_function: Callable, split_params: Optional[Dict]) -> None: - """Split a generic alignment file (not sensible or possible, see subclasses).""" - if split_params is None: - return None - raise NotImplementedError("Can't split generic alignment files") - - -class BaseSequence(Sequence): - """ - Common base class provides common methods used by FASTQ and FASTA sequence format classes. - It includes functionality for displaying data shared among FASTQ and FASTA formats. - """ - def display_data( self, trans, @@ -368,11 +344,28 @@ def display_data( headers, ) else: - return Sequence.display_data(self, trans, dataset, preview, filename, to_ext, **kwd) + return super().display_data(self, trans, dataset, preview, filename, to_ext, **kwd) + + +class Alignment(data.Text): + """Class describing an alignment""" + + edam_data = "data_0863" + + MetadataElement( + name="species", desc="Species", default=[], param=metadata.SelectParameter, multiple=True, readonly=True + ) + + @classmethod + def split(cls, input_datasets: List, subdir_generator_function: Callable, split_params: Optional[Dict]) -> None: + """Split a generic alignment file (not sensible or possible, see subclasses).""" + if split_params is None: + return None + raise NotImplementedError("Can't split generic alignment files") @build_sniff_from_prefix -class Fasta(BaseSequence): +class Fasta(Sequence): """Class representing a FASTA sequence""" edam_format = "format_1929" @@ -727,7 +720,7 @@ def set_peek(self, dataset: DatasetProtocol, **kwd) -> None: @build_sniff_from_prefix -class BaseFastq(BaseSequence): +class BaseFastq(Sequence): """Base class for FastQ sequences""" edam_format = "format_1930" From 1bb00d323459b5da05ca857a477058316ed07efa Mon Sep 17 00:00:00 2001 From: PlushZ Date: Thu, 5 Sep 2024 20:19:22 +0200 Subject: [PATCH 4/4] fix arguments to match with Data class --- lib/galaxy/datatypes/sequence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index b3a5baa97acc..844934021a78 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -344,7 +344,7 @@ def display_data( headers, ) else: - return super().display_data(self, trans, dataset, preview, filename, to_ext, **kwd) + return super().display_data(trans, dataset, preview, filename, to_ext, **kwd) class Alignment(data.Text):