Skip to content

Commit c71f410

Browse files
committed
Use CompressedFile.open_tar() to open tar files
1 parent 85ecf0f commit c71f410

File tree

6 files changed

+35
-51
lines changed

6 files changed

+35
-51
lines changed

lib/galaxy/tool_util/verify/interactor.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import re
66
import shutil
77
import sys
8-
import tarfile
98
import tempfile
109
import time
1110
import urllib.parse
@@ -45,6 +44,7 @@
4544
)
4645
from galaxy.util import requests
4746
from galaxy.util.bunch import Bunch
47+
from galaxy.util.compression_utils import CompressedFile
4848
from galaxy.util.hash_util import (
4949
memory_bound_hexdigest,
5050
parse_checksum_hash,
@@ -434,7 +434,14 @@ def test_data_path(self, tool_id, filename, tool_version=None):
434434
return result
435435
raise Exception(result["err_msg"])
436436

437-
def test_data_download(self, tool_id, filename, mode="file", is_output=True, tool_version=None):
437+
def test_data_download(
438+
self,
439+
tool_id: str,
440+
filename: str,
441+
mode: Literal["directory", "file"] = "file",
442+
is_output: bool = True,
443+
tool_version: Optional[str] = None,
444+
):
438445
result = None
439446
local_path = None
440447

@@ -453,10 +460,7 @@ def test_data_download(self, tool_id, filename, mode="file", is_output=True, too
453460
contents.extractall(path=path)
454461
else:
455462
# Galaxy < 21.01
456-
with tarfile.open(fileobj=fileobj) as tar_contents:
457-
tar_contents.extraction_filter = getattr(
458-
tarfile, "data_filter", (lambda member, path: member)
459-
)
463+
with CompressedFile.open_tar(fileobj) as tar_contents:
460464
tar_contents.extractall(path=path)
461465
result = path
462466
else:

lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py

+3-14
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from galaxy.files import ConfiguredFileSources
1818
from galaxy.files.uris import stream_url_to_file
19+
from galaxy.util.compression_utils import CompressedFile
1920

2021
# Set max size of archive/file that will be handled to be 100 GB. This is
2122
# arbitrary and should be adjusted as needed.
@@ -52,19 +53,6 @@ def check_archive(archive_file, dest_dir):
5253
return True
5354

5455

55-
def unpack_archive(archive_file, dest_dir):
56-
"""
57-
Unpack a tar and/or gzipped archive into a destination directory.
58-
"""
59-
if zipfile.is_zipfile(archive_file):
60-
with zipfile.ZipFile(archive_file, "r") as zip_archive:
61-
zip_archive.extractall(path=dest_dir)
62-
else:
63-
with tarfile.open(archive_file, mode="r") as archive_fp:
64-
archive_fp.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
65-
archive_fp.extractall(path=dest_dir)
66-
67-
6856
def main(options, args):
6957
is_url = bool(options.is_url)
7058
is_file = bool(options.is_file)
@@ -84,7 +72,8 @@ def main(options, args):
8472

8573
# Unpack archive.
8674
check_archive(archive_file, dest_dir)
87-
unpack_archive(archive_file, dest_dir)
75+
with CompressedFile(archive_file) as cf:
76+
cf.extract(dest_dir)
8877

8978

9079
if __name__ == "__main__":

lib/galaxy/util/compression_utils.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -345,18 +345,24 @@ def isfile(self, member: ArchiveMemberType) -> bool:
345345
return True
346346
return False
347347

348-
def open_tar(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> tarfile.TarFile:
349-
tf = tarfile.open(filepath, mode, errorlevel=0)
348+
@staticmethod
349+
def open_tar(file: Union[StrPath, IO[bytes]], mode: Literal["a", "r", "w", "x"] = "r") -> tarfile.TarFile:
350+
if isinstance(file, (str, os.PathLike)):
351+
tf = tarfile.open(file, mode=mode, errorlevel=0)
352+
else:
353+
tf = tarfile.open(mode=mode, fileobj=file, errorlevel=0)
350354
# Set a safe default ("data_filter") for the extraction filter if
351355
# available, reverting to Python 3.11 behavior otherwise, see
352356
# https://docs.python.org/3/library/tarfile.html#supporting-older-python-versions
353357
tf.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
354358
return tf
355359

356-
def open_zip(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> zipfile.ZipFile:
357-
return zipfile.ZipFile(filepath, mode)
360+
@staticmethod
361+
def open_zip(file: Union[StrPath, IO[bytes]], mode: Literal["a", "r", "w", "x"] = "r") -> zipfile.ZipFile:
362+
return zipfile.ZipFile(file, mode)
358363

359-
def zipfile_ok(self, path_to_archive: StrPath) -> bool:
364+
@staticmethod
365+
def zipfile_ok(path_to_archive: StrPath) -> bool:
360366
"""
361367
This function is a bit pedantic and not functionally necessary. It checks whether there is
362368
no file pointing outside of the extraction, because ZipFile.extractall() has some potential

lib/tool_shed/test/base/twilltestcase.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
DEFAULT_SOCKET_TIMEOUT,
5656
smart_str,
5757
)
58+
from galaxy.util.compression_utils import CompressedFile
59+
from galaxy.util.resources import as_file
5860
from galaxy_test.base.api_asserts import assert_status_code_is_ok
5961
from galaxy_test.base.api_util import get_admin_api_key
6062
from galaxy_test.base.populators import wait_on_assertion
@@ -64,7 +66,6 @@
6466
hgweb_config,
6567
xml_util,
6668
)
67-
from tool_shed.util.repository_content_util import tar_open
6869
from tool_shed.webapp.model import Repository as DbRepository
6970
from tool_shed_client.schema import (
7071
Category,
@@ -1146,7 +1147,8 @@ def add_file_to_repository(
11461147
target = os.path.basename(source)
11471148
full_target = os.path.join(temp_directory, target)
11481149
full_source = TEST_DATA_REPO_FILES.joinpath(source)
1149-
shutil.copyfile(str(full_source), full_target)
1150+
with as_file(full_source) as full_source_path:
1151+
shutil.copyfile(full_source_path, full_target)
11501152
commit_message = commit_message or "Uploaded revision with added file."
11511153
self._upload_dir_to_repository(
11521154
repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed
@@ -1155,9 +1157,9 @@ def add_file_to_repository(
11551157
def add_tar_to_repository(self, repository: Repository, source: str, strings_displayed=None):
11561158
with self.cloned_repo(repository) as temp_directory:
11571159
full_source = TEST_DATA_REPO_FILES.joinpath(source)
1158-
tar = tar_open(full_source)
1159-
tar.extractall(path=temp_directory)
1160-
tar.close()
1160+
with full_source.open("rb") as full_source_fileobj:
1161+
with CompressedFile.open_tar(full_source_fileobj) as tar:
1162+
tar.extractall(path=temp_directory)
11611163
commit_message = "Uploaded revision with added files from tar."
11621164
self._upload_dir_to_repository(
11631165
repository, temp_directory, commit_message=commit_message, strings_displayed=strings_displayed

lib/tool_shed/util/repository_content_util.py

+2-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import os
22
import shutil
3-
import tarfile
43
import tempfile
54
from typing import (
65
Optional,
@@ -9,7 +8,7 @@
98

109
import tool_shed.repository_types.util as rt_util
1110
from galaxy.tool_shed.util.hg_util import clone_repository
12-
from galaxy.util import checkers
11+
from galaxy.util.compression_utils import CompressedFile
1312
from tool_shed.dependencies.attribute_handlers import (
1413
RepositoryDependencyAttributeHandler,
1514
ToolDependencyAttributeHandler,
@@ -26,21 +25,6 @@
2625
from tool_shed.webapp.model import Repository
2726

2827

29-
def tar_open(uploaded_file):
30-
isgzip = False
31-
isbz2 = False
32-
isgzip = checkers.is_gzip(uploaded_file)
33-
if not isgzip:
34-
isbz2 = checkers.is_bz2(uploaded_file)
35-
if isgzip or isbz2:
36-
# Open for reading with transparent compression.
37-
tar = tarfile.open(uploaded_file, "r:*")
38-
else:
39-
tar = tarfile.open(uploaded_file)
40-
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
41-
return tar
42-
43-
4428
def upload_tar(
4529
trans: "ProvidesRepositoriesContext",
4630
username: str,
@@ -55,7 +39,7 @@ def upload_tar(
5539
) -> ChangeResponseT:
5640
host = trans.repositories_hostname
5741
app = trans.app
58-
tar = tar_open(uploaded_file)
42+
tar = CompressedFile.open_tar(uploaded_file)
5943
rdah = rdah or RepositoryDependencyAttributeHandler(trans, unpopulate=False)
6044
tdah = tdah or ToolDependencyAttributeHandler(trans, unpopulate=False)
6145
# Upload a tar archive of files.

test/unit/tool_shed/test_shed_index.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import os
22
import shutil
3-
import tarfile
43
import tempfile
54
from collections import namedtuple
65
from io import BytesIO
@@ -9,6 +8,7 @@
98
import requests
109
from whoosh import index
1110

11+
from galaxy.util.compression_utils import CompressedFile
1212
from tool_shed.util.shed_index import build_index
1313

1414
URL = "https://github.com/mvdbeek/toolshed-test-data/blob/master/toolshed_community_files.tgz?raw=true"
@@ -29,8 +29,7 @@ def community_file_dir():
2929
response = requests.get(URL)
3030
response.raise_for_status()
3131
b = BytesIO(response.content)
32-
with tarfile.open(fileobj=b, mode="r:gz") as tar:
33-
tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member))
32+
with CompressedFile.open_tar(b) as tar:
3433
tar.extractall(extracted_archive_dir)
3534
try:
3635
yield extracted_archive_dir

0 commit comments

Comments
 (0)