Skip to content

Commit 2a13a0d

Browse files
Precommit
1 parent d0e1eeb commit 2a13a0d

12 files changed

+147
-140
lines changed

S3MP/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
"""S3 MirrorPath package."""
2-
from ._version import __version__
2+
from ._version import __version__
3+
4+
__all__ = ["__version__"]

S3MP/async_utils.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Asynchronous transfer utilities."""
2-
from S3MP.global_config import S3MPConfig
3-
import aioboto3
42
import asyncio
53
from typing import Coroutine, List
4+
5+
import aioboto3
6+
7+
from S3MP.global_config import S3MPConfig
68
from S3MP.mirror_path import MirrorPath
79

10+
811
async def async_upload_from_mirror(mirror_path: MirrorPath):
912
"""Asynchronously upload a file from a MirrorPath."""
1013
session = aioboto3.Session()

S3MP/callbacks.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
1-
"""
2-
S3 callbacks to be used for boto3 transfers (uploads, downloads, and copies).
3-
"""
1+
"""S3 callbacks to be used for boto3 transfers (uploads, downloads, and copies)."""
42
from pathlib import Path
5-
from S3MP.global_config import S3MPConfig
6-
import os
3+
74
import tqdm
85

6+
from S3MP.global_config import S3MPConfig
97
from S3MP.mirror_path import MirrorPath
10-
from S3MP.types import SList, S3Resource
8+
from S3MP.types import S3Resource, SList
119

1210

1311
class FileSizeTQDMCallback(tqdm.tqdm):
@@ -29,25 +27,31 @@ def __init__(
2927
:param is_download: Marker for upload/download transfer.
3028
"""
3129
if transfer_objs is None:
32-
return
30+
return
3331
if resource is None:
3432
resource = S3MPConfig.s3_resource
3533
if bucket_key is None:
3634
bucket_key = S3MPConfig.default_bucket_key
3735
if not isinstance(transfer_objs, list):
3836
transfer_objs = [transfer_objs]
39-
4037

4138
self._total_bytes = 0
4239
for transfer_mapping in transfer_objs:
4340
if is_download:
44-
s3_key = transfer_mapping.s3_key if isinstance(transfer_mapping, MirrorPath) else transfer_mapping
41+
s3_key = (
42+
transfer_mapping.s3_key
43+
if isinstance(transfer_mapping, MirrorPath)
44+
else transfer_mapping
45+
)
4546
self._total_bytes += resource.Object(bucket_key, s3_key).content_length
4647
else:
47-
local_path = transfer_mapping.local_path if isinstance(transfer_mapping, MirrorPath) else transfer_mapping
48+
local_path = (
49+
transfer_mapping.local_path
50+
if isinstance(transfer_mapping, MirrorPath)
51+
else transfer_mapping
52+
)
4853
self._total_bytes += local_path.stat().st_size
4954

50-
5155
transfer_str = "Download" if is_download else "Upload"
5256
super().__init__(
5357
self,

S3MP/global_config.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Set global values for S3MP module."""
2+
import tempfile
23
from configparser import ConfigParser
34
from dataclasses import dataclass
45
from pathlib import Path
5-
import tempfile
66
from typing import Callable
7+
78
import boto3
8-
from S3MP.types import S3Client, S3Resource, S3Bucket, S3TransferConfig
9+
10+
from S3MP.types import S3Bucket, S3Client, S3Resource, S3TransferConfig
11+
912

1013
def get_config_file_path() -> Path:
1114
"""Get the location of the config file."""
@@ -14,17 +17,21 @@ def get_config_file_path() -> Path:
1417

1518

1619
class Singleton(type):
17-
# Singleton metaclass
20+
"""Singleton metaclass."""
21+
1822
_instances = {}
1923

2024
def __call__(cls, *args, **kwargs):
25+
"""Get instance of class."""
2126
if cls not in cls._instances:
2227
cls._instances[cls] = super().__call__(*args, **kwargs)
2328
return cls._instances[cls]
2429

30+
2531
@dataclass
2632
class S3MPConfig(metaclass=Singleton):
2733
"""Singleton class for S3MP globals."""
34+
2835
# Boto3 Objects
2936
_s3_client: S3Client = None
3037
_s3_resource: S3Resource = None
@@ -45,14 +52,14 @@ def s3_client(self) -> S3Client:
4552
if not self._s3_client:
4653
self._s3_client = boto3.client("s3")
4754
return self._s3_client
48-
55+
4956
@property
5057
def s3_resource(self) -> S3Resource:
5158
"""Get S3 resource."""
5259
if not self._s3_resource:
5360
self._s3_resource = boto3.resource("s3")
5461
return self._s3_resource
55-
62+
5663
@property
5764
def bucket(self, bucket_key: str = None) -> S3Bucket:
5865
"""Get bucket."""
@@ -63,42 +70,44 @@ def bucket(self, bucket_key: str = None) -> S3Bucket:
6370
raise ValueError("No default bucket key set.")
6471
self._bucket = self.s3_resource.Bucket(self.default_bucket_key)
6572
return self._bucket
66-
73+
6774
@property
6875
def mirror_root(self) -> Path:
6976
"""Get mirror root."""
7077
if self._mirror_root is None:
71-
print("Mirror Root not set, a temporary directory will be used as the mirror root.")
78+
print(
79+
"Mirror Root not set, a temporary directory will be used as the mirror root."
80+
)
7281
self._mirror_root = Path(tempfile.gettempdir())
7382
return self._mirror_root
74-
83+
7584
def load_config(self, config_file_path: Path = None):
7685
"""Load the config file."""
7786
config_file_path = config_file_path or get_config_file_path()
7887
config = ConfigParser()
7988
config.read(config_file_path)
80-
89+
8190
if "DEFAULT" not in config:
82-
return
83-
91+
return
92+
8493
if "default_bucket_key" in config["DEFAULT"]:
8594
self.default_bucket_key = config["DEFAULT"]["default_bucket_key"]
86-
95+
8796
if "mirror_root" in config["DEFAULT"]:
8897
self._mirror_root = Path(config["DEFAULT"]["mirror_root"])
89-
98+
9099
def save_config(self, config_file_path: Path = None):
91100
"""Write config file."""
92101
config_file_path = config_file_path or get_config_file_path()
93102
config = ConfigParser()
94-
config['DEFAULT'] = {}
103+
config["DEFAULT"] = {}
95104
if self.default_bucket_key:
96-
config['DEFAULT']['default_bucket_key'] = self.default_bucket_key
105+
config["DEFAULT"]["default_bucket_key"] = self.default_bucket_key
97106
if self._mirror_root:
98-
config['DEFAULT']['mirror_root'] = str(self._mirror_root)
99-
with open(config_file_path, 'w') as configfile:
107+
config["DEFAULT"]["mirror_root"] = str(self._mirror_root)
108+
with open(config_file_path, "w") as configfile:
100109
config.write(configfile)
101110

102111

103-
S3MPConfig = S3MPConfig()
112+
S3MPConfig = S3MPConfig()
104113
S3MPConfig.load_config()

S3MP/keys.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""S3 key modification utilities."""
2-
from enum import Enum
32
import itertools
43
from dataclasses import dataclass
4+
from enum import Enum
55
from typing import List, Tuple
6-
from S3MP.prefix_queries import get_folders_within_folder, get_files_within_folder
6+
7+
from S3MP.prefix_queries import get_files_within_folder, get_folders_within_folder
78

89

910
@dataclass
@@ -21,16 +22,16 @@ def __call__(self, *args, **kwargs):
2122
"""Set data via calling."""
2223
self = self.__copy__()
2324
if len(args) == 1:
24-
if type(args[0]) == str:
25-
self.name = args[0]
25+
if isinstance(args[0], str):
26+
self.name = args[0]
2627
elif isinstance(args[0], Enum):
2728
self.name = args[0].value
2829
else:
29-
try:
30+
try:
3031
self.name = str(args[0])
31-
except:
32+
except Exception:
3233
raise TypeError(f"Cannot convert {args[0]} to str.")
33-
34+
3435
for key in self.__dict__.keys():
3536
if key in kwargs:
3637
setattr(self, key, kwargs[key])
@@ -40,11 +41,11 @@ def __call__(self, *args, **kwargs):
4041
def __copy__(self):
4142
"""Copy."""
4243
return KeySegment(self.depth, self.name, self.is_file, self.incomplete_name)
43-
44+
4445
def copy(self):
4546
"""Copy."""
4647
return self.__copy__()
47-
48+
4849
def __repr__(self):
4950
"""Class representation."""
5051
return f"{self.__class__.__name__}(depth={self.depth}, name={self.name}, is_file={self.is_file}, incomplete_name={self.incomplete_name})"
@@ -77,7 +78,7 @@ def replace_key_segments(
7778
key: str, segments: List[KeySegment], max_len: int = None
7879
) -> str:
7980
"""Replace segments of a key with new segments."""
80-
if type(segments) == KeySegment:
81+
if isinstance(segments, KeySegment):
8182
segments = [segments]
8283
segments = sorted(segments, key=lambda x: x.depth)
8384
key_segments = key.split("/")
@@ -102,9 +103,10 @@ def replace_key_segments(
102103
def replace_key_segments_at_relative_depth(key: str, segments: List[KeySegment]) -> str:
103104
"""
104105
Replace segments of a key with new segments at a relative depth.
106+
105107
0 would be the deepest segment, -1 would be the second deepest, etc.
106108
"""
107-
if type(segments) == KeySegment:
109+
if isinstance(segments, KeySegment):
108110
segments = [segments]
109111
segments = sorted(segments, key=lambda x: x.depth)
110112
key_segments = [seg for seg in key.split("/") if seg]
@@ -166,7 +168,7 @@ async def dfs_matching_key_gen(
166168
def sync_dfs_matching_key_gen(
167169
segments: List[KeySegment], path: str = None, current_depth: int = None
168170
):
169-
"""Synchronous generation of all matching keys from a path, depth first."""
171+
"""Generate all matching keys synchronously from a path, depth first."""
170172
if current_depth is None:
171173
segments = sorted(segments, key=lambda x: x.depth)
172174
path, current_depth = build_s3_key(segments)

S3MP/multipart_uploads.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
"""S3MP multipart uploads."""
22
import concurrent.futures
33
import math
4-
import S3MP
5-
from S3MP.async_utils import sync_gather_threads
6-
from S3MP.global_config import S3MPConfig
7-
from S3MP.transfer_configs import MB
84

5+
import S3MP # noqa: F401
6+
from S3MP.global_config import S3MPConfig
97
from S3MP.mirror_path import MirrorPath
8+
from S3MP.transfer_configs import MB
109
from S3MP.types import S3Bucket
1110

1211

@@ -53,7 +52,7 @@ def resume_multipart_upload(
5352
with open(mirror_path.local_path, "rb") as f:
5453
with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
5554
# Verify existing parts.
56-
assert(all(part.size == part_size for part in mpu_parts[:-1]))
55+
assert all(part.size == part_size for part in mpu_parts[:-1])
5756

5857
f.seek(part_size * n_uploaded_parts)
5958
if S3MPConfig.callback:
@@ -75,13 +74,12 @@ def resume_multipart_upload(
7574
if S3MPConfig.callback:
7675
S3MPConfig.callback(part_size)
7776

78-
obj = mpu.complete(
79-
MultipartUpload=mpu_dict
80-
)
77+
obj = mpu.complete(MultipartUpload=mpu_dict)
8178
if abs(total_size_bytes - obj.content_length) > MB:
8279
print()
83-
print(f"Uploaded size {obj.content_length} does not match local size {total_size_bytes}")
80+
print(
81+
f"Uploaded size {obj.content_length} does not match local size {total_size_bytes}"
82+
)
8483
obj.delete()
8584
print("Deleted object, restarting upload.")
8685
return resume_multipart_upload(mirror_path, max_threads=max_threads)
87-

S3MP/prefix_queries.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
"""S3 prefix queries."""
22
from __future__ import annotations
3+
34
from typing import List
5+
46
from S3MP.global_config import S3MPConfig
57

68

79
def get_prefix_paginator(folder_key: str, bucket_key: str = None, delimiter: str = "/"):
810
"""Get a paginator for a specified prefix."""
911
if not bucket_key:
1012
bucket_key = S3MPConfig.default_bucket_key
11-
if folder_key != '' and folder_key[-1] != "/":
13+
if folder_key != "" and folder_key[-1] != "/":
1214
folder_key += "/"
1315
s3_client = S3MPConfig.s3_client
1416
paginator = s3_client.get_paginator("list_objects_v2")
15-
return paginator.paginate(
16-
Bucket=bucket_key, Prefix=folder_key, Delimiter=delimiter
17-
)
17+
return paginator.paginate(Bucket=bucket_key, Prefix=folder_key, Delimiter=delimiter)
1818

1919

2020
def get_files_within_folder(folder_key: str, key_filter: str = None) -> List[str]:
@@ -37,4 +37,3 @@ def get_folders_within_folder(folder_key: str, key_filter: str = None) -> List[s
3737
if key_filter and key_filter not in obj:
3838
continue
3939
yield obj
40-

S3MP/transfer_configs.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
11
"""Transfer configurations and utilities."""
2+
from s3transfer.constants import GB, KB, MB
3+
24
from S3MP.global_config import S3MPConfig
35
from S3MP.types import S3TransferConfig
4-
from s3transfer.constants import KB, MB, GB
56

67

78
def get_transfer_config(
8-
n_threads: int,
9+
n_threads: int,
910
block_size: int = 8 * MB,
1011
max_ram: int = 4 * GB,
1112
io_queue_size: int = 10e4,
1213
io_chunk_size: int = 256 * KB,
1314
set_global: bool = True,
1415
) -> S3TransferConfig:
1516
"""Get transfer config."""
16-
1717
max_in_mem_upload_chunks = (max_ram - (n_threads * block_size)) // block_size
18-
max_in_mem_download_chunks = (max_ram // block_size)
18+
max_in_mem_download_chunks = max_ram // block_size
1919

2020
config = S3TransferConfig(
2121
multipart_threshold=block_size,
@@ -27,7 +27,7 @@ def get_transfer_config(
2727
max_io_queue_size=io_queue_size,
2828
io_chunksize=io_chunk_size,
2929
)
30-
config.use_threads = (n_threads > 1)
30+
config.use_threads = n_threads > 1
3131
if set_global:
3232
S3MPConfig.transfer_config = config
33-
return config
33+
return config

0 commit comments

Comments
 (0)