Skip to content

Commit bcca1d3

Browse files
committed
Add azure flat file source plugin
1 parent 591e466 commit bcca1d3

File tree

3 files changed

+174
-0
lines changed

3 files changed

+174
-0
lines changed

client/src/api/schema/schema.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11903,6 +11903,7 @@ export interface components {
1190311903
| "posix"
1190411904
| "s3fs"
1190511905
| "azure"
11906+
| "azureflat"
1190611907
| "onedata"
1190711908
| "webdav"
1190811909
| "dropbox"
@@ -23061,6 +23062,7 @@ export interface components {
2306123062
| "posix"
2306223063
| "s3fs"
2306323064
| "azure"
23065+
| "azureflat"
2306423066
| "onedata"
2306523067
| "webdav"
2306623068
| "dropbox"
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import logging
2+
from typing import (
3+
Optional,
4+
Union,
5+
)
6+
7+
from galaxy import exceptions
8+
from galaxy.files.models import (
9+
AnyRemoteEntry,
10+
FilesSourceRuntimeContext,
11+
)
12+
from galaxy.files.sources._fsspec import (
13+
CacheOptionsDictType,
14+
FsspecBaseFileSourceConfiguration,
15+
FsspecBaseFileSourceTemplateConfiguration,
16+
FsspecFilesSource,
17+
)
18+
from galaxy.util.config_templates import TemplateExpansion
19+
20+
try:
21+
from adlfs import AzureBlobFileSystem
22+
except ImportError:
23+
AzureBlobFileSystem = None
24+
25+
26+
REQUIRED_PACKAGE = FS_PLUGIN_TYPE = "azure_flat"
27+
28+
log = logging.getLogger(__name__)
29+
30+
31+
class AzureFlatFileSourceTemplateConfiguration(FsspecBaseFileSourceTemplateConfiguration):
32+
account_name: Union[str, TemplateExpansion]
33+
container_name: Union[str, TemplateExpansion]
34+
account_key: Union[str, TemplateExpansion]
35+
36+
37+
class AzureFlatFileSourceConfiguration(FsspecBaseFileSourceConfiguration):
38+
account_name: str
39+
container_name: str
40+
account_key: str
41+
42+
43+
class AzureFlatFilesSource(
44+
FsspecFilesSource[AzureFlatFileSourceTemplateConfiguration, AzureFlatFileSourceConfiguration]
45+
):
46+
plugin_type = FS_PLUGIN_TYPE
47+
required_module = AzureBlobFileSystem
48+
required_package = REQUIRED_PACKAGE
49+
template_config_class = AzureFlatFileSourceTemplateConfiguration
50+
resolved_config_class = AzureFlatFileSourceConfiguration
51+
52+
def _open_fs(
53+
self, context: FilesSourceRuntimeContext[AzureFlatFileSourceConfiguration], cache_options: CacheOptionsDictType
54+
):
55+
if AzureBlobFileSystem is None:
56+
raise self.required_package_exception
57+
else:
58+
config = context.config
59+
fs = AzureBlobFileSystem(account_name=config.account_name, credential=config.account_key, **cache_options)
60+
return fs
61+
62+
def _to_container_path(self, path: str, config: AzureFlatFileSourceConfiguration) -> str:
63+
result = ""
64+
if path.startswith("az://"):
65+
result = path.replace("az://", "", 1)
66+
else:
67+
container = config.container_name
68+
if not container and not path.startswith("az://"):
69+
raise exceptions.MessageException("Container name is required for AzureFlatFilesSource.")
70+
else:
71+
result = self._container_path(container or "", path)
72+
return result
73+
74+
def _adapt_entry_path(self, filesystem_path: str) -> str:
75+
result = filesystem_path
76+
container_name = self.template_config.container_name
77+
if container_name:
78+
prefix = f"{container_name}/"
79+
if filesystem_path.startswith(prefix):
80+
result = filesystem_path.replace(prefix, "", 1)
81+
else:
82+
result = filesystem_path
83+
else:
84+
result = filesystem_path
85+
return result
86+
87+
def _list(
88+
self,
89+
context: FilesSourceRuntimeContext[AzureFlatFileSourceConfiguration],
90+
path: str = "/",
91+
recursive: bool = False,
92+
write_intent: bool = False,
93+
limit: Optional[int] = None,
94+
offset: Optional[int] = None,
95+
query: Optional[str] = None,
96+
sort_by: Optional[str] = None,
97+
) -> tuple[list[AnyRemoteEntry], int]:
98+
container_path = self._to_container_path(path, context.config)
99+
entries, count = super()._list(
100+
context=context,
101+
path=container_path,
102+
recursive=recursive,
103+
limit=limit,
104+
offset=offset,
105+
query=query,
106+
sort_by=sort_by,
107+
)
108+
return entries, count
109+
110+
def _realize_to(
111+
self, source_path: str, native_path: str, context: FilesSourceRuntimeContext[AzureFlatFileSourceConfiguration]
112+
):
113+
container_path = self._to_container_path(source_path, context.config)
114+
super()._realize_to(source_path=container_path, native_path=native_path, context=context)
115+
116+
def _write_from(
117+
self, target_path: str, native_path: str, context: FilesSourceRuntimeContext[AzureFlatFileSourceConfiguration]
118+
):
119+
container_path = self._to_container_path(target_path, context.config)
120+
super()._write_from(target_path=container_path, native_path=native_path, context=context)
121+
122+
def _container_path(self, container_name: str, path: str) -> str:
123+
adjusted_path = path
124+
if path.startswith("az://"):
125+
adjusted_path = path.replace("az://", "", 1)
126+
else:
127+
if not path.startswith("/"):
128+
adjusted_path = f"/{path}"
129+
else:
130+
adjusted_path = path
131+
return f"{container_name}{adjusted_path}"
132+
133+
def score_url_match(self, url: str):
134+
container_name = self.template_config.container_name
135+
result = 0
136+
if container_name:
137+
prefix = f"az://{container_name}"
138+
if url.startswith(f"{prefix}/") or url == prefix:
139+
result = len(prefix)
140+
else:
141+
result = super().score_url_match(url)
142+
else:
143+
if url.startswith("az://"):
144+
result = len("az://")
145+
else:
146+
result = super().score_url_match(url)
147+
return result
148+
149+
150+
__all__ = ("AzureFlatFilesSource",)

lib/galaxy/files/templates/models.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"posix",
3737
"s3fs",
3838
"azure",
39+
"azureflat",
3940
"onedata",
4041
"webdav",
4142
"dropbox",
@@ -173,6 +174,24 @@ class AzureFileSourceConfiguration(StrictModel):
173174
writable: bool = False
174175

175176

177+
class AzureFlatFileSourceTemplateConfiguration(StrictModel):
178+
type: Literal["azureflat"]
179+
account_name: Union[str, TemplateExpansion]
180+
container_name: Union[str, TemplateExpansion]
181+
account_key: Union[str, TemplateExpansion]
182+
writable: Union[bool, TemplateExpansion] = False
183+
template_start: Optional[str] = None
184+
template_end: Optional[str] = None
185+
186+
187+
class AzureFlatFileSourceConfiguration(StrictModel):
188+
type: Literal["azureflat"]
189+
account_name: str
190+
container_name: str
191+
account_key: str
192+
writable: bool = False
193+
194+
176195
class OnedataFileSourceTemplateConfiguration(StrictModel):
177196
type: Literal["onedata"]
178197
access_token: Union[str, TemplateExpansion]
@@ -317,6 +336,7 @@ class HuggingFaceFileSourceConfiguration(StrictModel):
317336
S3FSFileSourceTemplateConfiguration,
318337
FtpFileSourceTemplateConfiguration,
319338
AzureFileSourceTemplateConfiguration,
339+
AzureFlatFileSourceTemplateConfiguration,
320340
OnedataFileSourceTemplateConfiguration,
321341
WebdavFileSourceTemplateConfiguration,
322342
DropboxFileSourceTemplateConfiguration,
@@ -337,6 +357,7 @@ class HuggingFaceFileSourceConfiguration(StrictModel):
337357
S3FSFileSourceConfiguration,
338358
FtpFileSourceConfiguration,
339359
AzureFileSourceConfiguration,
360+
AzureFlatFileSourceConfiguration,
340361
OnedataFileSourceConfiguration,
341362
WebdavFileSourceConfiguration,
342363
DropboxFileSourceConfiguration,
@@ -415,6 +436,7 @@ def template_to_configuration(
415436
"posix": PosixFileSourceConfiguration,
416437
"s3fs": S3FSFileSourceConfiguration,
417438
"azure": AzureFileSourceConfiguration,
439+
"azureflat": AzureFlatFileSourceConfiguration,
418440
"onedata": OnedataFileSourceConfiguration,
419441
"webdav": WebdavFileSourceConfiguration,
420442
"dropbox": DropboxFileSourceConfiguration,

0 commit comments

Comments
 (0)