Skip to content

Commit 2fc29ed

Browse files
authored
Merge pull request #1 from effigies/rf/upath
rf: Replace direntry with UPath in FileTree
2 parents 1360680 + 0d31404 commit 2fc29ed

File tree

3 files changed

+68
-86
lines changed

3 files changed

+68
-86
lines changed

src/bids_validator/context.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def datatype_to_modality(datatype: str, schema: Namespace) -> str:
6161
@cache
6262
def load_tsv(file: FileTree, *, max_rows=0) -> Namespace:
6363
"""Load TSV contents into a Namespace."""
64-
with open(file) as fobj:
64+
with file.path_obj.open() as fobj:
6565
if max_rows > 0:
6666
fobj = itertools.islice(fobj, max_rows)
6767
contents = (line.rstrip('\r\n').split('\t') for line in fobj)
@@ -72,7 +72,7 @@ def load_tsv(file: FileTree, *, max_rows=0) -> Namespace:
7272
@cache
7373
def load_json(file: FileTree) -> dict[str]:
7474
"""Load JSON file contents."""
75-
return orjson.loads(UPath(file).read_bytes())
75+
return orjson.loads(file.path_obj.read_bytes())
7676

7777

7878
class Subjects:
@@ -360,7 +360,7 @@ def modality(self) -> str | None:
360360
@property
361361
def size(self) -> int:
362362
"""Length of the current file in bytes."""
363-
return self.file.direntry.stat().st_size
363+
return self.file.path_obj.stat().st_size
364364

365365
@property
366366
def associations(self) -> ctx.Associations:

src/bids_validator/types/files.py

Lines changed: 27 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -4,103 +4,50 @@
44

55
import os
66
import posixpath
7-
import stat
87
from functools import cached_property
98
from pathlib import Path
109

1110
import attrs
11+
from upath import UPath
1212

1313
from . import _typings as t
1414

1515
__all__ = ('FileTree',)
1616

1717

18-
@attrs.define
19-
class UserDirEntry:
20-
"""Partial reimplementation of :class:`os.DirEntry`.
21-
22-
:class:`os.DirEntry` can't be instantiated from Python, but this can.
23-
"""
24-
25-
path: str = attrs.field(repr=False, converter=os.fspath)
26-
name: str = attrs.field(init=False)
27-
_stat: os.stat_result = attrs.field(init=False, repr=False, default=None)
28-
_lstat: os.stat_result = attrs.field(init=False, repr=False, default=None)
29-
30-
def __attrs_post_init__(self) -> None:
31-
self.name = os.path.basename(self.path)
32-
33-
def __fspath__(self) -> str:
34-
return self.path
35-
36-
def stat(self, *, follow_symlinks: bool = True) -> os.stat_result:
37-
"""Return stat_result object for the entry; cached per entry."""
38-
if follow_symlinks:
39-
if self._stat is None:
40-
self._stat = os.stat(self.path, follow_symlinks=True)
41-
return self._stat
42-
else:
43-
if self._lstat is None:
44-
self._lstat = os.stat(self.path, follow_symlinks=False)
45-
return self._lstat
46-
47-
def is_dir(self, *, follow_symlinks: bool = True) -> bool:
48-
"""Return True if the entry is a directory; cached per entry."""
49-
_stat = self.stat(follow_symlinks=follow_symlinks)
50-
return stat.S_ISDIR(_stat.st_mode)
51-
52-
def is_file(self, *, follow_symlinks: bool = True) -> bool:
53-
"""Return True if the entry is a file; cached per entry."""
54-
_stat = self.stat(follow_symlinks=follow_symlinks)
55-
return stat.S_ISREG(_stat.st_mode)
56-
57-
def is_symlink(self) -> bool:
58-
"""Return True if the entry is a symlink; cached per entry."""
59-
_stat = self.stat(follow_symlinks=False)
60-
return stat.S_ISLNK(_stat.st_mode)
61-
62-
63-
def as_direntry(obj: os.PathLike) -> os.DirEntry | UserDirEntry:
64-
"""Convert PathLike into DirEntry-like object."""
65-
if isinstance(obj, os.DirEntry):
66-
return obj
67-
return UserDirEntry(obj)
68-
69-
70-
@attrs.define
18+
@attrs.define(frozen=True)
7119
class FileTree:
7220
"""Represent a FileTree with cached metadata."""
7321

74-
direntry: os.DirEntry | UserDirEntry = attrs.field(repr=False, converter=as_direntry)
75-
parent: FileTree | None = attrs.field(repr=False, default=None)
76-
is_dir: bool = attrs.field(default=False)
77-
children: dict[str, FileTree] = attrs.field(repr=False, factory=dict)
78-
name: str = attrs.field(init=False)
22+
path_obj: UPath = attrs.field(repr=False, converter=UPath)
23+
is_dir: bool = attrs.field(repr=False, default=None)
24+
parent: FileTree | None = attrs.field(repr=False, default=None, eq=False)
25+
children: dict[str, FileTree] = attrs.field(repr=False, factory=dict, eq=False)
7926

8027
def __attrs_post_init__(self):
81-
self.name = self.direntry.name
82-
self.children = {
83-
name: attrs.evolve(child, parent=self) for name, child in self.children.items()
84-
}
28+
if self.is_dir is None:
29+
object.__setattr__(self, 'is_dir', self.path_obj.is_dir())
30+
object.__setattr__(
31+
self,
32+
'children',
33+
{name: attrs.evolve(child, parent=self) for name, child in self.children.items()},
34+
)
8535

8636
@classmethod
87-
def read_from_filesystem(
88-
cls,
89-
direntry: os.PathLike,
90-
parent: FileTree | None = None,
91-
) -> t.Self:
92-
"""Read a FileTree from the filesystem.
93-
94-
Uses :func:`os.scandir` to walk the directory tree.
95-
"""
96-
self = cls(direntry, parent=parent)
97-
if self.direntry.is_dir():
98-
self.is_dir = True
99-
self.children = {
100-
entry.name: FileTree.read_from_filesystem(entry, parent=self)
101-
for entry in os.scandir(self.direntry)
37+
def read_from_filesystem(cls, path_obj: os.PathLike) -> t.Self:
38+
"""Read a FileTree from the filesystem."""
39+
path_obj = UPath(path_obj)
40+
children = {}
41+
if is_dir := path_obj.is_dir():
42+
children = {
43+
entry.name: FileTree.read_from_filesystem(entry) for entry in path_obj.iterdir()
10244
}
103-
return self
45+
return cls(path_obj, is_dir=is_dir, children=children)
46+
47+
@property
48+
def name(self) -> str:
49+
"""The name of the current FileTree node."""
50+
return self.path_obj.name
10451

10552
def __contains__(self, relpath: os.PathLike) -> bool:
10653
parts = Path(relpath).parts
@@ -110,10 +57,7 @@ def __contains__(self, relpath: os.PathLike) -> bool:
11057
return child and (len(parts) == 1 or posixpath.join(*parts[1:]) in child)
11158

11259
def __fspath__(self):
113-
return self.direntry.path
114-
115-
def __hash__(self):
116-
return hash(self.direntry.path)
60+
return self.path_obj.__fspath__()
11761

11862
def __truediv__(self, relpath: str | os.PathLike) -> t.Self:
11963
parts = Path(relpath).parts

tests/test_context.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import json
2+
3+
import fsspec
14
import pytest
25
from bidsschematools.types.context import Subject
36

@@ -10,6 +13,14 @@ def synthetic_dataset(examples):
1013
return FileTree.read_from_filesystem(examples / 'synthetic')
1114

1215

16+
@pytest.fixture
17+
def memfs():
18+
mem = fsspec.filesystem('memory')
19+
mem.store.clear()
20+
yield mem
21+
mem.store.clear()
22+
23+
1324
def test_load(synthetic_dataset, schema):
1425
ds = context.Dataset(synthetic_dataset, schema)
1526

@@ -115,6 +126,33 @@ def test_sidecar_inheritance(examples):
115126
assert sidecar['RepetitionTimePreparation'] == 5.5
116127

117128

129+
def test_sidecar_order(memfs):
130+
"""Test that sidecar values from files closer to the data file override those from higher levels."""
131+
root_json = {'rootOverwriteA': 'root', 'rootOverwriteB': 'root', 'rootValue': 'root'}
132+
subject_json = {'rootOverwriteA': 'subject', 'subOverwrite': 'subject', 'subValue': 'subject'}
133+
anat_json = {'rootOverwriteB': 'anat', 'subOverwrite': 'anat', 'anatValue': 'anat'}
134+
memfs.pipe(
135+
{
136+
'/T1w.json': json.dumps(root_json).encode(),
137+
'/sub-01/sub-01_T1w.json': json.dumps(subject_json).encode(),
138+
'/sub-01/anat/sub-01_T1w.json': json.dumps(anat_json).encode(),
139+
'/sub-01/anat/sub-01_T1w.nii': b'',
140+
}
141+
)
142+
143+
dataset = FileTree.read_from_filesystem('memory://')
144+
file = dataset / 'sub-01' / 'anat' / 'sub-01_T1w.nii'
145+
sidecar = context.load_sidecar(file)
146+
assert sidecar == {
147+
'rootValue': 'root',
148+
'subValue': 'subject',
149+
'rootOverwriteA': 'subject',
150+
'anatValue': 'anat',
151+
'rootOverwriteB': 'anat',
152+
'subOverwrite': 'anat',
153+
}
154+
155+
118156
def test_sessions(synthetic_dataset):
119157
sub01 = synthetic_dataset / 'sub-01'
120158

0 commit comments

Comments
 (0)