Skip to content

Commit 084ff3e

Browse files
committed
Implemented file-system mount identification and handling in FileSet.copy
1 parent 3d7ac49 commit 084ff3e

File tree

10 files changed

+128
-29
lines changed

10 files changed

+128
-29
lines changed

fileformats/core/datatype.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
classproperty,
1414
subpackages,
1515
add_exc_note,
16+
)
17+
from .identification import (
1618
to_mime_format_name,
1719
from_mime_format_name,
1820
IANA_MIME_TYPE_REGISTRIES,

fileformats/core/fileset.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
from .utils import (
1717
classproperty,
1818
fspaths_converter,
19-
to_mime_format_name,
20-
IANA_MIME_TYPE_REGISTRIES,
2119
describe_task,
2220
matching_source,
2321
import_extras_module,
24-
SampleFileGenerator,
22+
)
23+
from .sampling import SampleFileGenerator
24+
from .identification import (
25+
to_mime_format_name,
26+
IANA_MIME_TYPE_REGISTRIES,
2527
)
2628
from .converter import SubtypeVar
2729
from .classifier import Classifier
@@ -36,6 +38,8 @@
3638
)
3739
from .datatype import DataType
3840
from . import hook
41+
from .fs_mount_identifier import FsMountIdentifier
42+
3943

4044
try:
4145
from typing import Self
@@ -1181,6 +1185,7 @@ class CopyMode(Enum):
11811185

11821186
# All other combinations (typically the result of bit-masking)
11831187

1188+
leave_or_copy = 0b1001
11841189
leave_or_symlink = 0b0011
11851190
leave_or_hardlink = 0b0101
11861191
leave_or_link = 0b0111
@@ -1297,20 +1302,49 @@ def copy(
12971302
if isinstance(collation, str)
12981303
else collation
12991304
)
1300-
if new_stem:
1305+
# Rule out any copy modes that are not supported given the collation mode
1306+
# and file-system mounts the paths and destination directory reside on
1307+
constraints = []
1308+
if FsMountIdentifier.on_cifs(dest_dir) and mode & self.CopyMode.symlink:
1309+
supported_modes -= self.CopyMode.symlink
1310+
constraint = (
1311+
f"Destination directory is on CIFS mount ({dest_dir}) "
1312+
"and we therefore cannot create a symlink"
1313+
)
1314+
logger.debug(constraint)
1315+
constraints.append(constraint)
1316+
not_on_same_mount = [
1317+
p for p in self.fspaths if not FsMountIdentifier.on_same_mount(p, dest_dir)
1318+
]
1319+
if not_on_same_mount and mode & self.CopyMode.hardlink:
1320+
supported_modes -= self.CopyMode.hardlink
1321+
constraint = (
1322+
f"Some paths ({', '.join(str(p) for p in not_on_same_mount)}) are on "
1323+
f"not on same file-system mount as the destination directory {dest_dir}"
1324+
"and therefore cannot be hard-linked"
1325+
)
1326+
logger.debug(constraint)
1327+
constraints.append(constraint)
1328+
if new_stem or (
1329+
collation >= self.CopyCollation.siblings
1330+
and not all(p.parent == self.parent for p in self.fspaths)
1331+
):
13011332
supported_modes -= self.CopyMode.leave
1333+
1334+
# Get the intersection of copy modes that are supported and have been requested
13021335
selected_mode = mode & supported_modes
1303-
if collation >= self.CopyCollation.siblings:
1304-
if not all(p.parent == self.parent for p in self.fspaths):
1305-
selected_mode -= self.CopyMode.leave
13061336
if not selected_mode:
1307-
raise FileFormatsError(
1308-
f"Cannot copy {self} using {mode} mode as it is not supported by "
1309-
f"the {supported_modes} given the collation specification, {collation}"
1337+
msg = (
1338+
f"Cannot copy {self} using '{mode}' mode as it is not supported by "
1339+
f"the '{supported_modes}' given the collation specification, {collation}"
13101340
)
1341+
if constraints:
1342+
msg += ", and the following constraints:\n" + "\n".join(constraints)
1343+
raise FileFormatsError(msg)
13111344
if selected_mode & self.CopyMode.leave:
13121345
return self # Don't need to do anything
13131346

1347+
# Select inner copy/link methods
13141348
if selected_mode & self.CopyMode.symlink:
13151349
copy_dir = copy_file = os.symlink
13161350
elif selected_mode & self.CopyMode.hardlink:
@@ -1339,10 +1373,12 @@ def hardlink_dir(src: Path, dest: Path):
13391373
extension_decomposition=extension_decomposition,
13401374
)
13411375

1342-
dest_dir = Path(dest_dir) # ensure a Path not a string
1376+
# Prepare destination directory
1377+
dest_dir = Path(dest_dir)
13431378
if make_dirs:
13441379
dest_dir.mkdir(parents=True, exist_ok=True)
13451380

1381+
# Iterate through the paths to copy, copying them to the destination directory
13461382
new_paths = []
13471383
for fspath in fspaths_to_copy:
13481384
new_path, fspath = self._new_copy_path(

fileformats/core/fs_mount_identifier.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .utils import logger
88

99

10-
class FileSystemMountIndentifier:
10+
class FsMountIdentifier:
1111
"""Used to check the mount type that given file paths reside on in order to determine
1212
features that can be used (e.g. symlinks)"""
1313

fileformats/core/identification.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,11 @@ def to_mime(datatype: type, official: bool = True):
157157

158158
def from_paths(
159159
fspaths: ty.Iterable[Path],
160-
*candidates: ty.Tuple[ty.Type[fileformats.core.FileSet]],
160+
*candidates: ty.Tuple[ty.Type["fileformats.core.FileSet"]],
161161
common_ok: bool = False,
162162
ignore: ty.Optional[str] = None,
163163
**kwargs,
164-
) -> ty.List[fileformats.core.FileSet]:
164+
) -> ty.List["fileformats.core.FileSet"]:
165165
"""Given a list of candidate classes (defaults to all installed in alphabetical order),
166166
instantiates all possible file-set instances from a collection of file-system paths.
167167

fileformats/core/mixin.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
import logging
55
from . import hook
66
from .fileset import FileSet
7-
from .utils import classproperty, describe_task, to_mime_format_name, matching_source
7+
from .utils import classproperty, describe_task, matching_source
8+
from .identification import to_mime_format_name
89
from .converter import SubtypeVar
910
from .exceptions import FileFormatsError, FormatMismatchError, FormatRecognitionError
1011

fileformats/core/sampling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def rng(self):
4545

4646
def generate(
4747
self,
48-
file_type: ty.Type[fileformats.core.FileSet],
48+
file_type: ty.Type["fileformats.core.FileSet"],
4949
contents: ty.Union[str, bytes] = None,
5050
fill: int = 0,
5151
**kwargs,
@@ -99,7 +99,7 @@ def generate(
9999

100100
def generate_fspath(
101101
self,
102-
file_type: ty.Optional[ty.Type[fileformats.core.FileSet]] = None,
102+
file_type: ty.Optional[ty.Type["fileformats.core.FileSet"]] = None,
103103
fname_stem: ty.Optional[str] = None,
104104
relpath: ty.Optional[Path] = None,
105105
):

fileformats/core/tests/test_fs_mounts.py renamed to fileformats/core/tests/test_fs_mount_identifier.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import os.path
12
import pytest
2-
from ..fs_mounts import MountIndentifier
3+
from fileformats.core.fs_mount_identifier import FsMountIdentifier
4+
from fileformats.generic import File
35

46

57
MOUNT_OUTPUTS = (
@@ -158,12 +160,12 @@
158160

159161
@pytest.mark.parametrize("output, exit_code, expected", MOUNT_OUTPUTS)
160162
def test_parse_mount_table(output, exit_code, expected):
161-
assert MountIndentifier.parse_mount_table(exit_code, output) == expected
163+
assert FsMountIdentifier.parse_mount_table(exit_code, output) == expected
162164

163165

164166
def test_cifs_check():
165-
assert isinstance(MountIndentifier.get_mount_table(), list)
166-
assert isinstance(MountIndentifier.on_cifs("/"), bool)
167+
assert isinstance(FsMountIdentifier.get_mount_table(), list)
168+
assert isinstance(FsMountIdentifier.on_cifs("/"), bool)
167169
fake_table = [("/scratch/tmp", "ext4"), ("/scratch", "cifs")]
168170
cifs_targets = [
169171
("/scratch/tmp/x/y", False),
@@ -175,10 +177,68 @@ def test_cifs_check():
175177
("/", False),
176178
]
177179

178-
with MountIndentifier.patch_table([]):
180+
with FsMountIdentifier.patch_table([]):
179181
for target, _ in cifs_targets:
180-
assert MountIndentifier.on_cifs(target) is False
182+
assert FsMountIdentifier.on_cifs(target) is False
181183

182-
with MountIndentifier.patch_table(fake_table):
184+
with FsMountIdentifier.patch_table(fake_table):
183185
for target, expected in cifs_targets:
184-
assert MountIndentifier.on_cifs(target) is expected
186+
assert FsMountIdentifier.on_cifs(target) is expected
187+
188+
189+
def test_copy_constraints(tmp_path):
190+
191+
ext4_mnt1 = tmp_path / "ext4_mnt1"
192+
ext4_mnt2 = tmp_path / "ext4_mnt2"
193+
cifs_mnt = tmp_path / "cifs_mnt"
194+
195+
fake_mount_table = [
196+
(str(ext4_mnt1), "ext4"),
197+
(str(ext4_mnt2), "ext4"),
198+
(str(cifs_mnt), "cifs"),
199+
]
200+
201+
# Create sample files
202+
ext4_file = File.sample(dest_dir=ext4_mnt1, seed=1)
203+
cifs_file = File.sample(dest_dir=cifs_mnt, seed=3)
204+
205+
with FsMountIdentifier.patch_table(fake_mount_table):
206+
# Check that symlinks work on ext4
207+
copy_modes = File.CopyMode.copy | File.CopyMode.hardlink | File.CopyMode.symlink
208+
new_ext4_file = ext4_file.copy(
209+
ext4_mnt1 / "dest",
210+
mode=copy_modes,
211+
)
212+
213+
assert new_ext4_file.contents == ext4_file.contents
214+
assert os.path.islink(new_ext4_file)
215+
216+
# Symlinks not supported on CIFS
217+
new_cifs_file = cifs_file.copy(
218+
cifs_mnt / "dest",
219+
mode=copy_modes,
220+
)
221+
assert new_cifs_file.contents == cifs_file.contents
222+
assert not os.path.islink(new_cifs_file)
223+
assert os.stat(new_cifs_file).st_ino == os.stat(cifs_file).st_ino # Hardlink
224+
225+
# Hardlinks not supported across logical volumes
226+
new_ext4_file2 = ext4_file.copy(
227+
ext4_mnt2 / "dest", mode=File.CopyMode.copy | File.CopyMode.hardlink
228+
)
229+
assert new_ext4_file2.contents == ext4_file.contents
230+
assert not os.path.islink(new_ext4_file2)
231+
assert (
232+
os.stat(ext4_file).st_ino != os.stat(new_ext4_file2).st_ino
233+
) # Not hardlink
234+
235+
# Hardlinks not supported across logical volumes 2 (from CIFS)
236+
ext4_file_on_cifs = ext4_file.copy(
237+
cifs_mnt / "dest",
238+
mode=copy_modes,
239+
)
240+
assert ext4_file_on_cifs.contents == ext4_file.contents
241+
assert not os.path.islink(ext4_file_on_cifs)
242+
assert (
243+
os.stat(ext4_file).st_ino != os.stat(ext4_file_on_cifs).st_ino
244+
) # Not hardlink

fileformats/core/tests/test_mime.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from fileformats.generic import FileSet
2-
from fileformats.core.utils import from_mime
2+
from fileformats.core.identification import from_mime
33
from fileformats.testing import Classified, U, V
44
from fileformats.testing_subpackage import Psi, SubpackageClassified, Zeta, Theta
55

fileformats/core/tests/test_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,13 @@ def test_copy_collation_leave_diff_dir(work_dir: Path, dest_dir: Path):
205205

206206
with pytest.raises(
207207
FileFormatsError,
208-
match="using leave mode as it is not supported by the any given the collation specification",
208+
match="given the collation specification",
209209
):
210210
fileset.copy(dest_dir=dest_dir, mode="leave", collation="siblings")
211211

212212
with pytest.raises(
213213
FileFormatsError,
214-
match="using leave mode as it is not supported by the any given the collation specification",
214+
match="given the collation specification",
215215
):
216216
fileset.copy(dest_dir=dest_dir, mode="leave", collation="adjacent")
217217

fileformats/core/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def fspaths_converter(
7979
str,
8080
os.PathLike,
8181
bytes,
82-
fileformats.core.FileSet,
82+
"fileformats.core.FileSet",
8383
]
8484
):
8585
"""Ensures fs-paths are a set of pathlib.Path"""

0 commit comments

Comments
 (0)