Skip to content

Commit ca22d6c

Browse files
committed
Removed the `__bytes_repr__` implementations from the fileset and mock classes; Pydra can call `byte_chunks` directly.
1 parent 5b84d2b commit ca22d6c

File tree

2 files changed

+32
-28
lines changed

2 files changed

+32
-28
lines changed

fileformats/core/fileset.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -888,30 +888,6 @@ def hash_files(
888888
file_hashes[str(path)] = crypto_obj.hexdigest()
889889
return file_hashes
890890

891-
def __bytes_repr__(
    self, cache: ty.Dict[ty.Any, str]  # pylint: disable=unused-argument
) -> ty.Iterable[bytes]:
    """Stream a byte representation of the file-set for Pydra's hashing function

    The stream starts with the fully-qualified class name followed by ``:``.
    Then, for every ``(key, chunks)`` pair produced by ``self.byte_chunks()``,
    a ``,'<key>'=`` marker is emitted, followed by each item of ``chunks``.

    Parameters
    ----------
    cache : dict[Any, str]
        an object passed around by Pydra's hashing function to store cached
        versions of previously hashed objects; it is not read by this method

    Yields
    ------
    bytes
        the encoded class identifier, then an encoded key marker and the byte
        chunks for each entry returned by ``byte_chunks``
    """
    cls = type(self)
    class_header = f"{cls.__module__}.{cls.__name__}:"
    yield class_header.encode()
    for name, chunks in self.byte_chunks():
        # Marker separating one entry's bytes from the previous entry's
        key_marker = ",'" + name + "'="
        yield key_marker.encode()
        yield from chunks
914-
915891
@classmethod
916892
def referenced_types(cls) -> ty.Set[ty.Type[Classifier]]:
917893
"""Returns a flattened list of nested types referenced within the fileset type
@@ -1702,8 +1678,23 @@ def type_name(cls) -> str:
17021678
assert class_name.endswith("Mock")
17031679
return class_name[: -len("Mock")]
17041680

1705-
def __bytes_repr__(self, cache: ty.Dict[str, ty.Any]) -> ty.Iterable[bytes]:
    """Yield the string form of every path in ``self.fspaths`` as encoded bytes

    Only the paths themselves are emitted; no file contents are read.
    ``cache`` is accepted but not used.
    """
    for mock_path in self.fspaths:
        yield str(mock_path).encode()
1681+
def byte_chunks(
    self,
    mtime: bool = False,
    chunk_len=FILE_CHUNK_LEN_DEFAULT,
    relative_to: ty.Optional[os.PathLike] = None,
    ignore_hidden_files: bool = False,
    ignore_hidden_dirs: bool = False,
):
    """Yield a ``(key, chunk iterator)`` pair for each path in ``self.fspaths``

    No file is opened: each chunk iterator yields a single item, the encoded
    key itself, which stands in for the contents of the mocked file.

    Parameters
    ----------
    mtime : bool
        not used by this implementation
    chunk_len : int
        not used by this implementation
    relative_to : os.PathLike, optional
        prefix whose length (as a string) is stripped from the front of each
        path to form its key. Defaults to the common path of ``self.fspaths``
    ignore_hidden_files : bool
        not used by this implementation
    ignore_hidden_dirs : bool
        not used by this implementation

    Yields
    ------
    tuple[str, Iterator[bytes]]
        the key of the path and an iterator over its single stand-in chunk,
        in ascending key order
    """
    # NOTE(review): the unused parameters presumably mirror the signature of
    # the non-mock byte_chunks method -- confirm against FileSet
    if relative_to is None:
        prefix = os.path.commonpath(self.fspaths)
    else:
        prefix = str(relative_to)
    prefix_len = len(prefix)
    # The key is formed by slicing, not path arithmetic, so any separator
    # following the prefix remains part of the key
    keyed_paths = [(str(fspath)[prefix_len:], fspath) for fspath in self.fspaths]
    keyed_paths.sort(key=itemgetter(0))
    for key, _ in keyed_paths:
        yield (key, iter([key.encode()]))
17071698

17081699
@classproperty
17091700
def namespace(cls) -> str:

fileformats/core/tests/test_utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@
44
import shutil
55
import time
66
import pytest
7-
from fileformats.core import FileSet
8-
from fileformats.generic import File, Directory, FsObject
7+
from fileformats.core import FileSet, MockMixin
8+
from fileformats.generic import File, Directory, FsObject, SetOf
9+
from fileformats.text import TextFile
910
from fileformats.core.mixin import WithSeparateHeader
1011
from fileformats.core.exceptions import UnsatisfiableCopyModeError
1112
from fileformats.core.utils import mtime_cached_property
@@ -54,6 +55,11 @@ def fsobject(luigi_file, bowser_dir, request):
5455
assert False
5556

5657

58+
@pytest.fixture
def mock_fileset():
    """Provide a mocked ``SetOf[TextFile]`` built from two hard-coded paths."""
    mocked = SetOf[TextFile].mock("/path/to/a/mock", "/path/to/another/mock")
    return mocked
61+
62+
5763
@pytest.fixture
5864
def dest_dir(work_dir):
5965
dest_dir = work_dir / "new-dir"
@@ -407,3 +413,10 @@ def test_mtime_cached_property_force_clear(tmp_path: Path):
407413
file.flag = 1
408414
MtimeTestFile.cached_prop.clear(file)
409415
assert file.cached_prop == 1
416+
417+
418+
def test_hash_mock_files(mock_fileset: MockMixin, work_dir: Path, dest_dir: Path):
    """Check that hashing a mocked file-set returns one entry per mock path."""
    # An empty ``relative_to`` is passed so the returned keys can be compared
    # directly with the mock's own paths
    hashes_by_path = mock_fileset.hash_files(relative_to="")
    hashed_paths = sorted(map(Path, hashes_by_path))
    expected_paths = sorted(mock_fileset.fspaths)
    assert hashed_paths == expected_paths

0 commit comments

Comments
 (0)