Skip to content

Commit 17f6a8d

Browse files
authored
Merge pull request #55 from ArcanaFramework/sample-file-generator
refactored generate_sample_data
2 parents 150f8d6 + a46079f commit 17f6a8d

File tree

5 files changed

+213
-109
lines changed

5 files changed

+213
-109
lines changed

extras/fileformats/extras/application/medical.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import typing as ty
22
from pathlib import Path
3-
from random import Random
43
import pydicom
54
from fileformats.core import FileSet
65
from fileformats.application import Dicom
76
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c
7+
from fileformats.core.utils import SampleFileGenerator
88

99

1010
@FileSet.read_metadata.register
@@ -24,10 +24,10 @@ def dicom_read_metadata(
2424
@FileSet.generate_sample_data.register
2525
def dicom_generate_sample_data(
2626
dicom: Dicom,
27-
dest_dir: Path,
28-
seed: ty.Union[int, Random] = 0,
29-
stem: ty.Optional[str] = None,
27+
generator: SampleFileGenerator,
3028
) -> ty.Iterable[Path]:
3129
return next(
32-
medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c.get_image().iterdir()
30+
medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c.get_image(
31+
out_dir=generator.dest_dir
32+
).iterdir()
3333
)

fileformats/application/serialization.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import json
22
import typing as ty
3-
from random import Random
43
from pathlib import Path
54
from fileformats.core import hook, DataType, FileSet
65
from fileformats.core.mixin import WithClassifiers
76
from ..generic import File
87
from fileformats.core.exceptions import FormatMismatchError
9-
from fileformats.core.utils import gen_filename
8+
from fileformats.core.utils import SampleFileGenerator
109

1110

1211
class Schema(DataType):
@@ -96,15 +95,17 @@ class Toml(DataSerialization):
9695
@FileSet.generate_sample_data.register
9796
def generate_json_sample_data(
9897
js: Json,
99-
dest_dir: Path,
100-
seed: ty.Union[int, Random] = 0,
101-
stem: ty.Optional[str] = None,
98+
generator: SampleFileGenerator,
10299
) -> ty.Iterable[Path]:
103-
js_file = dest_dir / gen_filename(seed, file_type=js, stem=stem)
104-
rng = Random(seed + 1)
100+
js_file = generator.generate_fspath(file_type=Json)
105101
with open(js_file, "w") as f:
106102
json.dump(
107-
{"a": True, "b": "two", "c": 3, "d": [rng.randint(0, 10), rng.random(), 6]},
103+
{
104+
"a": True,
105+
"b": "two",
106+
"c": 3,
107+
"d": [generator.rng.randint(0, 10), generator.rng.random(), 6],
108+
},
108109
f,
109110
)
110111
return [js_file]
@@ -113,21 +114,18 @@ def generate_json_sample_data(
113114
@FileSet.generate_sample_data.register
114115
def generate_yaml_sample_data(
115116
yml: Yaml,
116-
dest_dir: Path,
117-
seed: ty.Union[int, Random] = 0,
118-
stem: ty.Optional[str] = None,
117+
generator: SampleFileGenerator,
119118
) -> ty.Iterable[Path]:
120-
yml_file = dest_dir / gen_filename(seed, file_type=yml, stem=stem)
121-
rng = Random(seed + 1)
119+
yml_file = generator.generate_fspath(file_type=Yaml)
122120
with open(yml_file, "w") as f:
123121
f.write(
124122
f"""# Generated sample YAML file by FileFormats
125123
a: True
126124
b: two
127125
c: 3
128126
d:
129-
- {rng.randint(0, 10)}
130-
- {rng.random()}
127+
- {generator.rng.randint(0, 10)}
128+
- {generator.rng.random()}
131129
- 6
132130
"""
133131
)

fileformats/core/fileset.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from enum import Enum, IntEnum
55
from warnings import warn
66
import tempfile
7-
from random import Random
87
from collections import Counter
98
import typing as ty
109
import shutil
@@ -23,6 +22,7 @@
2322
describe_task,
2423
matching_source,
2524
import_extras_module,
25+
SampleFileGenerator,
2626
)
2727
from .converter import SubtypeVar
2828
from .classifier import Classifier
@@ -859,9 +859,9 @@ def referenced_types(cls) -> ty.Set[Classifier]:
859859
return types
860860

861861
@classmethod
862-
def mock(cls, *fspaths: ty.Tuple[ty.Union[Path, str]]) -> Self:
862+
def mock(cls, *fspaths: ty.Tuple[ty.Union[Path, str]]) -> "FileSet":
863863
"""Return an instance of a mocked sub-class of the file format to be used in
864-
test routines like doctests.
864+
test routines like doctests that don't need to point at actual files
865865
866866
Parameters
867867
----------
@@ -909,8 +909,9 @@ def sample(
909909
dest_dir = Path(tempfile.mkdtemp())
910910
# Need to use mock to get an instance in order to use the singledispatch-based
911911
# hook.extra decorator
912-
mock = cls.mock()
913-
fspaths = mock.generate_sample_data(dest_dir, seed, stem)
912+
fspaths = cls.sample_data(
913+
SampleFileGenerator(dest_dir=dest_dir, seed=seed, fname_stem=stem)
914+
)
914915
try:
915916
obj = cls(fspaths)
916917
except FormatMismatchError as e:
@@ -921,12 +922,28 @@ def sample(
921922
)
922923
return obj
923924

925+
@classmethod
926+
def sample_data(cls, generator: SampleFileGenerator) -> ty.Iterable[Path]:
927+
"""Converts the `generate_sample_data` method into a class method by mocking up
928+
a class instance and calling the method on it
929+
930+
Parameters
931+
----------
932+
generator : SampleFileGenerator
933+
the generator to use to create the sample data
934+
935+
Returns
936+
-------
937+
ty.Iterable[Path]
938+
the generated file-system paths
939+
"""
940+
mock: FileSet = cls.mock()
941+
return mock.generate_sample_data(generator)
942+
924943
@hook.extra
925944
def generate_sample_data(
926945
self,
927-
dest_dir: Path,
928-
seed: ty.Union[int, Random] = 0,
929-
stem: ty.Optional[str] = None,
946+
generator: SampleFileGenerator,
930947
) -> ty.Iterable[Path]:
931948
"""Generate test data at the fspaths of the file-set
932949

fileformats/core/utils.py

Lines changed: 149 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import random
66
import string
77
import inspect
8+
from functools import cached_property
89
import typing as ty
910
import re
1011
import urllib.request
@@ -488,45 +489,157 @@ def import_extras_module(klass: type) -> ExtrasModule:
488489
LIST_MIME = "+list-of"
489490

490491

491-
def gen_filename(
492-
seed_or_rng: ty.Union[random.Random, int],
493-
file_type: ty.Type[fileformats.core.FileSet] = None,
494-
length: int = 32,
495-
stem: ty.Optional[str] = None,
496-
):
497-
"""Generates a random filename of length `length` and extension `ext`
492+
class SampleFileGenerator:
493+
"""Generates sample files. Designed to be used within generate_sample_data overrides
498494
499495
Parameters
500496
----------
501-
seed_or_rng : random.Random or int
502-
used to seed the random number generator
503-
file_type : Type[FileSet], optional
504-
type of the file to generate the filename for, used to append any extensions
505-
and seed the random number generator if required
506-
length : int
507-
length of the filename (minus extension)
508-
stem : str, optional
509-
the stem to use for the filename if provided
510-
511-
Returns
512-
-------
513-
filename : str
514-
randomly generated filename
497+
dest_dir : Path
498+
the directory to write the sample files to
499+
seed : int
500+
the seed for the random number generator
501+
fname_stem : str, optional
502+
the stem of the file name to generate, randomly generated from the seed if omitted
515503
"""
516-
if file_type is None:
517-
import fileformats.generic
518504

519-
file_type = fileformats.generic.FsObject
520-
if stem:
521-
fname = stem
522-
else:
523-
if isinstance(seed_or_rng, random.Random):
524-
rng = seed_or_rng
505+
dest_dir: Path
506+
seed: int
507+
fname_stem: str
508+
509+
FNAME_STEM_LENGTH = 24
510+
511+
def __init__(self, dest_dir: Path, seed: int, fname_stem: str = None):
512+
self.dest_dir = dest_dir
513+
self.seed = seed
514+
self.fname_stem = (
515+
self._generate_fname_stem() if fname_stem is None else fname_stem
516+
)
517+
518+
def _generate_fname_stem(self):
519+
return "".join(
520+
self.rng.choices(
521+
string.ascii_letters + string.digits, k=self.FNAME_STEM_LENGTH
522+
)
523+
)
524+
525+
@cached_property
526+
def rng(self):
527+
return random.Random(self.seed)
528+
529+
def generate(
530+
self,
531+
file_type: ty.Type[fileformats.core.FileSet],
532+
contents: ty.Union[str, bytes] = None,
533+
fill: int = 0,
534+
**kwargs,
535+
):
536+
"""Generates a sample file of the given type from explicit contents or random fill
537+
538+
Parameters
539+
----------
540+
file_type : Type[FileSet]
541+
type of the file to generate, used to append any extensions to the filename
542+
and to decide whether the contents are written as bytes or text
543+
contents : Union[str, bytes]
544+
the contents of the file to write
545+
fill : int
546+
length of the random contents to generate for the file. Only used when no
547+
explicit contents are provided
548+
**kwargs : dict
549+
additional keyword arguments to pass to generate_fspath
550+
551+
Returns
552+
-------
553+
fspath : Path
554+
path to the randomly generated file
555+
"""
556+
if not contents and not fill:
557+
raise ValueError("Either contents or random_fill_length must be provided")
558+
fspath = self.generate_fspath(file_type, **kwargs)
559+
fspath.parent.mkdir(parents=True, exist_ok=True)
560+
try:
561+
is_binary = file_type.binary
562+
except AttributeError:
563+
is_binary = False
564+
if not contents:
565+
contents = (
566+
bytes(random.choices(list(range(256)), k=fill))
567+
if is_binary
568+
else "".join(random.choices(string.printable, k=fill))
569+
)
570+
else:
571+
contents_type = bytes if is_binary else str
572+
if not isinstance(contents, bytes):
573+
raise TypeError(
574+
f"contents must be {contents_type} for {file_type} files, "
575+
f"not {type(contents)}"
576+
)
577+
if is_binary:
578+
fspath.write_bytes(contents)
525579
else:
526-
if not inspect.isclass(file_type):
527-
file_type = type(file_type)
528-
rng = random.Random(str(seed_or_rng) + file_type.mime_like)
529-
fname = "".join(rng.choices(string.ascii_letters + string.digits, k=length))
530-
if file_type and file_type.ext:
531-
fname += file_type.ext
532-
return fname
580+
fspath.write_text(contents)
581+
return fspath
582+
583+
def generate_fspath(
584+
self,
585+
file_type: ty.Optional[ty.Type[fileformats.core.FileSet]] = None,
586+
fname_stem: ty.Optional[str] = None,
587+
relpath: ty.Optional[Path] = None,
588+
):
589+
"""Generates a file path in the destination directory from the filename stem
590+
and the extension of the file type
591+
592+
Parameters
593+
----------
594+
file_type : Type[FileSet]
595+
type of the file to generate the filename for, used to append any extensions.
596+
Defaults to fileformats.generic.FsObject if not provided
597+
fname_stem : str, optional
598+
the filename stem to use in place of the generator's own stem
599+
relpath : Path
600+
the path to generate the filename at, relative to the destination directory
601+
602+
Returns
603+
-------
604+
fspath : Path
605+
randomly generated file-system path
606+
"""
607+
if file_type is None:
608+
import fileformats.generic
609+
610+
file_type = fileformats.generic.FsObject
611+
if fname_stem is not None:
612+
fname = fname_stem
613+
else:
614+
fname = self.fname_stem
615+
if file_type and file_type.ext:
616+
fname += file_type.ext
617+
fspath = self.dest_dir
618+
if relpath:
619+
fspath /= relpath
620+
return fspath / fname
621+
622+
def child(
623+
self, dest_dir: ty.Optional[Path] = None, fname_stem: str = None
624+
) -> "SampleFileGenerator":
625+
"""Creates a new instance of SampleFileGenerator seeded from this generator's
626+
random number generator, using the same destination directory unless overridden
627+
628+
Parameters
629+
----------
630+
dest_dir : Path, optional
631+
the destination directory of the new generator, defaults to that of this generator
632+
fname_stem : str, optional
633+
the stem of the file name to generate
634+
635+
Returns
636+
-------
637+
SampleFileGenerator
638+
the new instance of SampleFileGenerator
639+
"""
640+
if dest_dir is None:
641+
dest_dir = self.dest_dir
642+
kwargs = {"fname_stem": fname_stem} if fname_stem else {}
643+
return SampleFileGenerator(
644+
dest_dir, seed=self.rng.randint(0, 2**32 - 1), **kwargs
645+
)

0 commit comments

Comments
 (0)