|
5 | 5 | import random |
6 | 6 | import string |
7 | 7 | import inspect |
| 8 | +from functools import cached_property |
8 | 9 | import typing as ty |
9 | 10 | import re |
10 | 11 | import urllib.request |
@@ -488,45 +489,157 @@ def import_extras_module(klass: type) -> ExtrasModule: |
488 | 489 | LIST_MIME = "+list-of" |
489 | 490 |
|
490 | 491 |
|
class SampleFileGenerator:
    """Generates sample files. Designed to be used within generate_sample_data overrides

    Parameters
    ----------
    dest_dir : Path
        the directory to write the sample files to
    seed : int
        the seed for the random number generator
    fname_stem : str, optional
        the stem of the file name to generate. If omitted, a random stem of
        ``FNAME_STEM_LENGTH`` alphanumeric characters is drawn from the seeded
        random number generator
    """

    dest_dir: Path
    seed: int
    fname_stem: str

    FNAME_STEM_LENGTH = 24

    def __init__(
        self, dest_dir: Path, seed: int, fname_stem: ty.Optional[str] = None
    ):
        # Coerce to Path so that plain string directories work with the "/" joins
        # performed in generate_fspath
        self.dest_dir = Path(dest_dir)
        self.seed = seed
        self.fname_stem = (
            self._generate_fname_stem() if fname_stem is None else fname_stem
        )

    def _generate_fname_stem(self) -> str:
        """Draws a random alphanumeric file-name stem from the seeded generator"""
        return "".join(
            self.rng.choices(
                string.ascii_letters + string.digits, k=self.FNAME_STEM_LENGTH
            )
        )

    @cached_property
    def rng(self) -> random.Random:
        """Random number generator seeded with `seed`, created on first access"""
        return random.Random(self.seed)

    def generate(
        self,
        file_type: "ty.Type[fileformats.core.FileSet]",
        contents: ty.Union[str, bytes, None] = None,
        fill: int = 0,
        **kwargs,
    ) -> Path:
        """Generates a sample file of the given type in the destination directory

        Parameters
        ----------
        file_type : Type[FileSet]
            type of the file to generate, used to append any extension to the file
            name and to decide whether the file is written as binary or text
        contents : Union[str, bytes], optional
            the contents of the file to write. Must be bytes if `file_type.binary`
            is true and str otherwise
        fill : int
            length of the random contents to generate from the seeded random number
            generator. Only used when `contents` is not provided (or is empty)
        **kwargs : dict
            additional keyword arguments to pass to generate_fspath

        Returns
        -------
        fspath : Path
            path to the generated file

        Raises
        ------
        ValueError
            if neither `contents` nor `fill` are provided
        TypeError
            if `contents` is not bytes for a binary file type, or not str for a
            text file type
        """
        if not contents and not fill:
            raise ValueError("Either contents or fill must be provided")
        fspath = self.generate_fspath(file_type, **kwargs)
        fspath.parent.mkdir(parents=True, exist_ok=True)
        # File types that don't define a `binary` attribute are treated as text
        is_binary = getattr(file_type, "binary", False)
        if not contents:
            # Draw from the instance's seeded generator (not the global `random`
            # module) so the generated contents are reproducible for a given seed
            if is_binary:
                contents = bytes(self.rng.choices(range(256), k=fill))
            else:
                contents = "".join(self.rng.choices(string.printable, k=fill))
        else:
            contents_type = bytes if is_binary else str
            if not isinstance(contents, contents_type):
                raise TypeError(
                    f"contents must be {contents_type} for {file_type} files, "
                    f"not {type(contents)}"
                )
        if is_binary:
            fspath.write_bytes(contents)
        else:
            fspath.write_text(contents)
        return fspath

    def generate_fspath(
        self,
        file_type: "ty.Optional[ty.Type[fileformats.core.FileSet]]" = None,
        fname_stem: ty.Optional[str] = None,
        relpath: ty.Optional[Path] = None,
    ) -> Path:
        """Generates a file path within the destination directory. Nothing is
        written to the file system

        Parameters
        ----------
        file_type : Type[FileSet], optional
            type of the file to generate the path for, its extension (if any) is
            appended to the file name. Defaults to fileformats.generic.FsObject
        fname_stem : str, optional
            stem to use for the file name instead of the generator's own stem
        relpath : Path, optional
            the directory to place the file in, relative to the destination
            directory

        Returns
        -------
        fspath : Path
            the generated file-system path
        """
        if file_type is None:
            import fileformats.generic

            file_type = fileformats.generic.FsObject
        fname = self.fname_stem if fname_stem is None else fname_stem
        if file_type.ext:
            fname += file_type.ext
        parent = self.dest_dir / relpath if relpath else self.dest_dir
        return parent / fname

    def child(
        self, dest_dir: ty.Optional[Path] = None, fname_stem: ty.Optional[str] = None
    ) -> "SampleFileGenerator":
        """Creates a new instance of SampleFileGenerator whose seed is drawn from
        this generator's random number generator

        Parameters
        ----------
        dest_dir : Path, optional
            the destination directory of the new generator, defaults to the
            destination directory of this generator
        fname_stem : str, optional
            the stem of the file name to generate, a new random stem is generated
            if omitted (or empty)

        Returns
        -------
        SampleFileGenerator
            the new instance of SampleFileGenerator
        """
        if dest_dir is None:
            dest_dir = self.dest_dir
        return SampleFileGenerator(
            dest_dir,
            seed=self.rng.randint(0, 2**32 - 1),
            # An empty stem is treated the same as an omitted one
            fname_stem=fname_stem or None,
        )
0 commit comments