Skip to content

Commit 0855ccb

Browse files
committed
✨ Add --mets-files-list option to give a list of input files
1 parent 4178f1e commit 0855ccb

File tree

1 file changed

+21
-5
lines changed

1 file changed

+21
-5
lines changed

src/mods4pandas/mods4pandas.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,14 @@ def get_struct_log_parents(div):
513513
return result
514514

515515

516+
def validate_mets_files(ctx, param, mets_files):
    """Click callback ensuring that at least one source of METS files is given.

    Args:
        ctx: The click context; ``ctx.params`` is consulted for an already
            processed ``mets_files_list`` value.
        param: The parameter this callback is attached to (unused).
        mets_files: The values collected for the ``mets_files`` argument.

    Returns:
        ``mets_files``, unchanged.

    Raises:
        click.BadParameter: If ``mets_files`` is empty and ``ctx.params``
            holds no non-empty ``mets_files_list`` value.
    """
    # Check the option's value, not just the presence of its key: an empty or
    # ``None`` value names no list file, so it cannot stand in for missing
    # positional arguments.
    # NOTE(review): this relies on click having processed the
    # ``--mets-files-list`` option before this callback runs -- confirm
    # against click's callback evaluation order.
    if not mets_files and not ctx.params.get("mets_files_list"):
        raise click.BadParameter("Neither mets_files nor mets_files_list given")
    return mets_files
520+
521+
516522
@click.command()
517-
@click.argument("mets_files", type=click.Path(exists=True), required=True, nargs=-1)
523+
@click.argument("mets_files", type=click.Path(exists=True), nargs=-1, callback=validate_mets_files)
518524
@click.option(
519525
"--output",
520526
"-o",
@@ -527,7 +533,10 @@ def get_struct_log_parents(div):
527533
@click.option(
528534
"--output-page-info", type=click.Path(), help="Output page info Parquet file"
529535
)
530-
def process_command(mets_files: list[str], output_file: str, output_page_info: str):
536+
@click.option(
537+
"--mets-files-list", type=click.Path(), help="Read list of METS files from this file"
538+
)
539+
def process_command(mets_files: list[str], output_file: str, output_page_info: str, mets_files_list: str):
531540
"""
532541
A tool to convert the MODS metadata in INPUT to a pandas DataFrame.
533542
@@ -538,12 +547,19 @@ def process_command(mets_files: list[str], output_file: str, output_page_info: s
538547
539548
Per-page information (e.g. structure information) can be output to a separate Parquet file.
540549
"""
541-
process(mets_files, output_file, output_page_info)
550+
process(mets_files, output_file, output_page_info, mets_files_list)
542551

543552

544-
def process(mets_files: list[str], output_file: str, output_page_info: str):
545-
# Extend file list if directories are given
553+
554+
555+
def process(mets_files: list[str], output_file: str, output_page_info: str, mets_files_list: str):
546556
mets_files_real: list[str] = []
557+
558+
if mets_files_list:
559+
with open(mets_files_list) as f:
560+
mets_files_real = [line.strip() for line in f.readlines()]
561+
562+
# Extend file list if directories are given
547563
for m in mets_files:
548564
if os.path.isdir(m):
549565
logger.info("Scanning directory {}".format(m))

0 commit comments

Comments
 (0)