@@ -513,8 +513,14 @@ def get_struct_log_parents(div):
513513 return result
514514
515515
def validate_mets_files(ctx, param, mets_files):
    """
    Click parameter callback: require at least one source of METS files.

    The value is accepted when either ``mets_files`` itself is non-empty or
    the context already holds a non-empty ``mets_files_list`` parameter.

    :param ctx: click context; only ``ctx.params`` is read.
    :param param: the parameter being validated (unused).
    :param mets_files: the value of the ``mets_files`` parameter.
    :return: ``mets_files``, unchanged.
    :raises click.BadParameter: if neither source was given.
    """
    # Test the *value*, not just the presence of the key: a key that is
    # present but None/empty still means that no list file was given.
    # NOTE(review): this relies on click having processed the
    # --mets-files-list option before this callback runs -- confirm against
    # click's parameter evaluation order.
    if not mets_files and not ctx.params.get("mets_files_list"):
        raise click.BadParameter("Neither mets_files nor mets_files_list given")
    return mets_files
520+
521+
516522@click .command ()
517- @click .argument ("mets_files" , type = click .Path (exists = True ), required = True , nargs = - 1 )
523+ @click .argument ("mets_files" , type = click .Path (exists = True ), nargs = - 1 , callback = validate_mets_files )
518524@click .option (
519525 "--output" ,
520526 "-o" ,
@@ -527,7 +533,10 @@ def get_struct_log_parents(div):
527533@click .option (
528534 "--output-page-info" , type = click .Path (), help = "Output page info Parquet file"
529535)
530- def process_command (mets_files : list [str ], output_file : str , output_page_info : str ):
536+ @click .option (
537+ "--mets-files-list" , type = click .Path (), help = "Read list of METS files from this file"
538+ )
539+ def process_command (mets_files : list [str ], output_file : str , output_page_info : str , mets_files_list : str ):
531540 """
532541 A tool to convert the MODS metadata in INPUT to a pandas DataFrame.
533542
@@ -538,12 +547,19 @@ def process_command(mets_files: list[str], output_file: str, output_page_info: s
538547
539548 Per-page information (e.g. structure information) can be output to a separate Parquet file.
540549 """
541- process (mets_files , output_file , output_page_info )
550+ process (mets_files , output_file , output_page_info , mets_files_list )
542551
543552
544- def process (mets_files : list [str ], output_file : str , output_page_info : str ):
545- # Extend file list if directories are given
553+
554+
555+ def process (mets_files : list [str ], output_file : str , output_page_info : str , mets_files_list : str ):
546556 mets_files_real : list [str ] = []
557+
558+ if mets_files_list :
559+ with open (mets_files_list ) as f :
560+ mets_files_real = [line .strip () for line in f .readlines ()]
561+
562+ # Extend file list if directories are given
547563 for m in mets_files :
548564 if os .path .isdir (m ):
549565 logger .info ("Scanning directory {}" .format (m ))
0 commit comments