@@ -258,6 +258,16 @@ def transcode_dicom_to_htj2k(
258258 progression_order : str = "RPCL" ,
259259 max_batch_size : int = 256 ,
260260 add_basic_offset_table : bool = True ,
261+ skip_transfer_syntaxes : list = (
262+ _get_transfer_syntax_constants ()['HTJ2K' ] |
263+ frozenset ([
264+ # Lossy JPEG 2000
265+ "1.2.840.10008.1.2.4.91" , # JPEG 2000 Image Compression (lossy allowed)
266+ # Lossy JPEG
267+ "1.2.840.10008.1.2.4.50" , # JPEG Baseline (Process 1) - always lossy
268+ "1.2.840.10008.1.2.4.51" , # JPEG Extended (Process 2 & 4, can be lossy)
269+ ])
270+ ),
261271) -> str :
262272 """
263273 Transcode DICOM files to HTJ2K (High Throughput JPEG 2000) lossless compression.
@@ -280,7 +290,7 @@ def transcode_dicom_to_htj2k(
280290 in memory simultaneously.
281291
282292 Supported source transfer syntaxes:
283- - HTJ2K (High-Throughput JPEG 2000) - decoded and re-encoded to add BOT if needed
293+ - HTJ2K (High-Throughput JPEG 2000) - copied unchanged by default (see skip_transfer_syntaxes); otherwise decoded and re-encoded (BOT added if needed)
284294 - JPEG 2000 (lossless and lossy)
285295 - JPEG (baseline, extended, lossless)
286296 - Uncompressed (Explicit/Implicit VR Little/Big Endian)
@@ -307,6 +317,10 @@ def transcode_dicom_to_htj2k(
307317 add_basic_offset_table: If True, creates Basic Offset Table for multi-frame DICOMs (default: True)
308318 BOT enables O(1) frame access without parsing entire pixel data stream
309319 Per DICOM Part 5 Section A.4. The flag is forwarded as has_bot to pydicom.encaps.encapsulate regardless of frame count.
320+ skip_transfer_syntaxes: Optional list of Transfer Syntax UIDs to skip transcoding (default: HTJ2K, lossy JPEG 2000, and lossy JPEG)
321+ Files with these transfer syntaxes will be copied directly to output
322+ without transcoding. Useful for preserving already-compressed formats.
323+ Example: ["1.2.840.10008.1.2.4.201", "1.2.840.10008.1.2.4.202"]
310324
311325 Returns:
312326 str: Path to output directory containing transcoded DICOM files
@@ -337,6 +351,12 @@ def transcode_dicom_to_htj2k(
337351 ... max_batch_size=5
338352 ... )
339353
354+ >>> # Skip transcoding for files already in HTJ2K format
355+ >>> output_dir = transcode_dicom_to_htj2k(
356+ ... input_dir="/path/to/dicoms",
357+ ... skip_transfer_syntaxes=["1.2.840.10008.1.2.4.201", "1.2.840.10008.1.2.4.202"]
358+ ... )
359+
340360 Note:
341361 Requires nvidia-nvimgcodec to be installed:
342362 pip install nvidia-nvimgcodec-cu{XX}[all]
@@ -396,8 +416,17 @@ def transcode_dicom_to_htj2k(
396416 ts_constants = _get_transfer_syntax_constants ()
397417 NVIMGCODEC_SYNTAXES = ts_constants ['NVIMGCODEC' ]
398418
419+ # Initialize skip list
420+ if skip_transfer_syntaxes is None :
421+ skip_transfer_syntaxes = []
422+ else :
423+ # Convert to set of strings for faster lookup
424+ skip_transfer_syntaxes = set (str (ts ) for ts in skip_transfer_syntaxes )
425+ logger .info (f"Files with these transfer syntaxes will be copied without transcoding: { skip_transfer_syntaxes } " )
426+
399427 start_time = time .time ()
400428 transcoded_count = 0
429+ skipped_count = 0
401430
402431 # Calculate batch info for logging
403432 total_files = len (valid_dicom_files )
@@ -411,20 +440,37 @@ def transcode_dicom_to_htj2k(
411440 batch_datasets = [pydicom .dcmread (file ) for file in batch_files ]
412441 nvimgcodec_batch = []
413442 pydicom_batch = []
443+ skip_batch = [] # Indices of files to skip (copy directly)
414444
415445 for idx , ds in enumerate (batch_datasets ):
416446 current_ts = getattr (ds , 'file_meta' , {}).get ('TransferSyntaxUID' , None )
417447 if current_ts is None :
418448 raise ValueError (f"DICOM file { os .path .basename (batch_files [idx ])} does not have a Transfer Syntax UID" )
419449
420450 ts_str = str (current_ts )
451+
452+ # Check if this transfer syntax should be skipped
453+ if ts_str in skip_transfer_syntaxes :
454+ skip_batch .append (idx )
455+ logger .info (f" Skipping { os .path .basename (batch_files [idx ])} (Transfer Syntax: { ts_str } )" )
456+ continue
457+
421458 if ts_str in NVIMGCODEC_SYNTAXES :
422459 if not hasattr (ds , "PixelData" ) or ds .PixelData is None :
423460 raise ValueError (f"DICOM file { os .path .basename (batch_files [idx ])} does not have a PixelData member" )
424461 nvimgcodec_batch .append (idx )
425462 else :
426463 pydicom_batch .append (idx )
427464
465+ # Handle skip_batch: copy files directly to output
466+ if skip_batch :
467+ for idx in skip_batch :
468+ source_file = batch_files [idx ]
469+ output_file = os .path .join (output_dir , os .path .basename (source_file ))
470+ shutil .copy2 (source_file , output_file )
471+ skipped_count += 1
472+ logger .info (f" Copied { os .path .basename (source_file )} to output (skipped transcoding)" )
473+
428474 num_frames = []
429475 encoded_data = []
430476
@@ -545,12 +591,7 @@ def transcode_dicom_to_htj2k(
545591
546592 # Update dataset with HTJ2K encoded data
547593 # Include a Basic Offset Table when add_basic_offset_table is set, regardless of frame count
548- if add_basic_offset_table and nframes > 1 :
549- batch_datasets [dataset_idx ].PixelData = pydicom .encaps .encapsulate (encoded_frames , has_bot = True )
550- logger .info (f" ✓ Basic Offset Table included for efficient frame access" )
551- else :
552- batch_datasets [dataset_idx ].PixelData = pydicom .encaps .encapsulate (encoded_frames )
553-
594+ batch_datasets [dataset_idx ].PixelData = pydicom .encaps .encapsulate (encoded_frames , has_bot = add_basic_offset_table )
554595 batch_datasets [dataset_idx ].file_meta .TransferSyntaxUID = pydicom .uid .UID (target_transfer_syntax )
555596
556597 # Update PhotometricInterpretation to RGB for YBR images since we decoded with RGB color_spec
@@ -572,6 +613,7 @@ def transcode_dicom_to_htj2k(
572613 logger .info (f"Transcoding complete:" )
573614 logger .info (f" Total files: { len (valid_dicom_files )} " )
574615 logger .info (f" Successfully transcoded: { transcoded_count } " )
616+ logger .info (f" Skipped (copied without transcoding): { skipped_count } " )
575617 logger .info (f" Time elapsed: { elapsed_time :.2f} seconds" )
576618 logger .info (f" Output directory: { output_dir } " )
577619
@@ -910,11 +952,7 @@ def convert_single_frame_dicom_series_to_multiframe(
910952 if encoded_frames_bytes is not None :
911953 # Encapsulated data (HTJ2K or preserved compressed format)
912954 # Use Basic Offset Table for multi-frame efficiency
913- if add_basic_offset_table :
914- output_ds .PixelData = pydicom .encaps .encapsulate (encoded_frames_bytes , has_bot = True )
915- logger .info (f" ✓ Basic Offset Table included for efficient frame access" )
916- else :
917- output_ds .PixelData = pydicom .encaps .encapsulate (encoded_frames_bytes )
955+ output_ds .PixelData = pydicom .encaps .encapsulate (encoded_frames_bytes , has_bot = add_basic_offset_table )
918956 else :
919957 # Uncompressed mode: combine all frames into a 3D array
920958 # Stack frames: (frames, rows, cols)
0 commit comments