Skip to content

Commit 352defc

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent b652ca7 commit 352defc

File tree

6 files changed

+251
-252
lines changed

6 files changed

+251
-252
lines changed

monailabel/datastore/utils/convert.py

Lines changed: 70 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def _get_nvimgcodec_encoder():
5555
if _NVIMGCODEC_ENCODER is None:
5656
try:
5757
from nvidia import nvimgcodec
58+
5859
_NVIMGCODEC_ENCODER = nvimgcodec.Encoder()
5960
logger.debug("Initialized global nvimgcodec.Encoder singleton")
6061
except ImportError:
@@ -72,6 +73,7 @@ def _get_nvimgcodec_decoder():
7273
if _NVIMGCODEC_DECODER is None:
7374
try:
7475
from nvidia import nvimgcodec
76+
7577
_NVIMGCODEC_DECODER = nvimgcodec.Decoder()
7678
logger.debug("Initialized global nvimgcodec.Decoder singleton")
7779
except ImportError:
@@ -211,7 +213,7 @@ def dicom_to_nifti(series_dir, is_seg=False):
211213

212214
try:
213215
from monailabel.transform.reader import NvDicomReader
214-
216+
215217
# Use NvDicomReader with LoadImage
216218
reader = NvDicomReader()
217219
loader = LoadImage(reader=reader, image_only=False)
@@ -552,9 +554,10 @@ def nifti_to_dicom_seg(
552554

553555

554556
def itk_image_to_dicom_seg(label, series_dir, template) -> str:
555-
from monailabel.utils.others.generic import run_command
556557
import shutil
557558

559+
from monailabel.utils.others.generic import run_command
560+
558561
command = "itkimage2segimage"
559562
if not shutil.which(command):
560563
error_msg = (
@@ -648,20 +651,20 @@ def transcode_dicom_to_htj2k(
648651
) -> str:
649652
"""
650653
Transcode DICOM files to HTJ2K (High Throughput JPEG 2000) lossless compression.
651-
654+
652655
HTJ2K is a faster variant of JPEG 2000 that provides better compression performance
653656
for medical imaging applications. This function uses nvidia-nvimgcodec for hardware-
654657
accelerated decoding and encoding with batch processing for optimal performance.
655658
All transcoding is performed using lossless compression to preserve image quality.
656-
659+
657660
The function processes files in configurable batches:
658661
1. Categorizes files by transfer syntax (HTJ2K/JPEG2000/JPEG/uncompressed)
659662
2. Uses nvimgcodec decoder for compressed files (JPEG2000, JPEG)
660663
3. Falls back to pydicom pixel_array for uncompressed files
661664
4. Batch encodes all images to HTJ2K using nvimgcodec
662665
5. Saves transcoded files with updated transfer syntax
663666
6. Copies already-HTJ2K files directly (no re-encoding)
664-
667+
665668
Supported source transfer syntaxes:
666669
- JPEG 2000 (lossless and lossy)
667670
- JPEG (baseline, extended, lossless)
@@ -670,7 +673,7 @@ def transcode_dicom_to_htj2k(
670673
671674
Typical compression ratios of 60-70% with lossless quality.
672675
Processing speed depends on batch size and GPU capabilities.
673-
676+
674677
Args:
675678
input_dir: Path to directory containing DICOM files to transcode
676679
output_dir: Path to output directory for transcoded files. If None, creates temp directory
@@ -680,47 +683,47 @@ def transcode_dicom_to_htj2k(
680683
Must be powers of 2. Common values: (32,32), (64,64), (128,128)
681684
max_batch_size: Maximum number of DICOM files to process in each batch (default: 256)
682685
Lower values reduce memory usage, higher values may improve speed
683-
686+
684687
Returns:
685688
str: Path to output directory containing transcoded DICOM files
686-
689+
687690
Raises:
688691
ImportError: If nvidia-nvimgcodec is not available
689692
ValueError: If input directory doesn't exist or contains no valid DICOM files
690693
ValueError: If DICOM files are missing required attributes (TransferSyntaxUID, PixelData)
691-
694+
692695
Example:
693696
>>> # Basic usage with default settings
694697
>>> output_dir = transcode_dicom_to_htj2k("/path/to/dicoms")
695698
>>> print(f"Transcoded files saved to: {output_dir}")
696-
699+
697700
>>> # Custom output directory and batch size
698701
>>> output_dir = transcode_dicom_to_htj2k(
699702
... input_dir="/path/to/dicoms",
700703
... output_dir="/path/to/output",
701704
... max_batch_size=50,
702705
... num_resolutions=5
703706
... )
704-
707+
705708
>>> # Process with smaller code blocks for memory efficiency
706709
>>> output_dir = transcode_dicom_to_htj2k(
707710
... input_dir="/path/to/dicoms",
708711
... code_block_size=(32, 32),
709712
... max_batch_size=5
710713
... )
711-
714+
712715
Note:
713716
Requires nvidia-nvimgcodec to be installed:
714717
pip install nvidia-nvimgcodec-cu{XX}[all]
715718
Replace {XX} with your CUDA version (e.g., cu13 for CUDA 13.x)
716-
719+
717720
The function preserves all DICOM metadata including Patient, Study, and Series
718721
information. Only the transfer syntax and pixel data encoding are modified.
719722
"""
720723
import glob
721724
import shutil
722725
from pathlib import Path
723-
726+
724727
# Check for nvidia-nvimgcodec
725728
try:
726729
from nvidia import nvimgcodec
@@ -730,62 +733,62 @@ def transcode_dicom_to_htj2k(
730733
"Install it with: pip install nvidia-nvimgcodec-cu{XX}[all] "
731734
"(replace {XX} with your CUDA version, e.g., cu13)"
732735
)
733-
736+
734737
# Validate input
735738
if not os.path.exists(input_dir):
736739
raise ValueError(f"Input directory does not exist: {input_dir}")
737-
740+
738741
if not os.path.isdir(input_dir):
739742
raise ValueError(f"Input path is not a directory: {input_dir}")
740-
743+
741744
# Get all DICOM files
742745
dicom_files = []
743746
for pattern in ["*.dcm", "*"]:
744747
dicom_files.extend(glob.glob(os.path.join(input_dir, pattern)))
745-
748+
746749
# Filter to actual DICOM files
747750
valid_dicom_files = []
748751
for file_path in dicom_files:
749752
if os.path.isfile(file_path):
750753
try:
751754
# Quick check if it's a DICOM file
752-
with open(file_path, 'rb') as f:
755+
with open(file_path, "rb") as f:
753756
f.seek(128)
754757
magic = f.read(4)
755-
if magic == b'DICM':
758+
if magic == b"DICM":
756759
valid_dicom_files.append(file_path)
757760
except Exception:
758761
continue
759-
762+
760763
if not valid_dicom_files:
761764
raise ValueError(f"No valid DICOM files found in {input_dir}")
762-
765+
763766
logger.info(f"Found {len(valid_dicom_files)} DICOM files to transcode")
764-
767+
765768
# Create output directory
766769
if output_dir is None:
767770
output_dir = tempfile.mkdtemp(prefix="htj2k_")
768771
else:
769772
os.makedirs(output_dir, exist_ok=True)
770-
773+
771774
# Create encoder and decoder instances (reused for all files)
772775
encoder = _get_nvimgcodec_encoder()
773776
decoder = _get_nvimgcodec_decoder() # Always needed for decoding input DICOM images
774-
777+
775778
# HTJ2K Transfer Syntax UID - Lossless Only
776779
# 1.2.840.10008.1.2.4.201 = HTJ2K Lossless Only
777780
target_transfer_syntax = "1.2.840.10008.1.2.4.201"
778781
quality_type = nvimgcodec.QualityType.LOSSLESS
779782
logger.info("Using lossless HTJ2K compression")
780-
783+
781784
# Configure JPEG2K encoding parameters
782785
jpeg2k_encode_params = nvimgcodec.Jpeg2kEncodeParams()
783786
jpeg2k_encode_params.num_resolutions = num_resolutions
784787
jpeg2k_encode_params.code_block_size = code_block_size
785788
jpeg2k_encode_params.bitstream_type = nvimgcodec.Jpeg2kBitstreamType.JP2
786789
jpeg2k_encode_params.prog_order = nvimgcodec.Jpeg2kProgOrder.LRCP
787790
jpeg2k_encode_params.ht = True # Enable High Throughput mode
788-
791+
789792
encode_params = nvimgcodec.EncodeParams(
790793
quality_type=quality_type,
791794
jpeg2k_encode_params=jpeg2k_encode_params,
@@ -795,37 +798,43 @@ def transcode_dicom_to_htj2k(
795798
allow_any_depth=True,
796799
color_spec=nvimgcodec.ColorSpec.UNCHANGED,
797800
)
798-
801+
799802
# Define transfer syntax constants (use frozenset for O(1) membership testing)
800-
JPEG2000_SYNTAXES = frozenset([
801-
"1.2.840.10008.1.2.4.90", # JPEG 2000 Image Compression (Lossless Only)
802-
"1.2.840.10008.1.2.4.91", # JPEG 2000 Image Compression
803-
])
804-
805-
HTJ2K_SYNTAXES = frozenset([
806-
"1.2.840.10008.1.2.4.201", # High-Throughput JPEG 2000 Image Compression (Lossless Only)
807-
"1.2.840.10008.1.2.4.202", # High-Throughput JPEG 2000 with RPCL Options Image Compression (Lossless Only)
808-
"1.2.840.10008.1.2.4.203", # High-Throughput JPEG 2000 Image Compression
809-
])
810-
811-
JPEG_SYNTAXES = frozenset([
812-
"1.2.840.10008.1.2.4.50", # JPEG Baseline (Process 1)
813-
"1.2.840.10008.1.2.4.51", # JPEG Extended (Process 2 & 4)
814-
"1.2.840.10008.1.2.4.57", # JPEG Lossless, Non-Hierarchical (Process 14)
815-
"1.2.840.10008.1.2.4.70", # JPEG Lossless, Non-Hierarchical, First-Order Prediction
816-
])
817-
803+
JPEG2000_SYNTAXES = frozenset(
804+
[
805+
"1.2.840.10008.1.2.4.90", # JPEG 2000 Image Compression (Lossless Only)
806+
"1.2.840.10008.1.2.4.91", # JPEG 2000 Image Compression
807+
]
808+
)
809+
810+
HTJ2K_SYNTAXES = frozenset(
811+
[
812+
"1.2.840.10008.1.2.4.201", # High-Throughput JPEG 2000 Image Compression (Lossless Only)
813+
"1.2.840.10008.1.2.4.202", # High-Throughput JPEG 2000 with RPCL Options Image Compression (Lossless Only)
814+
"1.2.840.10008.1.2.4.203", # High-Throughput JPEG 2000 Image Compression
815+
]
816+
)
817+
818+
JPEG_SYNTAXES = frozenset(
819+
[
820+
"1.2.840.10008.1.2.4.50", # JPEG Baseline (Process 1)
821+
"1.2.840.10008.1.2.4.51", # JPEG Extended (Process 2 & 4)
822+
"1.2.840.10008.1.2.4.57", # JPEG Lossless, Non-Hierarchical (Process 14)
823+
"1.2.840.10008.1.2.4.70", # JPEG Lossless, Non-Hierarchical, First-Order Prediction
824+
]
825+
)
826+
818827
# Pre-compute combined set for nvimgcodec-compatible formats
819828
NVIMGCODEC_SYNTAXES = JPEG2000_SYNTAXES | JPEG_SYNTAXES
820-
829+
821830
start_time = time.time()
822831
transcoded_count = 0
823832
skipped_count = 0
824-
833+
825834
# Calculate batch info for logging
826835
total_files = len(valid_dicom_files)
827836
total_batches = (total_files + max_batch_size - 1) // max_batch_size
828-
837+
829838
for batch_start in range(0, total_files, max_batch_size):
830839
batch_end = min(batch_start + max_batch_size, total_files)
831840
current_batch = batch_start // max_batch_size + 1
@@ -836,14 +845,16 @@ def transcode_dicom_to_htj2k(
836845
pydicom_batch = []
837846
copy_batch = []
838847
for idx, ds in enumerate(batch_datasets):
839-
current_ts = getattr(ds, 'file_meta', {}).get('TransferSyntaxUID', None)
848+
current_ts = getattr(ds, "file_meta", {}).get("TransferSyntaxUID", None)
840849
if current_ts is None:
841850
raise ValueError(f"DICOM file {os.path.basename(batch_files[idx])} does not have a Transfer Syntax UID")
842-
851+
843852
ts_str = str(current_ts)
844853
if ts_str in NVIMGCODEC_SYNTAXES:
845854
if not hasattr(ds, "PixelData") or ds.PixelData is None:
846-
raise ValueError(f"DICOM file {os.path.basename(batch_files[idx])} does not have a PixelData member")
855+
raise ValueError(
856+
f"DICOM file {os.path.basename(batch_files[idx])} does not have a PixelData member"
857+
)
847858
nvimgcodec_batch.append(idx)
848859
elif ts_str in HTJ2K_SYNTAXES:
849860
copy_batch.append(idx)
@@ -859,7 +870,7 @@ def transcode_dicom_to_htj2k(
859870
data_sequence = []
860871
decoded_data = []
861872
num_frames = []
862-
873+
863874
# Decode using nvimgcodec for compressed formats
864875
if nvimgcodec_batch:
865876
for idx in nvimgcodec_batch:
@@ -887,21 +898,21 @@ def transcode_dicom_to_htj2k(
887898
# Reassemble and save transcoded files
888899
frame_offset = 0
889900
files_to_process = nvimgcodec_batch + pydicom_batch
890-
901+
891902
for list_idx, dataset_idx in enumerate(files_to_process):
892903
nframes = num_frames[list_idx]
893-
encoded_frames = [bytes(enc) for enc in encoded_data[frame_offset:frame_offset + nframes]]
904+
encoded_frames = [bytes(enc) for enc in encoded_data[frame_offset : frame_offset + nframes]]
894905
frame_offset += nframes
895-
906+
896907
# Update dataset with HTJ2K encoded data
897908
batch_datasets[dataset_idx].PixelData = pydicom.encaps.encapsulate(encoded_frames)
898909
batch_datasets[dataset_idx].file_meta.TransferSyntaxUID = pydicom.uid.UID(target_transfer_syntax)
899-
910+
900911
# Save transcoded file
901912
output_file = os.path.join(output_dir, os.path.basename(batch_files[dataset_idx]))
902913
batch_datasets[dataset_idx].save_as(output_file)
903914
transcoded_count += 1
904-
915+
905916
elapsed_time = time.time() - start_time
906917

907918
logger.info(f"Transcoding complete:")
@@ -910,5 +921,5 @@ def transcode_dicom_to_htj2k(
910921
logger.info(f" Already HTJ2K (copied): {skipped_count}")
911922
logger.info(f" Time elapsed: {elapsed_time:.2f} seconds")
912923
logger.info(f" Output directory: {output_dir}")
913-
924+
914925
return output_dir

0 commit comments

Comments
 (0)