From edde0a307db8ca0e15ebad69175524e09d0343c5 Mon Sep 17 00:00:00 2001 From: Tim Holdsworth Date: Wed, 19 Mar 2025 18:45:38 -0700 Subject: [PATCH 01/13] try fixing deprecated huggingface download --- CRAFT/model.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index a2e2e4e..a430791 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -6,7 +6,7 @@ from PIL import Image import numpy as np import cv2 -from huggingface_hub import hf_hub_url, cached_download +from huggingface_hub import hf_hub_url, hf_hub_download from CRAFT.craft import CRAFT, init_CRAFT_model from CRAFT.refinenet import RefineNet, init_refiner_model @@ -71,9 +71,13 @@ def __init__( for model_name in ['craft', 'refiner']: config = HF_MODELS[model_name] paths[model_name] = os.path.join(cache_dir, config['filename']) + # Replacement code if not local_files_only: - config_file_url = hf_hub_url(repo_id=config['repo_id'], filename=config['filename']) - cached_download(config_file_url, cache_dir=cache_dir, force_filename=config['filename']) + paths[model_name] = hf_hub_download( + repo_id=config['repo_id'], + filename=config['filename'], + cache_dir=cache_dir + ) self.net = init_CRAFT_model(paths['craft'], device, fp16=fp16) if self.use_refiner: From 4e8193d35006f181466fe603f7edfb371e200f0e Mon Sep 17 00:00:00 2001 From: Tim Holdsworth Date: Wed, 19 Mar 2025 19:06:58 -0700 Subject: [PATCH 02/13] fix HF download and remove comment --- CRAFT/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index a430791..e9ba2b6 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -71,7 +71,7 @@ def __init__( for model_name in ['craft', 'refiner']: config = HF_MODELS[model_name] paths[model_name] = os.path.join(cache_dir, config['filename']) - # Replacement code + if not local_files_only: paths[model_name] = hf_hub_download( repo_id=config['repo_id'], From 4d9ac3879e442239483e0d8855e3868bc6c2dcf4 Mon Sep 17 00:00:00 2001 From: Tim Holdsworth Date: Wed, 19 Mar 2025 19:07:53 -0700 Subject: [PATCH 03/13] remove empty line so it matches upstream repo --- CRAFT/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index e9ba2b6..3ea97ec 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -71,7 +71,6 @@ def __init__( for model_name in ['craft', 'refiner']: config = HF_MODELS[model_name] paths[model_name] = os.path.join(cache_dir, config['filename']) - if not local_files_only: paths[model_name] = hf_hub_download( repo_id=config['repo_id'], From 5e40d8a37b33b1129124b6d90a934580175cc5b6 Mon Sep 17 00:00:00 2001 From: timholds Date: Sun, 6 Apr 2025 17:54:25 -0700 Subject: [PATCH 04/13] add get_batch_polygons --- CRAFT/model.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/CRAFT/model.py b/CRAFT/model.py index 3ea97ec..492a3b8 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -103,6 +103,99 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np. return score_text, score_link + def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): + """Batch process pre-normalized images on GPU""" + # Forward pass + with torch.no_grad(): + y, _ = self.net(batch_images) + if self.refiner: + y, _ = self.refiner(y, None) + + # Batch post-processing + text_scores = y[..., 0] # [B, H, W] + link_scores = y[..., 1] if not self.refiner else y[..., 0] + + # Threshold maps on GPU + text_mask = (text_scores > self.text_threshold) + link_mask = (link_scores > self.link_threshold) + combined_mask = text_mask & link_mask + + # Find connected components using PyTorch's label + batch_labels = [ + torch.ops.torchvision.label_connected_components(mask.float()) + for mask in combined_mask + ] + + # Extract polygon coordinates for each component + batch_polys = [] + for b_idx in range(batch_images.size(0)): + polys = [] + for label in torch.unique(batch_labels[b_idx]): + if label == 0: continue + # Get component coordinates (GPU tensor) + y_coords, x_coords = torch.where(batch_labels[b_idx] == label) + if len(x_coords) < 4: continue + + # Find convex hull (custom kernel or approximation) + poly_points = self._convex_hull(x_coords, y_coords) + + # Scale coordinates using precomputed ratios + scaled_poly = poly_points * torch.tensor([ + [ratios_w[b_idx], ratios_h[b_idx]] + ], device=self.device) + + polys.append(scaled_poly) + batch_polys.append(polys) + + return batch_polys + + + def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): + """Batch process pre-normalized images on GPU""" + # Forward pass + with torch.no_grad(): + y, _ = self.net(batch_images) + if self.refiner: + y, _ = self.refiner(y, None) + + # Batch post-processing + text_scores = y[..., 0] # [B, H, W] + link_scores = y[..., 1] if not self.refiner else y[..., 0] + + # Threshold maps on GPU + text_mask = (text_scores > self.text_threshold) + link_mask = (link_scores > self.link_threshold) + combined_mask = text_mask & link_mask + + # Find connected components using PyTorch's label + batch_labels = [ + torch.ops.torchvision.label_connected_components(mask.float()) + for mask in combined_mask + ] + + # Extract polygon coordinates for each component + batch_polys = [] + for b_idx in range(batch_images.size(0)): + polys = [] + for label in torch.unique(batch_labels[b_idx]): + if label == 0: continue + # Get component coordinates (GPU tensor) + y_coords, x_coords = torch.where(batch_labels[b_idx] == label) + if len(x_coords) < 4: continue + + # Find convex hull (custom kernel or approximation) + poly_points = self._convex_hull(x_coords, y_coords) + + # Scale coordinates using precomputed ratios + scaled_poly = poly_points * torch.tensor([ + [ratios_w[b_idx], ratios_h[b_idx]] + ], device=self.device) + + polys.append(scaled_poly) + batch_polys.append(polys) + + return batch_polys + def get_polygons(self, image: Image.Image) -> List[List[List[int]]]: x, ratio_w, ratio_h = preprocess_image(np.array(image), self.canvas_size, self.mag_ratio) From 5b4cddec039806be29fd58d8e2a3bcb85f7019be Mon Sep 17 00:00:00 2001 From: timholds Date: Mon, 7 Apr 2025 00:50:28 -0700 Subject: [PATCH 05/13] fix call to refiner, threshold maps on gpu not working --- CRAFT/model.py | 77 ++++++++++++++++---------------------------------- 1 file changed, 25 insertions(+), 52 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 492a3b8..91bb71b 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -107,60 +107,15 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, """Batch process pre-normalized images on GPU""" # Forward pass with torch.no_grad(): - y, _ = self.net(batch_images) + y, feature = self.net(batch_images.to(self.device)) if self.refiner: - y, _ = self.refiner(y, None) - - # Batch post-processing - text_scores = y[..., 0] # [B, H, W] - link_scores = y[..., 1] if not self.refiner else y[..., 0] - - # Threshold maps on GPU - text_mask = (text_scores > self.text_threshold) - link_mask = (link_scores > self.link_threshold) - combined_mask = text_mask & link_mask - - # Find connected components using PyTorch's label - batch_labels = [ - torch.ops.torchvision.label_connected_components(mask.float()) - for mask in combined_mask - ] - - # Extract polygon coordinates for each component - batch_polys = [] - for b_idx in range(batch_images.size(0)): - polys = [] - for label in torch.unique(batch_labels[b_idx]): - if label == 0: continue - # Get component coordinates (GPU tensor) - y_coords, x_coords = torch.where(batch_labels[b_idx] == label) - if len(x_coords) < 4: continue - - # Find convex hull (custom kernel or approximation) - poly_points = self._convex_hull(x_coords, y_coords) - - # Scale coordinates using precomputed ratios - scaled_poly = poly_points * torch.tensor([ - [ratios_w[b_idx], ratios_h[b_idx]] - ], device=self.device) - - polys.append(scaled_poly) - batch_polys.append(polys) - - return batch_polys - - - def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): - """Batch process pre-normalized images on GPU""" - # Forward pass - with torch.no_grad(): - y, _ = self.net(batch_images) - if self.refiner: - y, _ = self.refiner(y, None) + y_refiner = self.refiner(y, feature) + link_scores = y_refiner[..., 0] # [B, H, W] + else: + link_scores = y[..., 1] # [B, H, W] + + text_scores = y[..., 0] # [B, H, W] - # Batch post-processing - text_scores = y[..., 0] # [B, H, W] - link_scores = y[..., 1] if not self.refiner else y[..., 0] # Threshold maps on GPU text_mask = (text_scores > self.text_threshold) @@ -195,6 +150,24 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, batch_polys.append(polys) return batch_polys + + def _convex_hull(self, x_coords, y_coords): + """Simple convex hull approximation for GPU tensors""" + # For character detection, a simple bounding box is often sufficient + min_x = torch.min(x_coords) + max_x = torch.max(x_coords) + min_y = torch.min(y_coords) + max_y = torch.max(y_coords) + + # Create rectangle corners + pts = torch.tensor([ + [min_x, min_y], + [max_x, min_y], + [max_x, max_y], + [min_x, max_y] + ], device=x_coords.device) + + return pts def get_polygons(self, image: Image.Image) -> List[List[List[int]]]: x, ratio_w, ratio_h = preprocess_image(np.array(image), self.canvas_size, self.mag_ratio) From 07f6d71e011e1a51fc6550d4c113cf9c8f110c16 Mon Sep 17 00:00:00 2001 From: timholds Date: Mon, 7 Apr 2025 01:33:02 -0700 Subject: [PATCH 06/13] update get_polygons() to do batch inference, still does post processing in seq on cpu --- CRAFT/model.py | 63 +++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 91bb71b..178be77 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -116,38 +116,39 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, text_scores = y[..., 0] # [B, H, W] - - # Threshold maps on GPU - text_mask = (text_scores > self.text_threshold) - link_mask = (link_scores > self.link_threshold) - combined_mask = text_mask & link_mask - - # Find connected components using PyTorch's label - batch_labels = [ - torch.ops.torchvision.label_connected_components(mask.float()) - for mask in combined_mask - ] - - # Extract polygon coordinates for each component + batch_size = batch_images.size(0) + # Process each image in the batch (minimize CPU transfers) batch_polys = [] - for b_idx in range(batch_images.size(0)): - polys = [] - for label in torch.unique(batch_labels[b_idx]): - if label == 0: continue - # Get component coordinates (GPU tensor) - y_coords, x_coords = torch.where(batch_labels[b_idx] == label) - if len(x_coords) < 4: continue - - # Find convex hull (custom kernel or approximation) - poly_points = self._convex_hull(x_coords, y_coords) - - # Scale coordinates using precomputed ratios - scaled_poly = poly_points * torch.tensor([ - [ratios_w[b_idx], ratios_h[b_idx]] - ], device=self.device) - - polys.append(scaled_poly) - batch_polys.append(polys) + for b_idx in range(batch_size): + # Extract scores for this image + text_score = text_scores[b_idx].cpu().numpy() + link_score = link_scores[b_idx].cpu().numpy() + + # Get current ratios + curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w + curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h + + # Use existing OpenCV-based post-processing + boxes, polys = getDetBoxes( + text_score, link_score, + self.text_threshold, self.link_threshold, + self.low_text, False # Don't need detailed polygons, just boxes + ) + + # Adjust coordinates + boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) + + # Convert to tensor and add to batch + image_polys = [] + if len(boxes) > 0: + # Ensure boxes is in a list format before processing + boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes + for box in boxes: + # Convert to tensor (4 corner points) + box_tensor = torch.tensor(box, dtype=torch.float32, device=self.device) + image_polys.append(box_tensor) + + batch_polys.append(image_polys) return batch_polys From cfb7ec14907ba7dfad65bcbbb70de116776b08b1 Mon Sep 17 00:00:00 2001 From: timholds Date: Tue, 15 Apr 2025 18:32:59 -0700 Subject: [PATCH 07/13] trying to fix warning about pretrained arg being renamed to weights --- CRAFT/basenet/vgg16_bn.py | 6 +++--- CRAFT/craft.py | 4 ++-- CRAFT/model.py | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CRAFT/basenet/vgg16_bn.py b/CRAFT/basenet/vgg16_bn.py index 6cad358..e401b06 100644 --- a/CRAFT/basenet/vgg16_bn.py +++ b/CRAFT/basenet/vgg16_bn.py @@ -27,10 +27,10 @@ def init_weights(modules): class vgg16_bn(torch.nn.Module): - def __init__(self, pretrained=True, freeze=True): + def __init__(self, weights=True, freeze=True): super(vgg16_bn, self).__init__() model_urls['vgg16_bn'] = model_urls['vgg16_bn'].replace('https://', 'http://') - vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features + vgg_pretrained_features = models.vgg16_bn(weights=weights).features self.slice1 = torch.nn.Sequential() self.slice2 = torch.nn.Sequential() self.slice3 = torch.nn.Sequential() @@ -52,7 +52,7 @@ def __init__(self, pretrained=True, freeze=True): nn.Conv2d(1024, 1024, kernel_size=1) ) - if not pretrained: + if not weights: init_weights(self.slice1.modules()) init_weights(self.slice2.modules()) init_weights(self.slice3.modules()) diff --git a/CRAFT/craft.py b/CRAFT/craft.py index 782a98e..5ab2b11 100755 --- a/CRAFT/craft.py +++ b/CRAFT/craft.py @@ -33,11 +33,11 @@ def forward(self, x): class CRAFT(nn.Module): - def __init__(self, pretrained=False, freeze=False): + def __init__(self, weights=False, freeze=False): super(CRAFT, self).__init__() """ Base network """ - self.basenet = vgg16_bn(pretrained, freeze) + self.basenet = vgg16_bn(weights, freeze) """ U network """ self.upconv1 = double_conv(1024, 512, 256) diff --git a/CRAFT/model.py b/CRAFT/model.py index 178be77..67f7f60 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -217,3 +217,4 @@ def get_boxes(self, image: Image.Image) -> List[List[List[int]]]: boxes_final = self._get_boxes_preproc(x, ratio_w, ratio_h) return boxes_final + From e48768a9ee0e922e2b46b44f72bc7c76e9198f34 Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 12:51:03 -0700 Subject: [PATCH 08/13] broke get_batch_polygons with ratio_w tensor --- CRAFT/model.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 67f7f60..9838c44 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -106,6 +106,10 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np. def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): """Batch process pre-normalized images on GPU""" # Forward pass + batch_images = batch_images.float() # Convert to float32 + if self.fp16: + batch_images = batch_images.half() # Convert to half if using fp16 + with torch.no_grad(): y, feature = self.net(batch_images.to(self.device)) if self.refiner: @@ -119,10 +123,16 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, batch_size = batch_images.size(0) # Process each image in the batch (minimize CPU transfers) batch_polys = [] + text_scores = text_scores.cpu().numpy() + link_scores = link_scores.cpu().numpy() + ratios_w = ratios_w.cpu().numpy() + ratios_h = ratios_h.cpu().numpy() + + # TODO can we do some of this stuff in parallel for b_idx in range(batch_size): # Extract scores for this image - text_score = text_scores[b_idx].cpu().numpy() - link_score = link_scores[b_idx].cpu().numpy() + # text_score = text_scores[b_idx].cpu().numpy() + # link_score = link_scores[b_idx].cpu().numpy() # Get current ratios curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w @@ -130,12 +140,13 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, # Use existing OpenCV-based post-processing boxes, polys = getDetBoxes( - text_score, link_score, + text_scores[b_idx], link_scores[b_idx], self.text_threshold, self.link_threshold, self.low_text, False # Don't need detailed polygons, just boxes ) # Adjust coordinates + breakpoint() boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) # Convert to tensor and add to batch @@ -144,9 +155,7 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, # Ensure boxes is in a list format before processing boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes for box in boxes: - # Convert to tensor (4 corner points) - box_tensor = torch.tensor(box, dtype=torch.float32, device=self.device) - image_polys.append(box_tensor) + image_polys.append(box) batch_polys.append(image_polys) From 7ca096861187abe54b91e8928554cb638d9d70d8 Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 13:10:27 -0700 Subject: [PATCH 09/13] fixed, runs now (although slowly still) --- CRAFT/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 9838c44..d42980b 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -125,8 +125,8 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, batch_polys = [] text_scores = text_scores.cpu().numpy() link_scores = link_scores.cpu().numpy() - ratios_w = ratios_w.cpu().numpy() - ratios_h = ratios_h.cpu().numpy() + # ratios_w = ratios_w.cpu().numpy() + # ratios_h = ratios_h.cpu().numpy() # TODO can we do some of this stuff in parallel for b_idx in range(batch_size): @@ -146,7 +146,6 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ) # Adjust coordinates - breakpoint() boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) # Convert to tensor and add to batch From beaf0c49b4b99d41de7294866efd01243a423028 Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 16:53:40 -0700 Subject: [PATCH 10/13] already have floats in craft model, just need to convert to half precision if we are using fp16 --- CRAFT/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index d42980b..a5f6968 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -106,7 +106,7 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np. def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): """Batch process pre-normalized images on GPU""" # Forward pass - batch_images = batch_images.float() # Convert to float32 + #batch_images = batch_images.float() # Convert to float32 if self.fp16: batch_images = batch_images.half() # Convert to half if using fp16 From ec96efd627c94eebc7a5bf8a3c11fae46ef48863 Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 17:12:11 -0700 Subject: [PATCH 11/13] detach the link scores from torch (doesnt seem to make a difference) '' --- CRAFT/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index a5f6968..7576cf0 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -123,8 +123,8 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, batch_size = batch_images.size(0) # Process each image in the batch (minimize CPU transfers) batch_polys = [] - text_scores = text_scores.cpu().numpy() - link_scores = link_scores.cpu().numpy() + text_scores = text_scores.detach().cpu().numpy() + link_scores = link_scores.detach().cpu().numpy() # ratios_w = ratios_w.cpu().numpy() # ratios_h = ratios_h.cpu().numpy() From acd9cd5805144c9490d1d124f3f4d50ae7b1fd1b Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 19:21:33 -0700 Subject: [PATCH 12/13] multiprocess getDetBoxes with get_single_image() --- CRAFT/model.py | 77 +++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 7576cf0..951ed98 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -6,8 +6,10 @@ from PIL import Image import numpy as np import cv2 -from huggingface_hub import hf_hub_url, hf_hub_download +from multiprocessing import Pool +import functools +from huggingface_hub import hf_hub_url, hf_hub_download from CRAFT.craft import CRAFT, init_CRAFT_model from CRAFT.refinenet import RefineNet, init_refiner_model from CRAFT.craft_utils import adjustResultCoordinates, getDetBoxes @@ -39,6 +41,16 @@ def preprocess_image(image: np.ndarray, canvas_size: int, mag_ratio: bool): x = Variable(x.unsqueeze(0)) # [c, h, w] to [b, c, h, w] return x, ratio_w, ratio_h +def process_single(args): + text_score, link_score, ratio_w, ratio_h, text_threshold, link_threshold, low_text = args + + boxes, polys = getDetBoxes( + text_score, link_score, + text_threshold, link_threshold, + low_text, False + ) + boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h) + return boxes class CRAFTModel: @@ -103,6 +115,7 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np. return score_text, score_link + def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor): """Batch process pre-normalized images on GPU""" # Forward pass @@ -121,44 +134,50 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, text_scores = y[..., 0] # [B, H, W] batch_size = batch_images.size(0) - # Process each image in the batch (minimize CPU transfers) - batch_polys = [] text_scores = text_scores.detach().cpu().numpy() link_scores = link_scores.detach().cpu().numpy() - # ratios_w = ratios_w.cpu().numpy() - # ratios_h = ratios_h.cpu().numpy() + + ratios_w = ratios_w.cpu().numpy() + ratios_h = ratios_h.cpu().numpy() + + with Pool(processes=os.cpu_count()) as pool: + batch_args = [(text_scores[i], link_scores[i], ratios_w[i], ratios_h[i], + self.text_threshold, self.link_threshold, self.low_text) + for i in range(batch_size)] + batch_polys = pool.map(process_single, batch_args) + + return batch_polys # TODO can we do some of this stuff in parallel - for b_idx in range(batch_size): - # Extract scores for this image - # text_score = text_scores[b_idx].cpu().numpy() - # link_score = link_scores[b_idx].cpu().numpy() + # batch_polys = [] + + # for b_idx in range(batch_size): - # Get current ratios - curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w - curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h + # # Get current ratios + # curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w + # curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h - # Use existing OpenCV-based post-processing - boxes, polys = getDetBoxes( - text_scores[b_idx], link_scores[b_idx], - self.text_threshold, self.link_threshold, - self.low_text, False # Don't need detailed polygons, just boxes - ) + # # Use existing OpenCV-based post-processing + # boxes, polys = getDetBoxes( + # text_scores[b_idx], link_scores[b_idx], + # self.text_threshold, self.link_threshold, + # self.low_text, False # Don't need detailed polygons, just boxes + # ) - # Adjust coordinates - boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) + # # Adjust coordinates + # boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) - # Convert to tensor and add to batch - image_polys = [] - if len(boxes) > 0: - # Ensure boxes is in a list format before processing - boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes - for box in boxes: - image_polys.append(box) + # # Convert to tensor and add to batch + # image_polys = [] + # if len(boxes) > 0: + # # Ensure boxes is in a list format before processing + # boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes + # for box in boxes: + # image_polys.append(box) - batch_polys.append(image_polys) + # batch_polys.append(image_polys) - return batch_polys + # return batch_polys def _convex_hull(self, x_coords, y_coords): """Simple convex hull approximation for GPU tensors""" From 8a8130519b40a2ba5ba5b3ed06219eb945ba030f Mon Sep 17 00:00:00 2001 From: timholds Date: Wed, 16 Apr 2025 19:50:13 -0700 Subject: [PATCH 13/13] get_batch_polygons() now uses multiprocessing, WORKING --- CRAFT/model.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/CRAFT/model.py b/CRAFT/model.py index 951ed98..4bc93b7 100644 --- a/CRAFT/model.py +++ b/CRAFT/model.py @@ -147,37 +147,6 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, batch_polys = pool.map(process_single, batch_args) return batch_polys - - # TODO can we do some of this stuff in parallel - # batch_polys = [] - - # for b_idx in range(batch_size): - - # # Get current ratios - # curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w - # curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h - - # # Use existing OpenCV-based post-processing - # boxes, polys = getDetBoxes( - # text_scores[b_idx], link_scores[b_idx], - # self.text_threshold, self.link_threshold, - # self.low_text, False # Don't need detailed polygons, just boxes - # ) - - # # Adjust coordinates - # boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h) - - # # Convert to tensor and add to batch - # image_polys = [] - # if len(boxes) > 0: - # # Ensure boxes is in a list format before processing - # boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes - # for box in boxes: - # image_polys.append(box) - - # batch_polys.append(image_polys) - - # return batch_polys def _convex_hull(self, x_coords, y_coords): """Simple convex hull approximation for GPU tensors"""