From edde0a307db8ca0e15ebad69175524e09d0343c5 Mon Sep 17 00:00:00 2001
From: Tim Holdsworth <tholdsworth@sandiego.edu>
Date: Wed, 19 Mar 2025 18:45:38 -0700
Subject: [PATCH 01/13] try fixing deprecated huggingface download

---
 CRAFT/model.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index a2e2e4e..a430791 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -6,7 +6,7 @@
 from PIL import Image
 import numpy as np
 import cv2
-from huggingface_hub import hf_hub_url, cached_download
+from huggingface_hub import hf_hub_url, hf_hub_download
 
 from CRAFT.craft import CRAFT, init_CRAFT_model
 from CRAFT.refinenet import RefineNet, init_refiner_model
@@ -71,9 +71,13 @@ def __init__(
         for model_name in ['craft', 'refiner']:
             config = HF_MODELS[model_name]
             paths[model_name] = os.path.join(cache_dir, config['filename'])
+            # Replacement code
             if not local_files_only:
-                config_file_url = hf_hub_url(repo_id=config['repo_id'], filename=config['filename'])
-                cached_download(config_file_url, cache_dir=cache_dir, force_filename=config['filename'])
+                paths[model_name] = hf_hub_download(
+                    repo_id=config['repo_id'],
+                    filename=config['filename'],
+                    cache_dir=cache_dir
+                )
             
         self.net = init_CRAFT_model(paths['craft'], device, fp16=fp16)
         if self.use_refiner:

From 4e8193d35006f181466fe603f7edfb371e200f0e Mon Sep 17 00:00:00 2001
From: Tim Holdsworth <tholdsworth@sandiego.edu>
Date: Wed, 19 Mar 2025 19:06:58 -0700
Subject: [PATCH 02/13] fix HF download and remove comment

---
 CRAFT/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index a430791..e9ba2b6 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -71,7 +71,7 @@ def __init__(
         for model_name in ['craft', 'refiner']:
             config = HF_MODELS[model_name]
             paths[model_name] = os.path.join(cache_dir, config['filename'])
-            # Replacement code
+           
             if not local_files_only:
                 paths[model_name] = hf_hub_download(
                     repo_id=config['repo_id'],

From 4d9ac3879e442239483e0d8855e3868bc6c2dcf4 Mon Sep 17 00:00:00 2001
From: Tim Holdsworth <tholdsworth@sandiego.edu>
Date: Wed, 19 Mar 2025 19:07:53 -0700
Subject: [PATCH 03/13] remove empty line so it matches upstream repo

---
 CRAFT/model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index e9ba2b6..3ea97ec 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -71,7 +71,6 @@ def __init__(
         for model_name in ['craft', 'refiner']:
             config = HF_MODELS[model_name]
             paths[model_name] = os.path.join(cache_dir, config['filename'])
-           
             if not local_files_only:
                 paths[model_name] = hf_hub_download(
                     repo_id=config['repo_id'],

From 5e40d8a37b33b1129124b6d90a934580175cc5b6 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Sun, 6 Apr 2025 17:54:25 -0700
Subject: [PATCH 04/13] add get_batch_polygons

---
 CRAFT/model.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 3ea97ec..492a3b8 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -103,6 +103,99 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np.
             
         return score_text, score_link
 
+    def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
+        """Batch process pre-normalized images on GPU"""
+        # Forward pass
+        with torch.no_grad():
+            y, _ = self.net(batch_images)
+            if self.refiner:
+                y, _ = self.refiner(y, None)
+
+        # Batch post-processing
+        text_scores = y[..., 0]  # [B, H, W]
+        link_scores = y[..., 1] if not self.refiner else y[..., 0]
+        
+        # Threshold maps on GPU
+        text_mask = (text_scores > self.text_threshold)
+        link_mask = (link_scores > self.link_threshold)
+        combined_mask = text_mask & link_mask
+
+        # Find connected components using PyTorch's label
+        batch_labels = [
+            torch.ops.torchvision.label_connected_components(mask.float())
+            for mask in combined_mask
+        ]
+
+        # Extract polygon coordinates for each component
+        batch_polys = []
+        for b_idx in range(batch_images.size(0)):
+            polys = []
+            for label in torch.unique(batch_labels[b_idx]):
+                if label == 0: continue
+                # Get component coordinates (GPU tensor)
+                y_coords, x_coords = torch.where(batch_labels[b_idx] == label)
+                if len(x_coords) < 4: continue
+                
+                # Find convex hull (custom kernel or approximation)
+                poly_points = self._convex_hull(x_coords, y_coords)
+                
+                # Scale coordinates using precomputed ratios
+                scaled_poly = poly_points * torch.tensor([
+                    [ratios_w[b_idx], ratios_h[b_idx]]
+                ], device=self.device)
+                
+                polys.append(scaled_poly)
+            batch_polys.append(polys)
+
+        return batch_polys
+
+
+    def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
+        """Batch process pre-normalized images on GPU"""
+        # Forward pass
+        with torch.no_grad():
+            y, _ = self.net(batch_images)
+            if self.refiner:
+                y, _ = self.refiner(y, None)
+
+        # Batch post-processing
+        text_scores = y[..., 0]  # [B, H, W]
+        link_scores = y[..., 1] if not self.refiner else y[..., 0]
+        
+        # Threshold maps on GPU
+        text_mask = (text_scores > self.text_threshold)
+        link_mask = (link_scores > self.link_threshold)
+        combined_mask = text_mask & link_mask
+
+        # Find connected components using PyTorch's label
+        batch_labels = [
+            torch.ops.torchvision.label_connected_components(mask.float())
+            for mask in combined_mask
+        ]
+
+        # Extract polygon coordinates for each component
+        batch_polys = []
+        for b_idx in range(batch_images.size(0)):
+            polys = []
+            for label in torch.unique(batch_labels[b_idx]):
+                if label == 0: continue
+                # Get component coordinates (GPU tensor)
+                y_coords, x_coords = torch.where(batch_labels[b_idx] == label)
+                if len(x_coords) < 4: continue
+                
+                # Find convex hull (custom kernel or approximation)
+                poly_points = self._convex_hull(x_coords, y_coords)
+                
+                # Scale coordinates using precomputed ratios
+                scaled_poly = poly_points * torch.tensor([
+                    [ratios_w[b_idx], ratios_h[b_idx]]
+                ], device=self.device)
+                
+                polys.append(scaled_poly)
+            batch_polys.append(polys)
+
+        return batch_polys
+
     def get_polygons(self, image: Image.Image) -> List[List[List[int]]]:
         x, ratio_w, ratio_h = preprocess_image(np.array(image), self.canvas_size, self.mag_ratio)
         

From 5b4cddec039806be29fd58d8e2a3bcb85f7019be Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Mon, 7 Apr 2025 00:50:28 -0700
Subject: [PATCH 05/13] fix call to refiner, threshold maps on gpu not working

---
 CRAFT/model.py | 77 ++++++++++++++++----------------------------------
 1 file changed, 25 insertions(+), 52 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 492a3b8..91bb71b 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -107,60 +107,15 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
         """Batch process pre-normalized images on GPU"""
         # Forward pass
         with torch.no_grad():
-            y, _ = self.net(batch_images)
+            y, feature = self.net(batch_images.to(self.device)) 
             if self.refiner:
-                y, _ = self.refiner(y, None)
-
-        # Batch post-processing
-        text_scores = y[..., 0]  # [B, H, W]
-        link_scores = y[..., 1] if not self.refiner else y[..., 0]
-        
-        # Threshold maps on GPU
-        text_mask = (text_scores > self.text_threshold)
-        link_mask = (link_scores > self.link_threshold)
-        combined_mask = text_mask & link_mask
-
-        # Find connected components using PyTorch's label
-        batch_labels = [
-            torch.ops.torchvision.label_connected_components(mask.float())
-            for mask in combined_mask
-        ]
-
-        # Extract polygon coordinates for each component
-        batch_polys = []
-        for b_idx in range(batch_images.size(0)):
-            polys = []
-            for label in torch.unique(batch_labels[b_idx]):
-                if label == 0: continue
-                # Get component coordinates (GPU tensor)
-                y_coords, x_coords = torch.where(batch_labels[b_idx] == label)
-                if len(x_coords) < 4: continue
-                
-                # Find convex hull (custom kernel or approximation)
-                poly_points = self._convex_hull(x_coords, y_coords)
-                
-                # Scale coordinates using precomputed ratios
-                scaled_poly = poly_points * torch.tensor([
-                    [ratios_w[b_idx], ratios_h[b_idx]]
-                ], device=self.device)
-                
-                polys.append(scaled_poly)
-            batch_polys.append(polys)
-
-        return batch_polys
-
-
-    def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
-        """Batch process pre-normalized images on GPU"""
-        # Forward pass
-        with torch.no_grad():
-            y, _ = self.net(batch_images)
-            if self.refiner:
-                y, _ = self.refiner(y, None)
+                y_refiner = self.refiner(y, feature)
+                link_scores = y_refiner[..., 0]  # [B, H, W]
+            else:
+                link_scores = y[..., 1]  # [B, H, W]
+            
+            text_scores = y[..., 0]  # [B, H, W]
 
-        # Batch post-processing
-        text_scores = y[..., 0]  # [B, H, W]
-        link_scores = y[..., 1] if not self.refiner else y[..., 0]
         
         # Threshold maps on GPU
         text_mask = (text_scores > self.text_threshold)
@@ -195,6 +150,24 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             batch_polys.append(polys)
 
         return batch_polys
+    
+    def _convex_hull(self, x_coords, y_coords):
+        """Simple convex hull approximation for GPU tensors"""
+        # For character detection, a simple bounding box is often sufficient
+        min_x = torch.min(x_coords)
+        max_x = torch.max(x_coords)
+        min_y = torch.min(y_coords)
+        max_y = torch.max(y_coords)
+
+        # Create rectangle corners
+        pts = torch.tensor([
+            [min_x, min_y],
+            [max_x, min_y],
+            [max_x, max_y],
+            [min_x, max_y]
+        ], device=x_coords.device)
+
+        return pts
 
     def get_polygons(self, image: Image.Image) -> List[List[List[int]]]:
         x, ratio_w, ratio_h = preprocess_image(np.array(image), self.canvas_size, self.mag_ratio)

From 07f6d71e011e1a51fc6550d4c113cf9c8f110c16 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Mon, 7 Apr 2025 01:33:02 -0700
Subject: [PATCH 06/13] update get_batch_polygons() to do batch inference, still
 does post processing in seq on cpu

---
 CRAFT/model.py | 63 +++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 91bb71b..178be77 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -116,38 +116,39 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             
             text_scores = y[..., 0]  # [B, H, W]
 
-        
-        # Threshold maps on GPU
-        text_mask = (text_scores > self.text_threshold)
-        link_mask = (link_scores > self.link_threshold)
-        combined_mask = text_mask & link_mask
-
-        # Find connected components using PyTorch's label
-        batch_labels = [
-            torch.ops.torchvision.label_connected_components(mask.float())
-            for mask in combined_mask
-        ]
-
-        # Extract polygon coordinates for each component
+        batch_size = batch_images.size(0)
+        # Process each image in the batch (minimize CPU transfers)
         batch_polys = []
-        for b_idx in range(batch_images.size(0)):
-            polys = []
-            for label in torch.unique(batch_labels[b_idx]):
-                if label == 0: continue
-                # Get component coordinates (GPU tensor)
-                y_coords, x_coords = torch.where(batch_labels[b_idx] == label)
-                if len(x_coords) < 4: continue
-                
-                # Find convex hull (custom kernel or approximation)
-                poly_points = self._convex_hull(x_coords, y_coords)
-                
-                # Scale coordinates using precomputed ratios
-                scaled_poly = poly_points * torch.tensor([
-                    [ratios_w[b_idx], ratios_h[b_idx]]
-                ], device=self.device)
-                
-                polys.append(scaled_poly)
-            batch_polys.append(polys)
+        for b_idx in range(batch_size):
+            # Extract scores for this image
+            text_score = text_scores[b_idx].cpu().numpy()
+            link_score = link_scores[b_idx].cpu().numpy()
+            
+            # Get current ratios
+            curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w
+            curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h
+            
+            # Use existing OpenCV-based post-processing
+            boxes, polys = getDetBoxes(
+                text_score, link_score,
+                self.text_threshold, self.link_threshold,
+                self.low_text, False  # Don't need detailed polygons, just boxes
+            )
+            
+            # Adjust coordinates
+            boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
+            
+            # Convert to tensor and add to batch
+            image_polys = []
+            if len(boxes) > 0:
+                # Ensure boxes is in a list format before processing
+                boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes
+                for box in boxes:
+                    # Convert to tensor (4 corner points)
+                    box_tensor = torch.tensor(box, dtype=torch.float32, device=self.device)
+                    image_polys.append(box_tensor)
+                    
+            batch_polys.append(image_polys)
 
         return batch_polys
     

From cfb7ec14907ba7dfad65bcbbb70de116776b08b1 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Tue, 15 Apr 2025 18:32:59 -0700
Subject: [PATCH 07/13] trying to fix warning about pretrained arg being
 renamed to weights

---
 CRAFT/basenet/vgg16_bn.py | 6 +++---
 CRAFT/craft.py            | 4 ++--
 CRAFT/model.py            | 1 +
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/CRAFT/basenet/vgg16_bn.py b/CRAFT/basenet/vgg16_bn.py
index 6cad358..e401b06 100644
--- a/CRAFT/basenet/vgg16_bn.py
+++ b/CRAFT/basenet/vgg16_bn.py
@@ -27,10 +27,10 @@ def init_weights(modules):
 
 
 class vgg16_bn(torch.nn.Module):
-    def __init__(self, pretrained=True, freeze=True):
+    def __init__(self, weights=True, freeze=True):
         super(vgg16_bn, self).__init__()
         model_urls['vgg16_bn'] = model_urls['vgg16_bn'].replace('https://', 'http://')
-        vgg_pretrained_features = models.vgg16_bn(pretrained=pretrained).features
+        vgg_pretrained_features = models.vgg16_bn(weights=weights).features
         self.slice1 = torch.nn.Sequential()
         self.slice2 = torch.nn.Sequential()
         self.slice3 = torch.nn.Sequential()
@@ -52,7 +52,7 @@ def __init__(self, pretrained=True, freeze=True):
                 nn.Conv2d(1024, 1024, kernel_size=1)
         )
 
-        if not pretrained:
+        if not weights:
             init_weights(self.slice1.modules())
             init_weights(self.slice2.modules())
             init_weights(self.slice3.modules())
diff --git a/CRAFT/craft.py b/CRAFT/craft.py
index 782a98e..5ab2b11 100755
--- a/CRAFT/craft.py
+++ b/CRAFT/craft.py
@@ -33,11 +33,11 @@ def forward(self, x):
 
 class CRAFT(nn.Module):
     
-    def __init__(self, pretrained=False, freeze=False):
+    def __init__(self, weights=False, freeze=False):
         super(CRAFT, self).__init__()
 
         """ Base network """
-        self.basenet = vgg16_bn(pretrained, freeze)
+        self.basenet = vgg16_bn(weights, freeze)
 
         """ U network """
         self.upconv1 = double_conv(1024, 512, 256)
diff --git a/CRAFT/model.py b/CRAFT/model.py
index 178be77..67f7f60 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -217,3 +217,4 @@ def get_boxes(self, image: Image.Image) -> List[List[List[int]]]:
         
         boxes_final = self._get_boxes_preproc(x, ratio_w, ratio_h)
         return boxes_final
+

From e48768a9ee0e922e2b46b44f72bc7c76e9198f34 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 12:51:03 -0700
Subject: [PATCH 08/13] broke get_batch_polygons with ratio_w tensor

---
 CRAFT/model.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 67f7f60..9838c44 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -106,6 +106,10 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np.
     def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
         """Batch process pre-normalized images on GPU"""
         # Forward pass
+        batch_images = batch_images.float()  # Convert to float32
+        if self.fp16:
+            batch_images = batch_images.half()  # Convert to half if using fp16
+
         with torch.no_grad():
             y, feature = self.net(batch_images.to(self.device)) 
             if self.refiner:
@@ -119,10 +123,16 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
         batch_size = batch_images.size(0)
         # Process each image in the batch (minimize CPU transfers)
         batch_polys = []
+        text_scores = text_scores.cpu().numpy()
+        link_scores = link_scores.cpu().numpy()
+        ratios_w = ratios_w.cpu().numpy()
+        ratios_h = ratios_h.cpu().numpy()
+
+        # TODO can we do some of this stuff in parallel
         for b_idx in range(batch_size):
             # Extract scores for this image
-            text_score = text_scores[b_idx].cpu().numpy()
-            link_score = link_scores[b_idx].cpu().numpy()
+            # text_score = text_scores[b_idx].cpu().numpy()
+            # link_score = link_scores[b_idx].cpu().numpy()
             
             # Get current ratios
             curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w
@@ -130,12 +140,13 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             
             # Use existing OpenCV-based post-processing
             boxes, polys = getDetBoxes(
-                text_score, link_score,
+                text_scores[b_idx], link_scores[b_idx],
                 self.text_threshold, self.link_threshold,
                 self.low_text, False  # Don't need detailed polygons, just boxes
             )
             
             # Adjust coordinates
+            breakpoint()
             boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
             
             # Convert to tensor and add to batch
@@ -144,9 +155,7 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
                 # Ensure boxes is in a list format before processing
                 boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes
                 for box in boxes:
-                    # Convert to tensor (4 corner points)
-                    box_tensor = torch.tensor(box, dtype=torch.float32, device=self.device)
-                    image_polys.append(box_tensor)
+                    image_polys.append(box)
                     
             batch_polys.append(image_polys)
 

From 7ca096861187abe54b91e8928554cb638d9d70d8 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 13:10:27 -0700
Subject: [PATCH 09/13] fixed, runs now (although slowly still)

---
 CRAFT/model.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 9838c44..d42980b 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -125,8 +125,8 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
         batch_polys = []
         text_scores = text_scores.cpu().numpy()
         link_scores = link_scores.cpu().numpy()
-        ratios_w = ratios_w.cpu().numpy()
-        ratios_h = ratios_h.cpu().numpy()
+        # ratios_w = ratios_w.cpu().numpy()
+        # ratios_h = ratios_h.cpu().numpy()
 
         # TODO can we do some of this stuff in parallel
         for b_idx in range(batch_size):
@@ -146,7 +146,6 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             )
             
             # Adjust coordinates
-            breakpoint()
             boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
             
             # Convert to tensor and add to batch

From beaf0c49b4b99d41de7294866efd01243a423028 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 16:53:40 -0700
Subject: [PATCH 10/13] already have floats in craft model, just need to
 convert to half precision if we are using fp16

---
 CRAFT/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index d42980b..a5f6968 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -106,7 +106,7 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np.
     def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
         """Batch process pre-normalized images on GPU"""
         # Forward pass
-        batch_images = batch_images.float()  # Convert to float32
+        #batch_images = batch_images.float()  # Convert to float32
         if self.fp16:
             batch_images = batch_images.half()  # Convert to half if using fp16
 

From ec96efd627c94eebc7a5bf8a3c11fae46ef48863 Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 17:12:11 -0700
Subject: [PATCH 11/13] detach the link scores from torch (doesn't seem to make
 a difference)

---
 CRAFT/model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index a5f6968..7576cf0 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -123,8 +123,8 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
         batch_size = batch_images.size(0)
         # Process each image in the batch (minimize CPU transfers)
         batch_polys = []
-        text_scores = text_scores.cpu().numpy()
-        link_scores = link_scores.cpu().numpy()
+        text_scores = text_scores.detach().cpu().numpy()
+        link_scores = link_scores.detach().cpu().numpy()
         # ratios_w = ratios_w.cpu().numpy()
         # ratios_h = ratios_h.cpu().numpy()
 

From acd9cd5805144c9490d1d124f3f4d50ae7b1fd1b Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 19:21:33 -0700
Subject: [PATCH 12/13] multiprocess getDetBoxes with process_single()

---
 CRAFT/model.py | 77 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 29 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 7576cf0..951ed98 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -6,8 +6,10 @@
 from PIL import Image
 import numpy as np
 import cv2
-from huggingface_hub import hf_hub_url, hf_hub_download
+from multiprocessing import Pool
+import functools
 
+from huggingface_hub import hf_hub_url, hf_hub_download
 from CRAFT.craft import CRAFT, init_CRAFT_model
 from CRAFT.refinenet import RefineNet, init_refiner_model
 from CRAFT.craft_utils import adjustResultCoordinates, getDetBoxes
@@ -39,6 +41,16 @@ def preprocess_image(image: np.ndarray, canvas_size: int, mag_ratio: bool):
     x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
     return x, ratio_w, ratio_h
 
+def process_single(args):
+    text_score, link_score, ratio_w, ratio_h, text_threshold, link_threshold, low_text = args
+
+    boxes, polys = getDetBoxes(
+        text_score, link_score,
+        text_threshold, link_threshold,
+        low_text, False
+    )
+    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
+    return boxes
 
 class CRAFTModel:
     
@@ -103,6 +115,7 @@ def get_text_map(self, x: torch.Tensor, ratio_w: int, ratio_h: int) -> Tuple[np.
             
         return score_text, score_link
 
+    
     def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor, ratios_h: torch.Tensor):
         """Batch process pre-normalized images on GPU"""
         # Forward pass
@@ -121,44 +134,50 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             text_scores = y[..., 0]  # [B, H, W]
 
         batch_size = batch_images.size(0)
-        # Process each image in the batch (minimize CPU transfers)
-        batch_polys = []
         text_scores = text_scores.detach().cpu().numpy()
         link_scores = link_scores.detach().cpu().numpy()
-        # ratios_w = ratios_w.cpu().numpy()
-        # ratios_h = ratios_h.cpu().numpy()
+       
+        ratios_w = ratios_w.cpu().numpy()
+        ratios_h = ratios_h.cpu().numpy()
+
+        with Pool(processes=os.cpu_count()) as pool:
+            batch_args = [(text_scores[i], link_scores[i], ratios_w[i], ratios_h[i], 
+                           self.text_threshold, self.link_threshold, self.low_text) 
+                            for i in range(batch_size)]
+            batch_polys = pool.map(process_single, batch_args)
+
+        return batch_polys
 
         # TODO can we do some of this stuff in parallel
-        for b_idx in range(batch_size):
-            # Extract scores for this image
-            # text_score = text_scores[b_idx].cpu().numpy()
-            # link_score = link_scores[b_idx].cpu().numpy()
+        # batch_polys = []
+
+        # for b_idx in range(batch_size):
             
-            # Get current ratios
-            curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w
-            curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h
+        #     # Get current ratios
+        #     curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w
+        #     curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h
             
-            # Use existing OpenCV-based post-processing
-            boxes, polys = getDetBoxes(
-                text_scores[b_idx], link_scores[b_idx],
-                self.text_threshold, self.link_threshold,
-                self.low_text, False  # Don't need detailed polygons, just boxes
-            )
+        #     # Use existing OpenCV-based post-processing
+        #     boxes, polys = getDetBoxes(
+        #         text_scores[b_idx], link_scores[b_idx],
+        #         self.text_threshold, self.link_threshold,
+        #         self.low_text, False  # Don't need detailed polygons, just boxes
+        #     )
             
-            # Adjust coordinates
-            boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
+        #     # Adjust coordinates
+        #     boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
             
-            # Convert to tensor and add to batch
-            image_polys = []
-            if len(boxes) > 0:
-                # Ensure boxes is in a list format before processing
-                boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes
-                for box in boxes:
-                    image_polys.append(box)
+        #     # Convert to tensor and add to batch
+        #     image_polys = []
+        #     if len(boxes) > 0:
+        #         # Ensure boxes is in a list format before processing
+        #         boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes
+        #         for box in boxes:
+        #             image_polys.append(box)
                     
-            batch_polys.append(image_polys)
+        #     batch_polys.append(image_polys)
 
-        return batch_polys
+        # return batch_polys
     
     def _convex_hull(self, x_coords, y_coords):
         """Simple convex hull approximation for GPU tensors"""

From 8a8130519b40a2ba5ba5b3ed06219eb945ba030f Mon Sep 17 00:00:00 2001
From: timholds <tholdsworth@sandiego.edu>
Date: Wed, 16 Apr 2025 19:50:13 -0700
Subject: [PATCH 13/13] get_batch_polygons() now uses multiprocessing, WORKING

---
 CRAFT/model.py | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/CRAFT/model.py b/CRAFT/model.py
index 951ed98..4bc93b7 100644
--- a/CRAFT/model.py
+++ b/CRAFT/model.py
@@ -147,37 +147,6 @@ def get_batch_polygons(self, batch_images: torch.Tensor, ratios_w: torch.Tensor,
             batch_polys = pool.map(process_single, batch_args)
 
         return batch_polys
-
-        # TODO can we do some of this stuff in parallel
-        # batch_polys = []
-
-        # for b_idx in range(batch_size):
-            
-        #     # Get current ratios
-        #     curr_ratio_w = ratios_w[b_idx].item() if isinstance(ratios_w, torch.Tensor) else ratios_w
-        #     curr_ratio_h = ratios_h[b_idx].item() if isinstance(ratios_h, torch.Tensor) else ratios_h
-            
-        #     # Use existing OpenCV-based post-processing
-        #     boxes, polys = getDetBoxes(
-        #         text_scores[b_idx], link_scores[b_idx],
-        #         self.text_threshold, self.link_threshold,
-        #         self.low_text, False  # Don't need detailed polygons, just boxes
-        #     )
-            
-        #     # Adjust coordinates
-        #     boxes = adjustResultCoordinates(boxes, curr_ratio_w, curr_ratio_h)
-            
-        #     # Convert to tensor and add to batch
-        #     image_polys = []
-        #     if len(boxes) > 0:
-        #         # Ensure boxes is in a list format before processing
-        #         boxes = boxes.tolist() if isinstance(boxes, np.ndarray) else boxes
-        #         for box in boxes:
-        #             image_polys.append(box)
-                    
-        #     batch_polys.append(image_polys)
-
-        # return batch_polys
     
     def _convex_hull(self, x_coords, y_coords):
         """Simple convex hull approximation for GPU tensors"""