
Commit 918322a

add ability to only enforce orthogonality loss on activated codes within a batch, for large codebooks (say taming transformers 16k)
1 parent 4c5726f commit 918322a

3 files changed, +19 -6 lines changed

README.md

Lines changed: 3 additions & 2 deletions
@@ -139,8 +139,9 @@ from vector_quantize_pytorch import VectorQuantize
 vq = VectorQuantize(
     dim = 256,
     codebook_size = 256,
-    accept_image_fmap = True,       # set this true to be able to pass in an image feature map
-    orthogonal_reg_weight = 10,     # in paper, they recommended a value of 10
+    accept_image_fmap = True,                  # set this true to be able to pass in an image feature map
+    orthogonal_reg_weight = 10,                # in paper, they recommended a value of 10
+    orthogonal_reg_active_codes_only = False   # set this to True if you have a very large codebook, and would only like to enforce the loss on the activated codes per batch
 )
 
 img_fmap = torch.randn(1, 256, 32, 32)
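
As a quick end-to-end illustration of the new flag, here is a minimal, self-contained usage sketch following the README example above. The 16384-entry codebook, setting the flag to `True`, and the three-value return (`quantized`, `indices`, `loss`) are illustrative assumptions based on the library's documented usage, not part of this commit.

```python
# minimal usage sketch, assuming the 0.4.4 API shown in the README diff above
import torch
from vector_quantize_pytorch import VectorQuantize

vq = VectorQuantize(
    dim = 256,
    codebook_size = 16384,                    # a large codebook, e.g. the 16k codes used by taming-transformers
    accept_image_fmap = True,                 # accept (batch, dim, height, width) feature maps
    orthogonal_reg_weight = 10,               # weight on the orthogonality regularization
    orthogonal_reg_active_codes_only = True   # only penalize the codes activated in this batch
)

img_fmap = torch.randn(1, 256, 32, 32)

# quantized feature map, per-position code indices, and the auxiliary loss
# (commitment term plus the weighted orthogonality term from the forward pass below)
quantized, indices, loss = vq(img_fmap)
```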

setup.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '0.4.3',
+  version = '0.4.4',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   author = 'Phil Wang',

vector_quantize_pytorch/vector_quantize_pytorch.py

Lines changed: 15 additions & 3 deletions
@@ -253,16 +253,17 @@ def __init__(
         n_embed = None,
         codebook_dim = None,
         decay = 0.8,
-        orthogonal_reg_weight = 0.,
-        commitment_weight = None,
         eps = 1e-5,
         kmeans_init = False,
         kmeans_iters = 10,
         use_cosine_sim = False,
         threshold_ema_dead_code = 0,
         channel_last = True,
         accept_image_fmap = False,
-        commitment = 1. # deprecate in next version, turn off by default
+        commitment_weight = None,
+        commitment = 1., # deprecate in next version, turn off by default
+        orthogonal_reg_weight = 0.,
+        orthogonal_reg_active_codes_only = False
     ):
         super().__init__()
         n_embed = default(n_embed, codebook_size)
@@ -276,7 +277,9 @@ def __init__(
 
         self.eps = eps
         self.commitment_weight = default(commitment_weight, commitment)
+
         self.orthogonal_reg_weight = orthogonal_reg_weight
+        self.orthogonal_reg_active_codes_only = orthogonal_reg_active_codes_only
 
         codebook_class = EuclideanCodebook if not use_cosine_sim \
             else CosineSimCodebook
@@ -327,6 +330,15 @@ def forward(self, x):
             loss = loss + commit_loss * self.commitment_weight
 
         if self.orthogonal_reg_weight > 0:
+            codebook = self.codebook
+
+            if self.orthogonal_reg_active_codes_only:
+                # only calculate orthogonal loss for the activated codes for this batch
+                unique_code_ids = torch.unique(embed_ind)
+                codebook = self.codebook[unique_code_ids]
+            else:
+                codebook = self.codebook
+
             orthogonal_reg_loss = orthgonal_loss_fn(self.codebook)
             loss = loss + orthogonal_reg_loss * self.orthogonal_reg_weight
 
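
The orthogonality penalty itself (`orthgonal_loss_fn`) is not part of this diff, so below is a small standalone sketch of the idea the new flag targets: gather the codebook rows actually assigned in the current batch and apply the penalty to that subset only. The function name, the squared cosine-similarity-minus-identity form, and the tensor shapes are assumptions for illustration, not the library's exact implementation.

```python
# standalone sketch (not the library's exact code): orthogonality penalty
# restricted to the codes that were activated for the current batch
import torch
import torch.nn.functional as F

def orthogonal_loss_on_active_codes(codebook, embed_ind):
    # codebook: (codebook_size, dim) code vectors
    # embed_ind: integer tensor of code indices assigned in this batch
    unique_code_ids = torch.unique(embed_ind)      # codes actually used this batch
    active_codes = codebook[unique_code_ids]       # (n_active, dim) subset

    n = active_codes.shape[0]
    normed = F.normalize(active_codes, dim = -1)   # compare code directions only
    cosine_sim = normed @ normed.t()               # (n_active, n_active) pairwise similarities
    identity = torch.eye(n, device = codebook.device)

    # push off-diagonal similarities toward zero, i.e. mutually orthogonal active codes
    return ((cosine_sim - identity) ** 2).sum() / (n ** 2)

# hypothetical shapes: a 16k codebook, but only the codes hit by one 32x32 map are penalized
codebook = torch.randn(16384, 256)
embed_ind = torch.randint(0, 16384, (1, 32, 32))
loss = orthogonal_loss_on_active_codes(codebook, embed_ind)
```

For a 16k codebook where a batch only touches a few hundred codes, restricting the penalty keeps the pairwise similarity matrix at n_active × n_active instead of 16384 × 16384, which is the motivation given in the commit message.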
