Skip to content

Commit 2fd172a

Browse files
committed
add ability to randomly sample codes for orthogonality loss, with orthogonal_reg_max_codes param
1 parent f4604a7 commit 2fd172a

File tree

3 files changed

+10
-2
lines changed

3 files changed

+10
-2
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ vq = VectorQuantize(
141141
codebook_size = 256,
142142
accept_image_fmap = True, # set this true to be able to pass in an image feature map
143143
orthogonal_reg_weight = 10, # in paper, they recommended a value of 10
144+
orthogonal_reg_max_codes = 128, # this would randomly sample from the codebook for the orthogonal regularization loss, for limiting memory usage
144145
orthogonal_reg_active_codes_only = False # set this to True if you have a very large codebook, and would only like to enforce the loss on the activated codes per batch
145146
)
146147

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
setup(
44
name = 'vector_quantize_pytorch',
55
packages = find_packages(),
6-
version = '0.4.5',
6+
version = '0.4.7',
77
license='MIT',
88
description = 'Vector Quantization - Pytorch',
99
author = 'Phil Wang',

vector_quantize_pytorch/vector_quantize_pytorch.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ def __init__(
263263
commitment_weight = None,
264264
commitment = 1., # deprecate in next version, turn off by default
265265
orthogonal_reg_weight = 0.,
266-
orthogonal_reg_active_codes_only = False
266+
orthogonal_reg_active_codes_only = False,
267+
orthogonal_reg_max_codes = None
267268
):
268269
super().__init__()
269270
n_embed = default(n_embed, codebook_size)
@@ -280,6 +281,7 @@ def __init__(
280281

281282
self.orthogonal_reg_weight = orthogonal_reg_weight
282283
self.orthogonal_reg_active_codes_only = orthogonal_reg_active_codes_only
284+
self.orthogonal_reg_max_codes = orthogonal_reg_max_codes
283285

284286
codebook_class = EuclideanCodebook if not use_cosine_sim \
285287
else CosineSimCodebook
@@ -337,6 +339,11 @@ def forward(self, x):
337339
unique_code_ids = torch.unique(embed_ind)
338340
codebook = codebook[unique_code_ids]
339341

342+
num_codes = codebook.shape[0]
343+
if exists(self.orthogonal_reg_max_codes) and num_codes > self.orthogonal_reg_max_codes:
344+
rand_ids = torch.randperm(num_codes, device = device)[:self.orthogonal_reg_max_codes]
345+
codebook = codebook[rand_ids]
346+
340347
orthogonal_reg_loss = orthgonal_loss_fn(codebook)
341348
loss = loss + orthogonal_reg_loss * self.orthogonal_reg_weight
342349

0 commit comments

Comments (0)