
Commit 936d9be

remove what reportedly does not work well
1 parent 1c0e73e commit 936d9be

File tree

3 files changed: +2 −75 lines


README.md

Lines changed: 0 additions & 37 deletions
@@ -182,31 +182,6 @@ x = torch.randn(1, 1024, 256)
 quantized, indices, commit_loss = vq(x)
 ```
 
-### Orthogonal regularization loss
-
-VQ-VAE / VQ-GAN is quickly gaining popularity. A <a href="https://arxiv.org/abs/2112.00384">recent paper</a> proposes that when using vector quantization on images, enforcing the codebook to be orthogonal leads to translation equivariance of the discretized codes, leading to large improvements in downstream text to image generation tasks.
-
-You can use this feature by simply setting the `orthogonal_reg_weight` to be greater than `0`, in which case the orthogonal regularization will be added to the auxiliary loss outputted by the module.
-
-```python
-import torch
-from vector_quantize_pytorch import VectorQuantize
-
-vq = VectorQuantize(
-    dim = 256,
-    codebook_size = 256,
-    accept_image_fmap = True,                   # set this true to be able to pass in an image feature map
-    orthogonal_reg_weight = 10,                 # in paper, they recommended a value of 10
-    orthogonal_reg_max_codes = 128,             # this would randomly sample from the codebook for the orthogonal regularization loss, for limiting memory usage
-    orthogonal_reg_active_codes_only = False    # set this to True if you have a very large codebook, and would only like to enforce the loss on the activated codes per batch
-)
-
-img_fmap = torch.randn(1, 256, 32, 32)
-quantized, indices, loss = vq(img_fmap) # (1, 256, 32, 32), (1, 32, 32), (1,)
-
-# loss now contains the orthogonal regularization loss with the weight as assigned
-```
-
 ### Multi-headed VQ
 
 There has been a number of papers that proposes variants of discrete latent representations with a multi-headed approach (multiple codes per feature). I have decided to offer one variant where the same codebook is used to vector quantize across the input dimension `head` times.
@@ -230,7 +205,6 @@ img_fmap = torch.randn(1, 256, 32, 32)
 quantized, indices, loss = vq(img_fmap) # (1, 256, 32, 32), (1, 32, 32, 8), (1,)
 
 # indices shape - (batch, height, width, heads)
-# loss now contains the orthogonal regularization loss with the weight as assigned
 ```
 ### Random Projection Quantizer
 
@@ -344,17 +318,6 @@ if __name__ == '__main__':
 }
 ```
 
-```bibtex
-@misc{shin2021translationequivariant,
-    title = {Translation-equivariant Image Quantizer for Bi-directional Image-Text Generation},
-    author = {Woncheol Shin and Gyubok Lee and Jiyoung Lee and Joonseok Lee and Edward Choi},
-    year = {2021},
-    eprint = {2112.00384},
-    archivePrefix = {arXiv},
-    primaryClass = {cs.CV}
-}
-```
-
 ```bibtex
 @unknown{unknown,
     author = {Lee, Doyup and Kim, Chiheon and Kim, Saehoon and Cho, Minsu and Han, Wook-Shin},
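
Note: the deleted README section documented the orthogonal regularization of Shin et al. (arXiv:2112.00384), which this commit drops from the library. For anyone who still wants that penalty after upgrading, below is a minimal standalone sketch of the same eq. (2) term, reconstructed from the deleted `orthogonal_loss_fn`; `F.normalize` stands in for the repo's internal `l2norm` helper, and the `orthogonal_penalty` name is illustrative, not part of the package.

```python
import torch
import torch.nn.functional as F

def orthogonal_penalty(codebook: torch.Tensor) -> torch.Tensor:
    # codebook: (num_codebooks, num_codes, dim), the layout the deleted orthogonal_loss_fn expected
    h, n = codebook.shape[:2]
    normed = F.normalize(codebook, dim = -1)  # stand-in for the repo's l2norm helper
    cosine_sim = torch.einsum('h i d, h j d -> h i j', normed, normed)
    # eq (2) of https://arxiv.org/abs/2112.00384: mean squared cosine similarity minus the identity term
    return (cosine_sim ** 2).sum() / (h * n ** 2) - (1 / n)

# hypothetical external use after this commit, with weight 10 as in the deleted README example:
# loss = commit_loss + 10 * orthogonal_penalty(vq._codebook.embed)
```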

setup.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '1.6.0',
+  version = '1.6.1',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   long_description_content_type = 'text/markdown',

vector_quantize_pytorch/vector_quantize_pytorch.py

Lines changed: 1 addition & 37 deletions
@@ -211,15 +211,6 @@ def batched_embedding(indices, embeds):
     embeds = repeat(embeds, 'h c d -> h b c d', b = batch)
     return embeds.gather(2, indices)
 
-# regularization losses
-
-def orthogonal_loss_fn(t):
-    # eq (2) from https://arxiv.org/abs/2112.00384
-    h, n = t.shape[:2]
-    normed_codes = l2norm(t)
-    cosine_sim = einsum('h i d, h j d -> h i j', normed_codes, normed_codes)
-    return (cosine_sim ** 2).sum() / (h * n ** 2) - (1 / n)
-
 # distance types
 
 class EuclideanCodebook(nn.Module):
@@ -610,9 +601,6 @@ def __init__(
         accept_image_fmap = False,
         commitment_weight = 1.,
         commitment_use_cross_entropy_loss = False,
-        orthogonal_reg_weight = 0.,
-        orthogonal_reg_active_codes_only = False,
-        orthogonal_reg_max_codes = None,
         stochastic_sample_codes = False,
         sample_codebook_temp = 1.,
         straight_through = False,
@@ -640,11 +628,6 @@ def __init__(
         self.commitment_weight = commitment_weight
         self.commitment_use_cross_entropy_loss = commitment_use_cross_entropy_loss # whether to use cross entropy loss to codebook as commitment loss
 
-        has_codebook_orthogonal_loss = orthogonal_reg_weight > 0
-        self.orthogonal_reg_weight = orthogonal_reg_weight
-        self.orthogonal_reg_active_codes_only = orthogonal_reg_active_codes_only
-        self.orthogonal_reg_max_codes = orthogonal_reg_max_codes
-
         codebook_class = EuclideanCodebook if not use_cosine_sim else CosineSimCodebook
 
         gumbel_sample_fn = partial(
@@ -665,7 +648,7 @@ def __init__(
             eps = eps,
             threshold_ema_dead_code = threshold_ema_dead_code,
             use_ddp = sync_codebook,
-            learnable_codebook = has_codebook_orthogonal_loss or learnable_codebook,
+            learnable_codebook = learnable_codebook,
             sample_codebook_temp = sample_codebook_temp,
             gumbel_sample = gumbel_sample_fn,
             ema_update = ema_update
@@ -828,25 +811,6 @@ def calculate_ce_loss(codes):
 
                 loss = loss + commit_loss * self.commitment_weight
 
-            if self.orthogonal_reg_weight > 0:
-                codebook = self._codebook.embed
-
-                # only calculate orthogonal loss for the activated codes for this batch
-
-                if self.orthogonal_reg_active_codes_only:
-                    assert not (is_multiheaded and self.separate_codebook_per_head), 'orthogonal regularization for only active codes not compatible with multi-headed with separate codebooks yet'
-                    unique_code_ids = torch.unique(embed_ind)
-                    codebook = codebook[:, unique_code_ids]
-
-                num_codes = codebook.shape[-2]
-
-                if exists(self.orthogonal_reg_max_codes) and num_codes > self.orthogonal_reg_max_codes:
-                    rand_ids = torch.randperm(num_codes, device = device)[:self.orthogonal_reg_max_codes]
-                    codebook = codebook[:, rand_ids]
-
-                orthogonal_reg_loss = orthogonal_loss_fn(codebook)
-                loss = loss + orthogonal_reg_loss * self.orthogonal_reg_weight
-
         # handle multi-headed quantized embeddings
 
         if is_multiheaded:
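
Note: after this change the three `orthogonal_reg_*` keyword arguments are gone from `VectorQuantize.__init__`, and the codebook is only learnable when `learnable_codebook = True` is passed explicitly. A minimal sketch of the surviving call, pared down from the deleted README example (the shapes in the comment follow that example):

```python
import torch
from vector_quantize_pytorch import VectorQuantize

vq = VectorQuantize(
    dim = 256,
    codebook_size = 256,
    accept_image_fmap = True    # image feature maps are still supported; only the orthogonal_reg_* kwargs were removed
)

img_fmap = torch.randn(1, 256, 32, 32)
quantized, indices, loss = vq(img_fmap)   # (1, 256, 32, 32), (1, 32, 32), (1,)
```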

0 commit comments
