Commit 221d3df

account for codebook scale in bsq
1 parent 4956bf7 commit 221d3df

3 files changed: +16 -11 lines changed


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "vector-quantize-pytorch"
-version = "1.14.38"
+version = "1.14.39"
 description = "Vector Quantization - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

tests/test_readme.py

Lines changed: 7 additions & 2 deletions
@@ -201,7 +201,11 @@ def test_rfsq():
     assert torch.all(quantized == quantized_out)
 
 @pytest.mark.parametrize('spherical', (True, False))
-def test_lfq(spherical):
+@pytest.mark.parametrize('codebook_scale', (1., 0.5))
+def test_lfq(
+    spherical,
+    codebook_scale
+):
     from vector_quantize_pytorch import LFQ
 
     # you can specify either dim or codebook_size
@@ -212,7 +216,8 @@ def test_lfq(spherical):
         dim = 16,                       # this is the input feature dimension, defaults to log2(codebook_size) if not defined
         entropy_loss_weight = 0.1,      # how much weight to place on entropy loss
         diversity_gamma = 1.,           # within entropy loss, how much weight to give to diversity of codes, taken from https://arxiv.org/abs/1911.05894
-        spherical = spherical
+        spherical = spherical,
+        codebook_scale = codebook_scale
     )
 
     image_feats = torch.randn(1, 16, 32, 32)
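For reference, a hedged usage sketch of what the updated test exercises: constructing LFQ in spherical (BSQ) mode with a non-default codebook_scale. The constructor arguments and the three return values follow the repository README; the specific codebook_size value here is illustrative.

import torch
from vector_quantize_pytorch import LFQ

quantizer = LFQ(
    codebook_size = 65536,   # 2 ** 16 codes, so dim would also default to 16
    dim = 16,
    spherical = True,        # turn on BSQ
    codebook_scale = 0.5     # the newly exercised parameter
)

image_feats = torch.randn(1, 16, 32, 32)

quantized, indices, entropy_aux_loss = quantizer(image_feats)

assert quantized.shape == image_feats.shape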

vector_quantize_pytorch/lookup_free_quantization.py

Lines changed: 8 additions & 8 deletions
@@ -44,6 +44,9 @@ def maybe_distributed_mean(t):
 def exists(v):
     return v is not None
 
+def identity(t):
+    return t
+
 def default(*args):
     for arg in args:
         if exists(arg):
@@ -156,6 +159,7 @@ def __init__(
         # whether to use BSQ (binary spherical quantization)
 
         self.spherical = spherical
+        self.maybe_l2norm = (lambda t: l2norm(t) * self.codebook_scale) if spherical else identity
 
         # entropy aux loss related weights
 
@@ -220,8 +224,7 @@ def indices_to_codes(
 
         codes = self.bits_to_codes(bits)
 
-        if self.spherical:
-            codes = l2norm(codes)
+        codes = self.maybe_l2norm(codes)
 
         codes = rearrange(codes, '... c d -> ... (c d)')
 
@@ -281,8 +284,7 @@ def forward(
 
         # maybe l2norm
 
-        if self.spherical:
-            x = l2norm(x)
+        x = self.maybe_l2norm(x)
 
         # quantize by eq 3.
 
@@ -297,8 +299,7 @@ def forward(
 
         # maybe l2norm
 
-        if self.spherical:
-            quantized = l2norm(quantized)
+        quantized = self.maybe_l2norm(quantized)
 
         # use straight-through gradients (optionally with custom activation fn) if training
 
@@ -313,8 +314,7 @@ def forward(
         if self.training:
             codebook = self.codebook
 
-            if self.spherical:
-                codebook = l2norm(codebook)
+            codebook = self.maybe_l2norm(codebook)
 
         # the same as euclidean distance up to a constant
             distance = -2 * einsum('... i d, j d -> ... i j', original_input, codebook)
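To see the substance of the fix, here is a minimal standalone sketch (tensor shape and scale value assumed for illustration): previously, with spherical = True, the branches applied l2norm alone, so any codebook_scale other than 1. was silently dropped; the new maybe_l2norm callable rescales the normalized tensor.

import torch
import torch.nn.functional as F

def l2norm(t):
    # same role as the library helper: unit-normalize along the last dimension
    return F.normalize(t, dim = -1)

codebook_scale = 0.5                      # illustrative, matching the new test case
codes = torch.randn(4, 16)

before = l2norm(codes)                    # old behavior: norm is always 1., scale ignored
after = l2norm(codes) * codebook_scale    # new behavior: norm equals codebook_scale

print(before.norm(dim = -1))              # tensor of ones
print(after.norm(dim = -1))               # tensor of 0.5s

Folding the four repeated `if self.spherical:` branches into a single maybe_l2norm callable, set once in __init__, also keeps the call sites from drifting apart again.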
