prepare for audiolm to be able to reconstruct from only coarse quantized signals

lucidrains · lucidrains · commit 6f65e7248e7b · 2022-11-17T09:43:37.000-08:00
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'vector_quantize_pytorch',
   packages = find_packages(),
-  version = '0.10.10',
+  version = '0.10.11',
   license='MIT',
   description = 'Vector Quantization - Pytorch',
   long_description_content_type = 'text/markdown',
diff --git a/vector_quantize_pytorch/residual_vq.py b/vector_quantize_pytorch/residual_vq.py
@@ -3,6 +3,7 @@
 
 import torch
 from torch import nn
+import torch.nn.functional as F
 from vector_quantize_pytorch.vector_quantize_pytorch import VectorQuantize
 
 from einops import rearrange, repeat
@@ -48,11 +49,22 @@ def codebooks(self):
         return codebooks
 
     def get_codes_from_indices(self, indices):
-        batch = indices.shape[0]
+        batch, quantize_dim = indices.shape[0], indices.shape[-1]
+
+        # because of quantize dropout, one can pass in indices that are coarse
+        # and the network should be able to reconstruct
+
+        if quantize_dim < self.num_quantizers:
+            assert self.quantize_dropout > 0., 'quantize dropout must be greater than 0 if you wish to reconstruct from a signal with less fine quantizations'
+            indices = F.pad(indices, (0, self.num_quantizers - quantize_dim), value = -1)
+
+        # get ready for gathering
+
         codebooks = repeat(self.codebooks, 'q c d -> q b c d', b = batch)
         gather_indices = repeat(indices, 'b n q -> q b n d', d = codebooks.shape[-1])
 
         # take care of quantizer dropout
+
         mask = gather_indices == -1.
         gather_indices = gather_indices.masked_fill(mask, 0) # have it fetch a dummy code to be masked out later