diff --git a/docs/user_guide/kernel.rst b/docs/user_guide/kernel.rst
index 46dbede5..0197abd2 100644
--- a/docs/user_guide/kernel.rst
+++ b/docs/user_guide/kernel.rst
@@ -52,7 +52,7 @@ Global Alignment Kernel
 The Global Alignment Kernel (GAK) is a kernel that operates on time
 series.
 
-It is defined, for a given bandwidth :math:`\sigma`, as:
+The unnormalized GAK is defined, for a given bandwidth :math:`\sigma`, as:
 
 .. math::
 
@@ -64,6 +64,15 @@ It is defined, for a given bandwidth :math:`\sigma`, as:
 where :math:`\mathcal{A}(\mathbf{x}, \mathbf{y})` is the set of all possible
 alignments between series :math:`\mathbf{x}` and :math:`\mathbf{y}`.
 
+Note that the function ``gak`` is normalized in ``tslearn``: it corresponds to the quotient
+
+.. math::
+
+    \text{gak}(\mathbf{x}, \mathbf{y}) = \frac{k(\mathbf{x}, \mathbf{y})}{\sqrt{k(\mathbf{x}, \mathbf{x})k(\mathbf{y}, \mathbf{y})}}
+
+This normalization ensures that :math:`\text{gak}(\mathbf{x}, \mathbf{x})=1` for all :math:`\mathbf{x}`
+and :math:`\text{gak}(\mathbf{x}, \mathbf{y}) \in [0, 1]` for all :math:`\mathbf{x}, \mathbf{y}`.
+
 It is advised in [1]_ to set the bandwidth :math:`\sigma` as a multiple of a
 simple estimate of the median distance of different points observed in
 different time-series of your training set, scaled by the square root of the
@@ -81,7 +90,7 @@ This estimate is made available in ``tslearn`` through
 Note however that, on long time series, this estimate can lead to numerical
 overflows, which smaller values can avoid.
 
-Finally, GAK is related to :ref:`softDTW <dtw-softdtw>` [3]_ through the
+Finally, the unnormalized GAK is related to :ref:`softDTW <dtw-softdtw>` [3]_ through the
 following formula:
 
 .. math::
diff --git a/tslearn/metrics/softdtw_variants.py b/tslearn/metrics/softdtw_variants.py
index 3be98011..03ff2808 100644
--- a/tslearn/metrics/softdtw_variants.py
+++ b/tslearn/metrics/softdtw_variants.py
@@ -54,7 +54,7 @@ def _gak(gram, be=None):
     gram = be.array(gram)
     sz1, sz2 = be.shape(gram)
 
-    cum_sum = be.zeros((sz1 + 1, sz2 + 1))
+    cum_sum = be.zeros((sz1 + 1, sz2 + 1), dtype=gram.dtype)
     cum_sum[0, 0] = 1.0
 
     for i in range(sz1):
@@ -240,10 +240,8 @@ def gak(s1, s2, sigma=1.0, be=None):  # TODO: better doc (formula for the kernel
     be = instantiate_backend(be, s1, s2)
     s1 = be.array(s1)
     s2 = be.array(s2)
-    denom = be.sqrt(
-        unnormalized_gak(s1, s1, sigma=sigma, be=be)
-        * unnormalized_gak(s2, s2, sigma=sigma, be=be)
-    )
+    denom = be.sqrt(unnormalized_gak(s1, s1, sigma=sigma, be=be)) * be.sqrt(
+        unnormalized_gak(s2, s2, sigma=sigma, be=be))
     return unnormalized_gak(s1, s2, sigma=sigma, be=be) / denom
 
 
diff --git a/tslearn/metrics/utils.py b/tslearn/metrics/utils.py
index 7c93f098..84470543 100644
--- a/tslearn/metrics/utils.py
+++ b/tslearn/metrics/utils.py
@@ -79,7 +79,7 @@ def _cdist_generic(
     if dataset2 is None:
         # Inspired from code by @GillesVandewiele:
         # https://github.com/rtavenar/tslearn/pull/128#discussion_r314978479
-        matrix = be.zeros((len(dataset1), len(dataset1)))
+        matrix = be.zeros((len(dataset1), len(dataset1)), dtype=dataset1.dtype)
         indices = be.triu_indices(
             len(dataset1), k=0 if compute_diagonal else 1, m=len(dataset1)
         )
@@ -89,7 +89,8 @@ def _cdist_generic(
                 delayed(dist_fun)(dataset1[i], dataset1[j], *args, **kwargs)
                 for i in range(len(dataset1))
                 for j in range(i if compute_diagonal else i + 1, len(dataset1))
-            )
+            ),
+            dtype=matrix.dtype
         )
 
         indices = be.tril_indices(len(dataset1), k=-1, m=len(dataset1))
diff --git a/tslearn/tests/test_metrics.py b/tslearn/tests/test_metrics.py
index ebd971d7..8967fe63 100644
--- a/tslearn/tests/test_metrics.py
+++ b/tslearn/tests/test_metrics.py
@@ -446,6 +446,11 @@ def test_gak():
         for array_type in array_types:
             backend = instantiate_backend(be, array_type)
             # GAK
+            gak_zeros = tslearn.metrics.gak(
+                s1=backend.zeros(405, dtype=backend.float64),
+                s2=backend.zeros(405, dtype=backend.float64),
+                sigma=1.0)
+            np.testing.assert_allclose(gak_zeros, desired=1, atol=1e-8)
             g = tslearn.metrics.cdist_gak(
                 cast([[1, 2, 2, 3], [1.0, 2.0, 3.0, 4.0]], array_type), sigma=2.0, be=be
             )