From b015275d43c0c054802dd3d5075e17be7de4bbe6 Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Thu, 4 Apr 2024 00:06:35 -0400
Subject: [PATCH 1/6] Improve gak: normalize by a product of square roots

---
 tslearn/metrics/softdtw_variants.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tslearn/metrics/softdtw_variants.py b/tslearn/metrics/softdtw_variants.py
index 3be98011..1c196bc3 100644
--- a/tslearn/metrics/softdtw_variants.py
+++ b/tslearn/metrics/softdtw_variants.py
@@ -240,10 +240,8 @@ def gak(s1, s2, sigma=1.0, be=None):  # TODO: better doc (formula for the kernel
     be = instantiate_backend(be, s1, s2)
     s1 = be.array(s1)
     s2 = be.array(s2)
-    denom = be.sqrt(
-        unnormalized_gak(s1, s1, sigma=sigma, be=be)
-        * unnormalized_gak(s2, s2, sigma=sigma, be=be)
-    )
+    denom = be.sqrt(unnormalized_gak(s1, s1, sigma=sigma, be=be)) * be.sqrt(
+        unnormalized_gak(s2, s2, sigma=sigma, be=be))
     return unnormalized_gak(s1, s2, sigma=sigma, be=be) / denom
 
 

From 48118014f339b7ae266491afa5c0efaf28b67e06 Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Thu, 4 Apr 2024 09:36:02 -0400
Subject: [PATCH 2/6] Add data type specifications for gak to return the same
 data type as its input

---
 tslearn/metrics/softdtw_variants.py | 2 +-
 tslearn/metrics/utils.py            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tslearn/metrics/softdtw_variants.py b/tslearn/metrics/softdtw_variants.py
index 1c196bc3..03ff2808 100644
--- a/tslearn/metrics/softdtw_variants.py
+++ b/tslearn/metrics/softdtw_variants.py
@@ -54,7 +54,7 @@ def _gak(gram, be=None):
     gram = be.array(gram)
     sz1, sz2 = be.shape(gram)
 
-    cum_sum = be.zeros((sz1 + 1, sz2 + 1))
+    cum_sum = be.zeros((sz1 + 1, sz2 + 1), dtype=gram.dtype)
     cum_sum[0, 0] = 1.0
 
     for i in range(sz1):
diff --git a/tslearn/metrics/utils.py b/tslearn/metrics/utils.py
index 7c93f098..4109ee72 100644
--- a/tslearn/metrics/utils.py
+++ b/tslearn/metrics/utils.py
@@ -79,7 +79,7 @@ def _cdist_generic(
     if dataset2 is None:
         # Inspired from code by @GillesVandewiele:
         # https://github.com/rtavenar/tslearn/pull/128#discussion_r314978479
-        matrix = be.zeros((len(dataset1), len(dataset1)))
+        matrix = be.zeros((len(dataset1), len(dataset1)), dtype=dataset1.dtype)
         indices = be.triu_indices(
             len(dataset1), k=0 if compute_diagonal else 1, m=len(dataset1)
         )

From c966e3f4c209070761d899eb420de8d3ad52845d Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Thu, 4 Apr 2024 09:38:43 -0400
Subject: [PATCH 3/6] Add a test checking that gak does not overflow for
 constant zero time series of length 405

---
 tslearn/tests/test_metrics.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tslearn/tests/test_metrics.py b/tslearn/tests/test_metrics.py
index ebd971d7..8967fe63 100644
--- a/tslearn/tests/test_metrics.py
+++ b/tslearn/tests/test_metrics.py
@@ -446,6 +446,11 @@ def test_gak():
         for array_type in array_types:
             backend = instantiate_backend(be, array_type)
             # GAK
+            gak_zeros = tslearn.metrics.gak(
+                s1=backend.zeros(405, dtype=backend.float64),
+                s2=backend.zeros(405, dtype=backend.float64),
+                sigma=1.0)
+            np.testing.assert_allclose(gak_zeros, desired=1, atol=1e-8)
             g = tslearn.metrics.cdist_gak(
                 cast([[1, 2, 2, 3], [1.0, 2.0, 3.0, 4.0]], array_type), sigma=2.0, be=be
             )

From 18f3c1b9b21118499767d6046a179f8c94788f76 Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Tue, 16 Apr 2024 09:41:24 -0400
Subject: [PATCH 4/6] Fix dtype error in the function _cdist_generic in the
 file utils.py

---
 tslearn/metrics/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tslearn/metrics/utils.py b/tslearn/metrics/utils.py
index 4109ee72..84470543 100644
--- a/tslearn/metrics/utils.py
+++ b/tslearn/metrics/utils.py
@@ -89,7 +89,8 @@ def _cdist_generic(
                 delayed(dist_fun)(dataset1[i], dataset1[j], *args, **kwargs)
                 for i in range(len(dataset1))
                 for j in range(i if compute_diagonal else i + 1, len(dataset1))
-            )
+            ),
+            dtype=matrix.dtype
         )
 
         indices = be.tril_indices(len(dataset1), k=-1, m=len(dataset1))

From edbe59d52931ca33a33f33b2519f0d7c81ad4752 Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Tue, 23 Apr 2024 15:18:29 -0400
Subject: [PATCH 5/6] Add details about GAK normalization in the
 documentation file kernel.rst

---
 docs/user_guide/kernel.rst | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/user_guide/kernel.rst b/docs/user_guide/kernel.rst
index 46dbede5..f417dfb8 100644
--- a/docs/user_guide/kernel.rst
+++ b/docs/user_guide/kernel.rst
@@ -52,7 +52,7 @@ Global Alignment Kernel
 The Global Alignment Kernel (GAK) is a kernel that operates on time
 series.
 
-It is defined, for a given bandwidth :math:`\sigma`, as:
+The unnormalized GAK is defined, for a given bandwidth :math:`\sigma`, as:
 
 .. math::
 
@@ -64,6 +64,15 @@ It is defined, for a given bandwidth :math:`\sigma`, as:
 where :math:`\mathcal{A}(\mathbf{x}, \mathbf{y})` is the set of all possible
 alignments between series :math:`\mathbf{x}` and :math:`\mathbf{y}`.
 
+Note that the function ``gak`` is normalized in ``tslearn``: it corresponds to the quotient
+
+.. math::
+
+    \text{gak}(\mathbf{x}, \mathbf{y}) = \frac{k(\mathbf{x}, \mathbf{y})}{\sqrt{k(\mathbf{x}, \mathbf{x})k(\mathbf{y}, \mathbf{y})}}
+
+to ensure that :math:`\text{gak}(\mathbf{x}, \mathbf{x})=1` for all :math:`\mathbf{x}`
+and :math:`\text{gak}(\mathbf{x}, \mathbf{y}) \in [0, 1]` for all :math:`\mathbf{x}, \mathbf{y}`.
+
 It is advised in [1]_ to set the bandwidth :math:`\sigma` as a multiple of a
 simple estimate of the median distance of different points observed in
 different time-series of your training set, scaled by the square root of the
@@ -81,7 +90,7 @@ This estimate is made available in ``tslearn`` through
 Note however that, on long time series, this estimate can lead to numerical
 overflows, which smaller values can avoid.
 
-Finally, GAK is related to :ref:`softDTW <dtw-softdtw>` [3]_ through the
+Finally, the unnormalized GAK is related to :ref:`softDTW <dtw-softdtw>` [3]_ through the
 following formula:
 
 .. math::

From 4f33f768b07cc1bdda45c28ccb5f8c284a88161e Mon Sep 17 00:00:00 2001
From: Yann Cabanes <yann.cabanes@gmail.com>
Date: Tue, 23 Apr 2024 15:49:22 -0400
Subject: [PATCH 6/6] Improve gak documentation in kernel.rst

---
 docs/user_guide/kernel.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user_guide/kernel.rst b/docs/user_guide/kernel.rst
index f417dfb8..0197abd2 100644
--- a/docs/user_guide/kernel.rst
+++ b/docs/user_guide/kernel.rst
@@ -70,7 +70,7 @@ Note that the function ``gak`` is normalized in ``tslearn``: it corresponds to t
 
     \text{gak}(\mathbf{x}, \mathbf{y}) = \frac{k(\mathbf{x}, \mathbf{y})}{\sqrt{k(\mathbf{x}, \mathbf{x})k(\mathbf{y}, \mathbf{y})}}
 
-to ensure that :math:`\text{gak}(\mathbf{x}, \mathbf{x})=1` for all :math:`\mathbf{x}`
+This normalization ensures that :math:`\text{gak}(\mathbf{x}, \mathbf{x})=1` for all :math:`\mathbf{x}`
 and :math:`\text{gak}(\mathbf{x}, \mathbf{y}) \in [0, 1]` for all :math:`\mathbf{x}, \mathbf{y}`.
 
 It is advised in [1]_ to set the bandwidth :math:`\sigma` as a multiple of a