From 9d72887883f60edc8f42827329a7a6e82fda6d1a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 17 Jan 2025 04:25:15 -0800
Subject: [PATCH 1/4] Add workaround for dpnp.linalg.qr race condition on CUDA

---
 dpnp/linalg/dpnp_utils_linalg.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index 06e71366154f..c928285b89f2 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -399,6 +399,12 @@ def _batched_qr(a, mode="reduced"):
     )
     _manager.add_event_pair(ht_ev, geqrf_ev)
 
+    # w/a to avoid raice conditional on CUDA during multiple runs
+    # TODO: Remove it ones the OneMath issue is resolved
+    # https://github.com/uxlfoundation/oneMath/issues/626
+    if dpnp.is_cuda_backend(a_sycl_queue):
+        ht_ev.wait()
+
     if mode in ["r", "raw"]:
         if mode == "r":
             r = a_t[..., :k].swapaxes(-2, -1)
@@ -2470,6 +2476,12 @@ def dpnp_qr(a, mode="reduced"):
     )
     _manager.add_event_pair(ht_ev, geqrf_ev)
 
+    # w/a to avoid raice conditional on CUDA during multiple runs
+    # TODO: Remove it ones the OneMath issue is resolved
+    # https://github.com/uxlfoundation/oneMath/issues/626
+    if dpnp.is_cuda_backend(a_sycl_queue):
+        ht_ev.wait()
+
     if mode in ["r", "raw"]:
         if mode == "r":
             r = a_t[:, :k].transpose()

From a930274c3f4410c7c9ef1cd923c159ecae359281 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 17 Jan 2025 04:26:22 -0800
Subject: [PATCH 2/4] Update QR tests

---
 dpnp/tests/test_linalg.py                     | 54 ++++++++++++-------
 .../cupy/linalg_tests/test_decomposition.py   |  7 ---
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py
index a150d823d750..1087e7386a70 100644
--- a/dpnp/tests/test_linalg.py
+++ b/dpnp/tests/test_linalg.py
@@ -2380,12 +2380,6 @@ class TestQr:
     )
     @pytest.mark.parametrize("mode", ["r", "raw", "complete", "reduced"])
     def test_qr(self, dtype, shape, mode):
-        if (
-            is_cuda_device()
-            and mode in ["complete", "reduced"]
-            and shape in [(16, 16), (2, 2, 4)]
-        ):
-            pytest.skip("SAT-7589")
         a = generate_random_numpy_array(shape, dtype, seed_value=81)
         ia = dpnp.array(a)
 
@@ -2398,24 +2392,48 @@ def test_qr(self, dtype, shape, mode):
 
             # check decomposition
             if mode in ("complete", "reduced"):
-                if a.ndim == 2:
-                    assert_almost_equal(
-                        dpnp.dot(dpnp_q, dpnp_r),
-                        a,
-                        decimal=5,
-                    )
-                else:  # a.ndim > 2
-                    assert_almost_equal(
-                        dpnp.matmul(dpnp_q, dpnp_r),
-                        a,
-                        decimal=5,
-                    )
+                assert_almost_equal(
+                    dpnp.matmul(dpnp_q, dpnp_r),
+                    a,
+                    decimal=5,
+                )
             else:  # mode=="raw"
                 assert_dtype_allclose(dpnp_q, np_q)
 
         if mode in ("raw", "r"):
             assert_dtype_allclose(dpnp_r, np_r)
 
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "shape",
+        [(32, 32), (8, 16, 16)],
+        ids=[
+            "(32, 32)",
+            "(8, 16, 16)",
+        ],
+    )
+    @pytest.mark.parametrize("mode", ["r", "raw", "complete", "reduced"])
+    def test_qr_large(self, dtype, shape, mode):
+        a = generate_random_numpy_array(shape, dtype, seed_value=81)
+        ia = dpnp.array(a)
+        if mode == "r":
+            np_r = numpy.linalg.qr(a, mode)
+            dpnp_r = dpnp.linalg.qr(ia, mode)
+        else:
+            np_q, np_r = numpy.linalg.qr(a, mode)
+            dpnp_q, dpnp_r = dpnp.linalg.qr(ia, mode)
+            # check decomposition
+            if mode in ("complete", "reduced"):
+                assert_almost_equal(
+                    dpnp.matmul(dpnp_q, dpnp_r),
+                    a,
+                    decimal=5,
+                )
+            else:  # mode=="raw"
+                assert_dtype_allclose(dpnp_q, np_q, factor=12)
+        if mode in ("raw", "r"):
+            assert_dtype_allclose(dpnp_r, np_r, factor=12)
+
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
     @pytest.mark.parametrize(
         "shape",
diff --git a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py
index 47e7dd23b3a2..5cefa89dcef3 100644
--- a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py
+++ b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py
@@ -163,14 +163,7 @@ def test_decomposition(self, dtype):
 class TestQRDecomposition(unittest.TestCase):
 
     @testing.for_dtypes("fdFD")
-    # skip cases with 'complete' and 'reduce' modes on CUDA (SAT-7611)
     def check_mode(self, array, mode, dtype):
-        if (
-            is_cuda_device()
-            and array.size > 0
-            and mode in ["complete", "reduced"]
-        ):
-            return
         a_cpu = numpy.asarray(array, dtype=dtype)
         a_gpu = cupy.asarray(array, dtype=dtype)
         result_gpu = cupy.linalg.qr(a_gpu, mode=mode)

From 7e988dae46d3aace7a033a04fff803710ff964e0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 17 Jan 2025 05:31:53 -0800
Subject: [PATCH 3/4] Do not use event manager for the CUDA workaround

---
 dpnp/linalg/dpnp_utils_linalg.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index c928285b89f2..fda4af36f79e 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -397,13 +397,14 @@ def _batched_qr(a, mode="reduced"):
         batch_size,
         depends=[copy_ev],
     )
-    _manager.add_event_pair(ht_ev, geqrf_ev)
 
     # w/a to avoid raice conditional on CUDA during multiple runs
     # TODO: Remove it ones the OneMath issue is resolved
     # https://github.com/uxlfoundation/oneMath/issues/626
     if dpnp.is_cuda_backend(a_sycl_queue):
         ht_ev.wait()
+    else:
+        _manager.add_event_pair(ht_ev, geqrf_ev)
 
     if mode in ["r", "raw"]:
         if mode == "r":
@@ -2474,13 +2475,14 @@ def dpnp_qr(a, mode="reduced"):
     ht_ev, geqrf_ev = li._geqrf(
         a_sycl_queue, a_t.get_array(), tau_h.get_array(), depends=[copy_ev]
     )
-    _manager.add_event_pair(ht_ev, geqrf_ev)
 
     # w/a to avoid raice conditional on CUDA during multiple runs
     # TODO: Remove it ones the OneMath issue is resolved
     # https://github.com/uxlfoundation/oneMath/issues/626
     if dpnp.is_cuda_backend(a_sycl_queue):
         ht_ev.wait()
+    else:
+        _manager.add_event_pair(ht_ev, geqrf_ev)
 
     if mode in ["r", "raw"]:
         if mode == "r":

From 871b8a2dafa7953420eff115d498d46f70e4a248 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 20 Jan 2025 02:58:22 -0800
Subject: [PATCH 4/4] Use assert_allclose() in test_qr_large

---
 dpnp/tests/test_linalg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py
index 1087e7386a70..222f22e6e7de 100644
--- a/dpnp/tests/test_linalg.py
+++ b/dpnp/tests/test_linalg.py
@@ -2430,9 +2430,9 @@ def test_qr_large(self, dtype, shape, mode):
                     decimal=5,
                 )
             else:  # mode=="raw"
-                assert_dtype_allclose(dpnp_q, np_q, factor=12)
+                assert_allclose(np_q, dpnp_q, atol=1e-4)
         if mode in ("raw", "r"):
-            assert_dtype_allclose(dpnp_r, np_r, factor=12)
+            assert_allclose(np_r, dpnp_r, atol=1e-4)
 
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
     @pytest.mark.parametrize(