openjdk
diff --git a/‎make/modules/java.base/Lib.gmk‎
Lines changed: 19 additions & 0 deletions b/‎make/modules/java.base/Lib.gmk‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎src/hotspot/cpu/aarch64/globals_aarch64.hpp‎
Lines changed: 3 additions & 1 deletion b/‎src/hotspot/cpu/aarch64/globals_aarch64.hpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/hotspot/cpu/aarch64/matcher_aarch64.hpp‎
Lines changed: 7 additions & 0 deletions b/‎src/hotspot/cpu/aarch64/matcher_aarch64.hpp‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp‎
Lines changed: 4 additions & 0 deletions b/‎src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/hotspot/cpu/x86/stubGenerator_x86_64.cpp‎
Lines changed: 4 additions & 16 deletions b/‎src/hotspot/cpu/x86/stubGenerator_x86_64.cpp‎
Lines changed: 4 additions & 16 deletions
diff --git a/‎src/hotspot/share/runtime/stubRoutines.cpp‎
Lines changed: 32 additions & 0 deletions b/‎src/hotspot/share/runtime/stubRoutines.cpp‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎src/hotspot/share/runtime/stubRoutines.hpp‎
Lines changed: 4 additions & 0 deletions b/‎src/hotspot/share/runtime/stubRoutines.hpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/java.base/linux/native/libsimdsort/aarch64/pivot-selection.hpp‎
Lines changed: 21 additions & 55 deletions b/‎src/java.base/linux/native/libsimdsort/aarch64/pivot-selection.hpp‎
Lines changed: 21 additions & 55 deletions
diff --git a/‎src/java.base/linux/native/libsimdsort/aarch64/simdsort-support.hpp‎
Lines changed: 5 additions & 4 deletions b/‎src/java.base/linux/native/libsimdsort/aarch64/simdsort-support.hpp‎
Lines changed: 5 additions & 4 deletions
@@ -187,6 +187,7 @@ ifeq ($(ENABLE_FALLBACK_LINKER), true)
   TARGETS += $(BUILD_LIBFALLBACKLINKER)
 endif
 
+SIMDSORT_BASE_DIR := $(TOPDIR)/src/java.base/linux/native/libsimdsort
 ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
   ##############################################################################
   ## Build libsimdsort
@@ -196,6 +197,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
       NAME := simdsort, \
       LINK_TYPE := C++, \
       OPTIMIZATION := HIGH, \
+      SRC := $(SIMDSORT_BASE_DIR)/x86, \
       CXXFLAGS := -std=c++17, \
       DISABLED_WARNINGS_gcc := unused-variable, \
       LIBS_linux := $(LIBM), \
@@ -204,4 +206,21 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
   TARGETS += $(BUILD_LIBSIMD_SORT)
 endif
 
+ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, aarch64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
+  ##############################################################################
+  ## Build libsimdsort (AArch64 SVE variant)
+  ##############################################################################
+  # Mirrors the x86_64 stanza above: LINK_TYPE := C++ selects the C++ linker, and
+  # the default JDK library flags are supplied by SetupJdkLibrary itself.
+  # Only the SVE architecture flag is specific to this variant.
+  $(eval $(call SetupJdkLibrary, BUILD_LIBSIMD_SORT, \
+      NAME := simdsort, \
+      LINK_TYPE := C++, \
+      OPTIMIZATION := HIGH, \
+      SRC := $(SIMDSORT_BASE_DIR)/aarch64, \
+      CFLAGS := -march=armv8.2-a+sve, \
+      CXXFLAGS := -std=c++17 -march=armv8.2-a+sve, \
+      DISABLED_WARNINGS_gcc := unused-variable, \
+      LIBS_linux := $(LIBM), \
+  ))
+
+  TARGETS += $(BUILD_LIBSIMD_SORT)
+endif
 ################################################################################
@@ -127,7 +127,9 @@ define_pd_global(intx, InlineSmallCode,          1000);
           "Branch Protection to use: none, standard, pac-ret")          \
   product(bool, AlwaysMergeDMB, true, DIAGNOSTIC,                       \
           "Always merge DMB instructions in code emission")             \
-
+  product(bool, UseSVELibSimdSortForFP, false, EXPERIMENTAL,            \
+          "Use SVE-based LibSimdSort for float type on SVE supporting " \
+          "machines")                                                   \
 // end of ARCH_FLAGS
 
 #endif // CPU_AARCH64_GLOBALS_AARCH64_HPP
@@ -197,6 +197,13 @@
 
   // Is SIMD sort supported for this CPU?
   static bool supports_simd_sort(BasicType bt) {
+    // SIMD sort is only reported as supported when the CPU implements SVE.
+    if (VM_Version::supports_sve()) {
+      // Only T_INT and T_FLOAT are handled at present, and T_FLOAT
+      // is additionally gated on the experimental flag
+      // UseSVELibSimdSortForFP being enabled.
+      return (bt == T_INT || (bt == T_FLOAT && UseSVELibSimdSortForFP));
+    }
     return false;
   }
 
 
@@ -11873,6 +11873,10 @@ class StubGenerator: public StubCodeGenerator {
       StubRoutines::_montgomerySquare = g.generate_multiply();
     }
 
+    // Load libsimdsort on SVE-capable hardware to enable the SIMD sort and partition intrinsics
+    if (VM_Version::supports_sve()) {
+      (void)StubRoutines::try_load_simdsort("sve_sort", "sve_partition");
+    }
 #endif // COMPILER2
 
     if (UseChaCha20Intrinsics) {
 
@@ -4303,22 +4303,10 @@ void StubGenerator::generate_compiler_stubs() {
   // Load x86_64_sort library on supported hardware to enable SIMD sort and partition intrinsics
 
   if (VM_Version::supports_avx512dq() || VM_Version::supports_avx2()) {
-    void *libsimdsort = nullptr;
-    char ebuf_[1024];
-    char dll_name_simd_sort[JVM_MAXPATHLEN];
-    if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort), Arguments::get_dll_dir(), "simdsort")) {
-      libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
-    }
-    // Get addresses for SIMD sort and partition routines
-    if (libsimdsort != nullptr) {
-      log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
-
-      os::snprintf_checked(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512_simd_sort() ? "avx512_sort" : "avx2_sort");
-      StubRoutines::_array_sort = (address)os::dll_lookup(libsimdsort, ebuf_);
-
-      os::snprintf_checked(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512_simd_sort() ? "avx512_partition" : "avx2_partition");
-      StubRoutines::_array_partition = (address)os::dll_lookup(libsimdsort, ebuf_);
-    }
+    const bool use_avx512 = VM_Version::supports_avx512_simd_sort();
+    const char* sort_sym      = use_avx512 ? "avx512_sort"      : "avx2_sort";
+    const char* partition_sym = use_avx512 ? "avx512_partition" : "avx2_partition";
+    (void)StubRoutines::try_load_simdsort(sort_sym, partition_sym);
   }
 
 #endif // COMPILER2
 
@@ -469,6 +469,38 @@ StubRoutines::select_arraycopy_function(BasicType t, bool aligned, bool disjoint
 #undef RETURN_STUB_PARM
 }
 
+// Locates and loads libsimdsort from the VM's library directory, then resolves
+// the two entry points named by sort_sym and partition_sym.
+//
+// Returns true and publishes StubRoutines::_array_sort/_array_partition only
+// when both symbols resolve. Returns false, leaving both stubs untouched, if
+// the library cannot be located or loaded, or if either symbol is missing.
+bool StubRoutines::try_load_simdsort(const char* sort_sym, const char* partition_sym) {
+  char ebuf_[1024];
+  char dll_name_simd_sort[JVM_MAXPATHLEN];
+
+  if (!os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort),
+                          Arguments::get_dll_dir(), "simdsort")) {
+    return false;
+  }
+
+  void* libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
+  if (libsimdsort == nullptr) {
+    return false;
+  }
+
+  log_info(library)("Loaded library %s, handle " INTPTR_FORMAT,
+                    JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
+
+  // Get addresses for SIMD sort and partition routines
+  address sort_addr      = (address)os::dll_lookup(libsimdsort, sort_sym);
+  address partition_addr = (address)os::dll_lookup(libsimdsort, partition_sym);
+
+  if (sort_addr == nullptr || partition_addr == nullptr) {
+    log_warning(library)("libsimdsort missing symbols: %s=" INTPTR_FORMAT ", %s=" INTPTR_FORMAT,
+                sort_sym, p2i(sort_addr), partition_sym, p2i(partition_addr));
+    // Neither stub is published on a partial match, so nothing references the
+    // library; release the handle instead of leaving it mapped for the VM's lifetime.
+    os::dll_unload(libsimdsort);
+    return false;
+  }
+
+  StubRoutines::_array_sort = sort_addr;
+  StubRoutines::_array_partition = partition_addr;
+  return true;
+}
+
 UnsafeMemoryAccessMark::UnsafeMemoryAccessMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc) {
   _cgen = cgen;
   _ucm_entry = nullptr;
 
@@ -28,6 +28,7 @@
 #include "code/codeBlob.hpp"
 #include "memory/allocation.hpp"
 #include "prims/vectorSupport.hpp"
+#include "runtime/arguments.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/stubCodeGenerator.hpp"
@@ -362,6 +363,9 @@ class StubRoutines: AllStatic {
   static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
   static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
 
+  // SIMD sort support. Loads libsimdsort and looks up sort_sym and partition_sym in it.
+  // Only if both resolve are StubRoutines::_array_sort/_array_partition set and true returned.
+  static bool try_load_simdsort(const char* sort_sym, const char* partition_sym);
 };
 
 #endif // SHARE_RUNTIME_STUBROUTINES_HPP
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
  * Copyright (c) 2021 Serge Sans Paille. All rights reserved.
+ * Copyright 2025 Arm Limited and/or its affiliates.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,66 +24,31 @@
  *
  */
 
-// This implementation is based on x86-simd-sort(https://github.com/intel/x86-simd-sort)
+#ifndef AARCH64_SVE_PIVOT_SELECTION_HPP
+#define AARCH64_SVE_PIVOT_SELECTION_HPP
 
-template <typename vtype, typename mm_t>
-X86_SIMD_SORT_INLINE void COEX(mm_t &a, mm_t &b);
+#include <algorithm>
+#include "sve-config.hpp"
 
-template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE type_t get_pivot(type_t *arr, const arrsize_t left,
-                                      const arrsize_t right) {
-    using reg_t = typename vtype::reg_t;
-    type_t samples[vtype::numlanes];
-    arrsize_t delta = (right - left) / vtype::numlanes;
-    for (int i = 0; i < vtype::numlanes; i++) {
-        samples[i] = arr[left + i * delta];
-    }
-    reg_t rand_vec = vtype::loadu(samples);
-    reg_t sort = vtype::sort_vec(rand_vec);
-
-    return ((type_t *)&sort)[vtype::numlanes / 2];
-}
+/* TODO: Pivot selection currently uses the median-of-three method.
+ * A possible improvement is to use a sorting network (compare-and-exchange sorting)
+ * for larger arrays.
+ */
 
 template <typename vtype, typename type_t>
-X86_SIMD_SORT_INLINE type_t get_pivot_blocks(type_t *arr, const arrsize_t left,
-                                             const arrsize_t right) {
-    if (right - left <= 1024) {
-        return get_pivot<vtype>(arr, left, right);
-    }
-
-    using reg_t = typename vtype::reg_t;
-    constexpr int numVecs = 5;
-
-    arrsize_t width = (right - vtype::numlanes) - left;
-    arrsize_t delta = width / numVecs;
+static inline type_t get_pivot_blocks(type_t* arr, const arrsize_t left, const arrsize_t right) {
+  const arrsize_t len = right - left;
+  if (len < 64) return arr[left];
 
-    reg_t vecs[numVecs];
-    // Load data
-    for (int i = 0; i < numVecs; i++) {
-        vecs[i] = vtype::loadu(arr + left + delta * i);
-    }
+  const arrsize_t mid = left + (len / 2);
+  const type_t a = arr[left];
+  const type_t b = arr[mid];
+  const type_t c = arr[right - 1];
 
-    // Implement sorting network (from
-    // https://bertdobbelaere.github.io/sorting_networks.html)
-    COEX<vtype>(vecs[0], vecs[3]);
-    COEX<vtype>(vecs[1], vecs[4]);
+  const type_t min_ab = std::min(a, b);
+  const type_t max_ab = std::max(a, b);
 
-    COEX<vtype>(vecs[0], vecs[2]);
-    COEX<vtype>(vecs[1], vecs[3]);
-
-    COEX<vtype>(vecs[0], vecs[1]);
-    COEX<vtype>(vecs[2], vecs[4]);
-
-    COEX<vtype>(vecs[1], vecs[2]);
-    COEX<vtype>(vecs[3], vecs[4]);
-
-    COEX<vtype>(vecs[2], vecs[3]);
-
-    // Calculate median of the middle vector
-    reg_t &vec = vecs[numVecs / 2];
-    vec = vtype::sort_vec(vec);
-
-    type_t data[vtype::numlanes];
-    vtype::storeu(data, vec);
-    return data[vtype::numlanes / 2];
+  return std::min(max_ab, std::max(min_ab, c));
 }
+
+#endif // AARCH64_SVE_PIVOT_SELECTION_HPP
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2023 Intel Corporation. All rights reserved.
+ * Copyright 2025 Arm Limited and/or its affiliates.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,10 +31,10 @@
 #undef assert
 #define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }}
 
-
-// GCC >= 9.1 is needed to build AVX2 portions of libsimdsort using C++17 features
-#if defined(_LP64) && (defined(__GNUC__) && ((__GNUC__ > 9) || ((__GNUC__ == 9) && (__GNUC_MINOR__ >= 1))))
+// GCC >= 10.1 is required for full support of the Arm SVE ACLE intrinsics (including the arm_sve.h header)
+#if defined(__aarch64__) && defined(_LP64) && defined(__GNUC__) && \
+    ((__GNUC__ > 10) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 1))
 #define __SIMDSORT_SUPPORTED_LINUX
 #endif
 
-#endif //SIMDSORT_SUPPORT_HPP
+#endif //SIMDSORT_SUPPORT_HPP
Original file line number	Diff line number	Diff line change
`@@ -11873,6 +11873,10 @@ class StubGenerator: public StubCodeGenerator {`
`11873`	`11873`	`StubRoutines::_montgomerySquare = g.generate_multiply();`
`11874`	`11874`	`}`
`11875`	`11875`
	`11876`	`+ // Load sve_sort library on supported hardware to enable SIMD sort and partition intrinsics`
	`11877`	`+ if (VM_Version::supports_sve()) {`
	`11878`	`+ (void)StubRoutines::try_load_simdsort("sve_sort", "sve_partition");`
	`11879`	`+ }`
`11876`	`11880`	`#endif // COMPILER2`
`11877`	`11881`
`11878`	`11882`	`if (UseChaCha20Intrinsics) {`