Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions make/modules/java.base/Lib.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ ifeq ($(ENABLE_FALLBACK_LINKER), true)
TARGETS += $(BUILD_LIBFALLBACKLINKER)
endif

SIMDSORT_BASE_DIR := $(TOPDIR)/src/java.base/linux/native/libsimdsort
ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
##############################################################################
## Build libsimdsort
Expand All @@ -196,6 +197,7 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
NAME := simdsort, \
LINK_TYPE := C++, \
OPTIMIZATION := HIGH, \
SRC := $(SIMDSORT_BASE_DIR)/x86, \
CXXFLAGS := -std=c++17, \
DISABLED_WARNINGS_gcc := unused-variable, \
LIBS_linux := $(LIBM), \
Expand All @@ -204,4 +206,21 @@ ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64)+$(INCLUDE_COMPILER2)
TARGETS += $(BUILD_LIBSIMD_SORT)
endif

ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, aarch64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
##############################################################################
## Build libsimdsort (linux/aarch64 variant)
##
## Only built when the target is linux/aarch64, C2 is included and the
## toolchain is gcc. Sources are taken from $(SIMDSORT_BASE_DIR)/aarch64 and
## compiled with -march=armv8.2-a+sve so that the SVE code can be compiled.
##
## NOTE(review): unlike the x86_64 block, this one passes TOOLCHAIN instead of
## LINK_TYPE, sets CFLAGS/CXXFLAGS/LDFLAGS/LIBS explicitly, and lists
## -lc -lm -ldl in LIBS_linux rather than $(LIBM) -- confirm these
## differences are intentional.
##############################################################################
$(eval $(call SetupJdkLibrary, BUILD_LIBSIMD_SORT, \
NAME := simdsort, \
TOOLCHAIN := TOOLCHAIN_LINK_CXX, \
OPTIMIZATION := HIGH, \
SRC := $(SIMDSORT_BASE_DIR)/aarch64, \
CFLAGS := $(CFLAGS_JDKLIB) -march=armv8.2-a+sve, \
CXXFLAGS := $(CXXFLAGS_JDKLIB) -march=armv8.2-a+sve -std=c++17, \
LDFLAGS := $(LDFLAGS_JDKLIB) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
LIBS := $(LIBCXX), \
DISABLED_WARNINGS_gcc := unused-variable, \
LIBS_linux := -lc -lm -ldl, \
))

TARGETS += $(BUILD_LIBSIMD_SORT)
endif
Comment on lines +209 to +225
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole block should be combined with the existing block above, something like this:

ifeq ($(call isTargetOs, linux)+$(call isTargetCpu, x86_64 aarch64)+$(INCLUDE_COMPILER2)+$(filter $(TOOLCHAIN_TYPE), gcc), true+true+true+gcc)
  ##############################################################################
  ## Build libsimdsort
  ##############################################################################

  $(eval $(call SetupJdkLibrary, BUILD_LIBSIMD_SORT, \
      NAME := simdsort, \
      LINK_TYPE := C++, \
      OPTIMIZATION := HIGH, \
      INCLUDES := $(OPENJDK_TARGET_CPU_ARCH), \
      CXXFLAGS := -std=c++17, \
      CXXFLAGS_linux_aarch64 := -march=armv8.2-a+sve, \
      DISABLED_WARNINGS_gcc := unused-variable, \
      LIBS_linux := $(LIBM), \
  ))

  TARGETS += $(BUILD_LIBSIMD_SORT)
endif

Unfortunately we don't currently support CXXFLAGS_<OS>_<CPU>, just CFLAGS_<OS>_<CPU>, but this can be fixed, and I think it should be since we now have a need for it.

diff --git a/make/common/native/Flags.gmk b/make/common/native/Flags.gmk
index efb4c08e74c..2f3680af7c7 100644
--- a/make/common/native/Flags.gmk
+++ b/make/common/native/Flags.gmk
@@ -106,10 +106,12 @@ define SetupCompilerFlags
     $1_EXTRA_CFLAGS += -DSTATIC_BUILD=1
   endif
 
-  # Pickup extra OPENJDK_TARGET_OS_TYPE, OPENJDK_TARGET_OS and/or TOOLCHAIN_TYPE
-  # dependent variables for CXXFLAGS.
+  # Pickup extra OPENJDK_TARGET_OS_TYPE, OPENJDK_TARGET_OS, TOOLCHAIN_TYPE and
+  # OPENJDK_TARGET_OS plus OPENJDK_TARGET_CPU pair dependent variables for
+  # CXXFLAGS.
   $1_EXTRA_CXXFLAGS := $$($1_CXXFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_CXXFLAGS_$(OPENJDK_TARGET_OS)) \
-      $$($1_CXXFLAGS_$(TOOLCHAIN_TYPE))
+      $$($1_CXXFLAGS_$(TOOLCHAIN_TYPE)) \
+      $$($1_CXXFLAGS_$(OPENJDK_TARGET_OS)_$(OPENJDK_TARGET_CPU))
 
   ifneq ($(DEBUG_LEVEL), release)
     # Pickup extra debug dependent variables for CXXFLAGS

The above at least compiles for me.

################################################################################
4 changes: 3 additions & 1 deletion src/hotspot/cpu/aarch64/globals_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Branch Protection to use: none, standard, pac-ret") \
product(bool, AlwaysMergeDMB, true, DIAGNOSTIC, \
"Always merge DMB instructions in code emission") \

product(bool, UseSVELibSimdSortForFP, false, EXPERIMENTAL, \
"Use SVE-based LibSimdSort for float type on SVE supporting " \
"machines") \
// end of ARCH_FLAGS

#endif // CPU_AARCH64_GLOBALS_AARCH64_HPP
7 changes: 7 additions & 0 deletions src/hotspot/cpu/aarch64/matcher_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@

// Reports whether the SIMD sort intrinsic may be used for element type bt.
// Requires SVE. T_INT is always accepted on SVE hardware; T_FLOAT is accepted
// only when the experimental flag UseSVELibSimdSortForFP is enabled. Every
// other type is rejected.
static bool supports_simd_sort(BasicType bt) {
  if (!VM_Version::supports_sve()) {
    // No SVE, no SIMD sort.
    return false;
  }
  switch (bt) {
    case T_INT:
      return true;
    case T_FLOAT:
      return UseSVELibSimdSortForFP;
    default:
      return false;
  }
}

Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11873,6 +11873,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare = g.generate_multiply();
}

// Load sve_sort library on supported hardware to enable SIMD sort and partition intrinsics
if (VM_Version::supports_sve()) {
(void)StubRoutines::try_load_simdsort("sve_sort", "sve_partition");
}
#endif // COMPILER2

if (UseChaCha20Intrinsics) {
Expand Down
20 changes: 4 additions & 16 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4303,22 +4303,10 @@ void StubGenerator::generate_compiler_stubs() {
// Load x86_64_sort library on supported hardware to enable SIMD sort and partition intrinsics

if (VM_Version::supports_avx512dq() || VM_Version::supports_avx2()) {
void *libsimdsort = nullptr;
char ebuf_[1024];
char dll_name_simd_sort[JVM_MAXPATHLEN];
if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort), Arguments::get_dll_dir(), "simdsort")) {
libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
}
// Get addresses for SIMD sort and partition routines
if (libsimdsort != nullptr) {
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));

os::snprintf_checked(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512_simd_sort() ? "avx512_sort" : "avx2_sort");
StubRoutines::_array_sort = (address)os::dll_lookup(libsimdsort, ebuf_);

os::snprintf_checked(ebuf_, sizeof(ebuf_), VM_Version::supports_avx512_simd_sort() ? "avx512_partition" : "avx2_partition");
StubRoutines::_array_partition = (address)os::dll_lookup(libsimdsort, ebuf_);
}
const bool use_avx512 = VM_Version::supports_avx512_simd_sort();
const char* sort_sym = use_avx512 ? "avx512_sort" : "avx2_sort";
const char* partition_sym = use_avx512 ? "avx512_partition" : "avx2_partition";
(void)StubRoutines::try_load_simdsort(sort_sym, partition_sym);
}

#endif // COMPILER2
Expand Down
32 changes: 32 additions & 0 deletions src/hotspot/share/runtime/stubRoutines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,38 @@ StubRoutines::select_arraycopy_function(BasicType t, bool aligned, bool disjoint
#undef RETURN_STUB_PARM
}

bool StubRoutines::try_load_simdsort(const char* sort_sym, const char* partition_sym) {
void* libsimdsort = nullptr;
char ebuf_[1024];
char dll_name_simd_sort[JVM_MAXPATHLEN];

if (os::dll_locate_lib(dll_name_simd_sort, sizeof(dll_name_simd_sort),
Arguments::get_dll_dir(), "simdsort")) {
libsimdsort = os::dll_load(dll_name_simd_sort, ebuf_, sizeof ebuf_);
}

if (libsimdsort == nullptr) {
return false;
}

// Get addresses for SIMD sort and partition routines
log_info(library)("Loaded library %s, handle " INTPTR_FORMAT,
JNI_LIB_PREFIX "simdsort" JNI_LIB_SUFFIX, p2i(libsimdsort));
address sort_addr = (address)os::dll_lookup(libsimdsort, sort_sym);
address partition_addr = (address)os::dll_lookup(libsimdsort, partition_sym);

if (sort_addr == nullptr || partition_addr == nullptr) {
log_warning(library)("libsimdsort missing symbols: %s=" INTPTR_FORMAT ", %s=" INTPTR_FORMAT,
sort_sym, p2i(sort_addr), partition_sym, p2i(partition_addr));
// If either of the addresses are null, return false.
return false;
}

StubRoutines::_array_sort = sort_addr;
StubRoutines::_array_partition = partition_addr;
return true;
}

UnsafeMemoryAccessMark::UnsafeMemoryAccessMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc) {
_cgen = cgen;
_ucm_entry = nullptr;
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/runtime/stubRoutines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "code/codeBlob.hpp"
#include "memory/allocation.hpp"
#include "prims/vectorSupport.hpp"
#include "runtime/arguments.hpp"
#include "runtime/frame.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/stubCodeGenerator.hpp"
Expand Down Expand Up @@ -362,6 +363,9 @@ class StubRoutines: AllStatic {
static void arrayof_oop_copy (HeapWord* src, HeapWord* dest, size_t count);
static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);

// SIMD sort support. Attempts to locate and load the "simdsort" library from
// the JVM's dll directory and to resolve the entry points named by sort_sym and
// partition_sym. Returns true and sets StubRoutines::_array_sort and
// StubRoutines::_array_partition only when the library loads and both symbols
// resolve; otherwise returns false and leaves both stub pointers untouched.
static bool try_load_simdsort(const char* sort_sym, const char* partition_sym);
};

#endif // SHARE_RUNTIME_STUBROUTINES_HPP
54 changes: 54 additions & 0 deletions src/java.base/linux/native/libsimdsort/aarch64/pivot-selection.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2021, 2023, Intel Corporation. All rights reserved.
* Copyright (c) 2021 Serge Sans Paille. All rights reserved.
* Copyright 2025 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef AARCH64_SVE_PIVOT_SELECTION_HPP
#define AARCH64_SVE_PIVOT_SELECTION_HPP

#include <algorithm>
#include "sve-config.hpp"

/* <TODO> Pivot selection currently uses the median-of-three method.
* A possible improvement is to use a sorting network (compare-and-exchange
* sorting) for larger arrays.
*/

template <typename vtype, typename type_t>
static inline type_t get_pivot_blocks(type_t* arr, const arrsize_t left, const arrsize_t right) {
const arrsize_t len = right - left;
if (len < 64) return arr[left];

const arrsize_t mid = left + (len / 2);
const type_t a = arr[left];
const type_t b = arr[mid];
const type_t c = arr[right - 1];

const type_t min_ab = std::min(a, b);
const type_t max_ab = std::max(a, b);

return std::min(max_ab, std::max(min_ab, c));
}

#endif // AARCH64_SVE_PIVOT_SELECTION_HPP
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2023 Intel Corporation. All rights reserved.
* Copyright 2025 Arm Limited and/or its affiliates.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/

#ifndef SIMDSORT_SUPPORT_HPP
#define SIMDSORT_SUPPORT_HPP
#include <stdio.h>
#include <stdlib.h>

// Replace any previously defined assert with a two-argument form,
// assert(cond, msg): when cond is false it prints the file, line and msg to
// stderr and calls abort(). There is no NDEBUG check here, so the macro is
// active wherever this header is included.
// Note: the macro expands to a bare brace block rather than a
// do { } while (0) statement, so avoid using it as the sole body of an
// unbraced if that is followed by an else.
#undef assert
#define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }}

// GCC >= 10.1 is required for full support of the Arm SVE ACLE intrinsics (including the arm_sve.h header)
#if defined(__aarch64__) && defined(_LP64) && defined(__GNUC__) && \
((__GNUC__ > 10) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 1))
#define __SIMDSORT_SUPPORTED_LINUX
#endif

#endif //SIMDSORT_SUPPORT_HPP
Loading