@@ -10,32 +10,39 @@ option(BUILD_PLUGINS "Build Clang/LLVM plugins" ON)
# ---- Feature switches --------------------------------------------------------
# option() only supplies a default: a value already in the cache, or passed as
# -D<NAME>=<value> at configure time, wins over the default listed here.
option(BUILD_TESTS  "Build test executables" ON)
option(BUILD_PYBIND "Build Python bindings" OFF)
option(USE_CUDA     "Enable CUDA kernels (requires nvcc)" OFF)
option(USE_CBLAS    "Enable BLAS" ON)

# ---- Toolchain defaults ------------------------------------------------------
# Emit compile_commands.json for tooling, and require C++17 with no silent
# fallback to an older standard.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
1718
18- message (STATUS "Detecting architecture..." )
# macOS-specific setup: extra search locations under /opt/local and optional
# linking against the Accelerate framework as the BLAS provider.
if(APPLE)
  # Let find_package()/find_library() also search the /opt/local prefix and
  # the LLVM 20 tree beneath it (presumably a MacPorts layout -- confirm).
  list(APPEND CMAKE_PREFIX_PATH "/opt/local" "/opt/local/libexec/llvm-20")

  # Only hand directories to the compiler/linker when they actually exist, so
  # machines without this prefix do not get dangling -I/-L search paths.
  if(IS_DIRECTORY "/opt/local/include")
    include_directories("/opt/local/include")
  endif()
  foreach(opt_local_lib_dir IN ITEMS
          "/opt/local/lib"
          "/opt/local/libexec/llvm-20/lib")
    if(IS_DIRECTORY "${opt_local_lib_dir}")
      link_directories("${opt_local_lib_dir}")
    endif()
  endforeach()

  # Honor the USE_CBLAS option: with USE_CBLAS=OFF neither the framework nor
  # the TENSORIUM_USE_CBLAS definition is added. With the default (ON) the
  # behavior is unchanged: link Accelerate and define the macro when found.
  if(USE_CBLAS)
    find_library(ACCELERATE_LIB Accelerate)
    if(ACCELERATE_LIB)
      link_libraries(${ACCELERATE_LIB})
      add_compile_definitions(TENSORIUM_USE_CBLAS)
    endif()
  endif()
endif()
1931
# Pick the architecture definition (and baseline SIMD options) from the target
# processor name. Exactly one of TENSORIUM_X86 / TENSORIUM_ARM /
# TENSORIUM_FALLBACK is defined.
#
# MATCHES is a case-sensitive regex, so both spellings of the 64-bit x86 name
# are listed: "AMD64" and "amd64". This keeps the branch in agreement with the
# later compiler-flag selection, which also keys on the processor name.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|amd64")
  add_compile_definitions(TENSORIUM_X86)
  # Baseline x86 SIMD level: AVX2 plus fused multiply-add.
  add_compile_options(-mavx2 -mfma)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
  add_compile_definitions(TENSORIUM_ARM)
else()
  # Unrecognized processor: no architecture-specific options are added.
  add_compile_definitions(TENSORIUM_FALLBACK)
endif()
3140
# Probe for libnuma. HAVE_NUMA records the outcome as TRUE/FALSE for later
# use; USE_NUMA is defined for all sources only when the library was found.
find_library(NUMA_LIB numa)
set(HAVE_NUMA FALSE)
if(NUMA_LIB)
  set(HAVE_NUMA TRUE)
  add_compile_definitions(USE_NUMA)
endif()
4148
@@ -47,24 +54,18 @@ execute_process(
4754)
4855
# Define TENSORIUM_GPU_PRESENT only when GPU detection produced a real name.
# GPU_NAME is set before this point; "none" is the sentinel for "no GPU".
# An empty or unset GPU_NAME is treated the same as "none": the unquoted form
# `NOT GPU_NAME STREQUAL "none"` evaluates to true in that case and would
# define the macro without any detection result.
# NOTE(review): confirm the detection step always yields "none" on failure;
# if it does, the extra empty-string guard is simply a no-op.
if(NOT "${GPU_NAME}" STREQUAL "" AND NOT "${GPU_NAME}" STREQUAL "none")
  add_compile_definitions(TENSORIUM_GPU_PRESENT)
endif()
5559
5660if (USE_CUDA)
5761 if (NOT CUDAToolkit_FOUND)
5862 message (FATAL_ERROR "USE_CUDA=ON but CUDA toolkit not found" )
5963 endif ()
6064
61- message (STATUS "CUDA toolkit found at: ${CUDAToolkit_ROOT} " )
62- message (STATUS "→ Version: ${CUDAToolkit_VERSION} " )
6365 add_compile_definitions (TENSORIUM_CUDA)
6466
6567 enable_language (CUDA)
6668 set (CMAKE_CUDA_COMPILER "${CUDAToolkit_NVCC_EXECUTABLE} " )
67- message (STATUS "Using NVCC for CUDA compilation: ${CMAKE_CUDA_COMPILER} " )
6869
6970 set (CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90)
7071 add_compile_definitions (TENSORIUM_CUDA_ARCH=${CMAKE_CUDA_ARCHITECTURES} )
@@ -75,31 +76,25 @@ if (USE_CUDA)
7576
7677 include_directories (${CUDAToolkit_INCLUDE_DIRS} )
7778 link_directories (${CUDAToolkit_LIBRARY_DIR} )
78-
79- message (STATUS "→ CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES} " )
8079else ()
81- message (STATUS "CUDA support disabled (USE_CUDA=OFF)" )
8280 add_compile_definitions (TENSORIUM_NO_CUDA)
8381endif ()
8482
# Project headers live in the Includes directory of the top-level source tree.
include_directories("${CMAKE_SOURCE_DIR}/Includes")

# Append per-architecture optimization flags to CMAKE_CXX_FLAGS. Flags already
# present (from the user or the toolchain) are kept; ours are added after them.
#
# MATCHES is a case-sensitive regex, so both "AMD64" and "amd64" are listed
# next to "x86_64". Either spelling of the same 64-bit x86 processor now gets
# the x86 flag set instead of falling through to the generic branch.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|amd64")
  # NOTE(review): -Rpass-analysis=... looks like a Clang-style remark option;
  # confirm that only Clang-compatible compilers are expected here.
  set(BASE_FLAGS "-O3 -mtune=native -Wno-ignored-attributes -Rpass-analysis=tensorium-align")
  set(AVX2_FLAGS "-mfma -mavx2")
  set(AVX512_FLAGS "-mfma -mavx512f -mavx512cd")
  # AVX512 is not set anywhere in this part of the file; when it is false or
  # undefined the AVX2 flag set is used.
  if(AVX512)
    string(APPEND CMAKE_CXX_FLAGS " ${BASE_FLAGS} ${AVX512_FLAGS}")
  else()
    string(APPEND CMAKE_CXX_FLAGS " ${BASE_FLAGS} ${AVX2_FLAGS}")
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
  string(APPEND CMAKE_CXX_FLAGS " -O3 -mcpu=native -Wno-ignored-attributes")
else()
  # Unrecognized processor: generic optimization only.
  string(APPEND CMAKE_CXX_FLAGS " -O3 -mtune=native")
endif()
10499
105100if (DEBUG)
@@ -109,16 +104,17 @@ if(VERBOSE)
109104 add_compile_definitions (VERBOSE)
110105endif ()
111106
112- message (STATUS "Detected architecture: ${CMAKE_SYSTEM_PROCESSOR} " )
113-
114107if (APPLE )
115108 if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" )
116- message (STATUS "Configuring OpenMP manually for macOS ARM64 (Apple Clang)" )
117- set (OPENMP_INCLUDE_PATH "/opt/homebrew/opt/libomp/include" )
118- set (OPENMP_LIB_PATH "/opt/homebrew/opt/libomp/lib" )
109+ set (OPENMP_INCLUDE_PATH "/opt/local/include/libomp" )
110+ set (OPENMP_LIB_PATH "/opt/local/lib/libomp" )
119111 set (OPENMP_LIB "omp" )
112+
113+ include_directories (${OPENMP_INCLUDE_PATH} )
114+ link_directories (${OPENMP_LIB_PATH} )
115+ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xpreprocessor -fopenmp" )
116+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lomp" )
120117 elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" )
121- message (STATUS "Using OpenMP for macOS x86_64 (Intel)" )
122118 find_package (OpenMP)
123119 if (OpenMP_CXX_FOUND)
124120 set (OPENMP_LIB "OpenMP::OpenMP_CXX" )
@@ -132,9 +128,9 @@ else()
132128endif ()
133129
# When the earlier probe found libnuma, link it into every target created
# after this point (link_libraries is directory-scoped).
if(HAVE_NUMA)
  link_libraries(${NUMA_LIB})
endif()

# The Plugins subdirectory is only configured when BUILD_PLUGINS is enabled.
if(BUILD_PLUGINS)
  add_subdirectory(Plugins)
endif()
0 commit comments