Skip to content

Commit f383510

Browse files
committed
better
1 parent afa8631 commit f383510

File tree

2 files changed

+45
-5
lines changed

2 files changed

+45
-5
lines changed

CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,23 @@ set(CMAKE_CXX_STANDARD 17)
1515
set(CMAKE_CXX_STANDARD_REQUIRED ON)
1616
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
1717

18+
# ───────────────────────── Architecture detection ──────────────────────
# Picks one TENSORIUM_* compile definition from CMAKE_SYSTEM_PROCESSOR and
# adds the matching code-generation flags for every target in this directory.
message(STATUS "Detecting architecture...")

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
    # Only AVX2 and FMA code generation is requested below, so the status
    # message names exactly those instruction sets.
    message(STATUS "→ Building for x86_64 (AVX2/FMA enabled)")
    add_compile_definitions(TENSORIUM_X86)
    add_compile_options(-mavx2 -mfma)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
    message(STATUS "→ Building for ARM64 / Apple Silicon (NEON enabled)")
    add_compile_definitions(TENSORIUM_ARM)
    find_package(OpenMP)
    if (OpenMP_CXX_FOUND)
        # target_link_libraries() is a hard configure error when the target
        # has not been created yet. Use it only if the target already exists;
        # otherwise fall back to the directory-scoped form, which applies to
        # targets created later in this directory.
        if (TARGET "${PROJECT_NAME}")
            target_link_libraries(${PROJECT_NAME} PUBLIC OpenMP::OpenMP_CXX)
        else()
            link_libraries(OpenMP::OpenMP_CXX)
        endif()
    endif()
else()
    message(WARNING "→ Unknown architecture: ${CMAKE_SYSTEM_PROCESSOR}, using scalar fallback.")
    add_compile_definitions(TENSORIUM_FALLBACK)
endif()
1835
# ─────────────────────────────── Includes ──────────────────────────────
1936
include_directories(${CMAKE_SOURCE_DIR}/Includes)
2037

includes/Tensorium/Core/Vector.hpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -431,27 +431,50 @@ template <typename K> class Vector {
431431
* @param v Second 3D vector.
432432
* @return Resulting 3D vector.
433433
*/
434+
434435
__attribute__((always_inline, hot, flatten)) static inline Vector<float>
cross_product(const Vector<float> &u, const Vector<float> &v) {
    // The cross product is only defined here for exactly three components.
    if (u.size() != 3 || v.size() != 3)
        throw std::invalid_argument("Cross product is only defined for 3D vectors.");

    Vector<float> r(3);

    // No SIMD packing is performed: each result component is one scalar
    // multiply-subtract, so the computation stays free of intrinsic types
    // and compiles identically on every architecture.
#if defined(__FMA__) || defined(TENSORIUM_X86) || defined(TENSORIUM_ARM)
    // Fused form: a * b + (-(c * d)), with the outer multiply-add done by std::fma.
    r.data[0] = std::fma(u.data[1], v.data[2], -u.data[2] * v.data[1]);
    r.data[1] = std::fma(u.data[2], v.data[0], -u.data[0] * v.data[2]);
    r.data[2] = std::fma(u.data[0], v.data[1], -u.data[1] * v.data[0]);
#else
    // Scalar fallback: plain multiply-subtract, used when none of the macros above are defined.
    r.data[0] = u.data[1] * v.data[2] - u.data[2] * v.data[1];
    r.data[1] = u.data[2] * v.data[0] - u.data[0] * v.data[2];
    r.data[2] = u.data[0] * v.data[1] - u.data[1] * v.data[0];
#endif

    return r;
}
450465

451-
Vector<K>& operator+=(const Vector<K>& m) { this->add(m); return *this; }
452-
Vector<K>& operator-=(const Vector<K>& m) { this->sub(m); return *this; }
453-
Vector<K>& operator*=(K alpha) { this->scl(alpha); return *this; }
454-
466+
/// Compound addition: forwards `m` to add() and returns a reference to this vector.
Vector<K> &operator+=(const Vector<K> &m) {
    Vector<K> &self = *this;
    self.add(m);
    return self;
}
470+
/// Compound subtraction: forwards `m` to sub() and returns a reference to this vector.
Vector<K> &operator-=(const Vector<K> &m) {
    Vector<K> &self = *this;
    self.sub(m);
    return self;
}
474+
/// Compound scaling: forwards `alpha` to scl() and returns a reference to this vector.
Vector<K> &operator*=(K alpha) {
    Vector<K> &self = *this;
    self.scl(alpha);
    return self;
}
455478
};
456479

457480
template <typename K> inline Vector<K> operator+(const Vector<K> &a, const Vector<K> &b) {

0 commit comments

Comments
 (0)