Skip to content

Commit c956b8c

Browse files
committed
grid setup
2 parents b18e6f8 + fec9cd6 commit c956b8c

File tree

8 files changed

+207
-58
lines changed

8 files changed

+207
-58
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---
2+
name: Bug report
3+
about: Create a report to help us improve
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Describe the bug**
11+
A clear and concise description of what the bug is.
12+
13+
**To Reproduce**
14+
Steps to reproduce the behavior:
15+
1. Go to '...'
16+
2. Click on '....'
17+
3. Scroll down to '....'
18+
4. See error
19+
20+
**Expected behavior**
21+
A clear and concise description of what you expected to happen.
22+
23+
**Screenshots**
24+
If applicable, add screenshots to help explain your problem.
25+
26+
**Desktop (please complete the following information):**
27+
- OS: [e.g. iOS]
28+
- Browser [e.g. chrome, safari]
29+
- Version [e.g. 22]
30+
31+
**Smartphone (please complete the following information):**
32+
- Device: [e.g. iPhone6]
33+
- OS: [e.g. iOS8.1]
34+
- Browser [e.g. stock browser, safari]
35+
- Version [e.g. 22]
36+
37+
**Additional context**
38+
Add any other context about the problem here.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Feature request
3+
about: Suggest an idea for this project
4+
title: ''
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Is your feature request related to a problem? Please describe.**
11+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12+
13+
**Describe the solution you'd like**
14+
A clear and concise description of what you want to happen.
15+
16+
**Describe alternatives you've considered**
17+
A clear and concise description of any alternative solutions or features you've considered.
18+
19+
**Additional context**
20+
Add any other context or screenshots about the feature request here.

CMakeLists.txt

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ option(DEBUG "Build with debug symbols" OFF)
99
option(BUILD_PLUGINS "Build Clang/LLVM plugins" ON)
1010
option(BUILD_TESTS "Build test executables" ON)
1111
option(BUILD_PYBIND "Build Python bindings" OFF)
12+
option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
1213

1314
set(CMAKE_CXX_STANDARD 17)
1415
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -28,8 +29,6 @@ else()
2829
add_compile_definitions(TENSORIUM_FALLBACK)
2930
endif()
3031

31-
32-
#
3332
find_package(CUDAToolkit QUIET)
3433

3534
execute_process(
@@ -41,15 +40,12 @@ if (NOT GPU_NAME STREQUAL "none")
4140
message(STATUS "Detected NVIDIA GPU: ${GPU_NAME}")
4241
add_compile_definitions(TENSORIUM_GPU_PRESENT)
4342
else()
44-
message(WARNING "⚠️ Aucun GPU NVIDIA détecté (ou nvidia-smi absent)")
43+
message(WARNING "No NVIDIA GPU detected or nvidia-smi missing")
4544
endif()
4645

47-
48-
option(USE_CUDA "Enable CUDA kernels (requires nvcc)" OFF)
49-
5046
if (USE_CUDA)
5147
if (NOT CUDAToolkit_FOUND)
52-
message(FATAL_ERROR "USE_CUDA=ON mais aucun CUDA toolkit détecté ⚠️")
48+
message(FATAL_ERROR "USE_CUDA=ON but CUDA toolkit not found")
5349
endif()
5450

5551
message(STATUS "CUDA toolkit found at: ${CUDAToolkit_ROOT}")
@@ -63,17 +59,14 @@ if (USE_CUDA)
6359
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 90)
6460
add_compile_definitions(TENSORIUM_CUDA_ARCH=${CMAKE_CUDA_ARCHITECTURES})
6561

66-
# Flags NVCC (propres)
6762
set(CMAKE_CUDA_STANDARD 17)
6863
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
69-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC")
70-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
64+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3 -Xcompiler=-fPIC --use_fast_math -lineinfo -Wno-deprecated-gpu-targets")
7165

7266
include_directories(${CUDAToolkit_INCLUDE_DIRS})
7367
link_directories(${CUDAToolkit_LIBRARY_DIR})
7468

7569
message(STATUS "→ CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
76-
7770
else()
7871
message(STATUS "CUDA support disabled (USE_CUDA=OFF)")
7972
add_compile_definitions(TENSORIUM_NO_CUDA)
@@ -92,7 +85,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "am
9285
set(CMAKE_CXX_FLAGS "${BASE_FLAGS} ${AVX2_FLAGS}")
9386
endif()
9487
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
95-
message(STATUS "Configuring for Apple Silicon ARM64: disabling AVX flags")
88+
message(STATUS "Configuring for ARM64: disabling AVX flags")
9689
set(CMAKE_CXX_FLAGS "-O3 -mcpu=apple-m1 -Wno-ignored-attributes")
9790
else()
9891
message(WARNING "Unknown architecture (${CMAKE_SYSTEM_PROCESSOR}); using generic optimization flags.")

README.md

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
![Nouveau projet](https://github.com/user-attachments/assets/5f75f1f9-999d-410b-971e-ba3bd5e8b5e9)
22
# Tensorium_lib
3-
### !!!DISCLAMER!!!
3+
> !!!DISCLAIMER!!!
44
Tensorium_lib is still in the early development phase, and many of its features work, but I'm not yet convinced of the solidity of some of them (especially the tensor manipulations).
55
The Python binding is usable without any other Python library, but I'm still working on it to make it clean and installable with a simple pip3 install (see the Jupyter Notebook).
66

77
**Tensorium_lib** is a high-performance scientific C++ library designed for demanding computational domains such as **numerical relativity**, **machine learning (ML)**, **deep learning (DL)** and general **scientific simulations**.
88

9-
Here is the full documentation : https://tensoriumcore.github.io/Tensorium_lib/
9+
## Documentation
1010

11+
> Here is the full documentation: https://tensoriumcore.github.io/Tensorium_lib/
12+
13+
## Highlight
1114
It provides a modern, extensible infrastructure for efficient vector, matrix, and tensor computations by leveraging:
1215
- **SIMD acceleration** (SSE, AVX2, AVX512),
1316
- **Multithreading** with OpenMP,
@@ -24,6 +27,70 @@ This library is built with the goal of empowering projects that require both spe
2427
- Fast manipulation of large scientific datasets and image matrices (not atm),
2528
- Research and education projects needing intuitive yet high-performance numerical tools.
2629

30+
## Requirements
31+
32+
> **Recommended:** build and use with **LLVM/Clang** for maximum performance.
33+
34+
### Core Dependencies
35+
- **C++17/20 compiler** with `AVX2` / `FMA` support
36+
`AVX512` is automatically detected and enabled if available
37+
→ Recommended: **Clang ≥ 17** or **LLVM ≥ 20**
38+
- **OpenMP** (`-fopenmp`)
39+
- **MPI** (for distributed parallelism)
40+
- **libmemkind-dev** *(required only for Intel Xeon Phi Knights Landing CPUs)*
41+
- **CMake ≥ 3.16**
42+
- **Python ≥ 3.10** (for Python bindings)
43+
- **pybind11**
44+
- Arch Linux: `sudo pacman -S python-pybind11`
45+
- Other: `pip install pybind11 --user`
46+
- **OpenBLAS** *(optional)* — used for benchmarking against BLAS kernels
47+
48+
---
49+
## Build Instructions
50+
51+
### Recommended LLVM/Clang Toolchain
52+
53+
If you want the best performance, use **LLVM/Clang 20+**.
54+
55+
### Install LLVM/Clang (example for Linux)
56+
57+
```bash
58+
# Clone the official LLVM project
59+
git clone https://github.com/llvm/llvm-project.git
60+
cd llvm-project
61+
mkdir llvm-build-release && cd llvm-build-release
62+
63+
# Configure the build
64+
cmake -G Ninja ../llvm \
65+
-DCMAKE_BUILD_TYPE=Release \
66+
-DLLVM_ENABLE_PROJECTS="clang;mlir;lld;lldb;openmp" \
67+
-DLLVM_TARGETS_TO_BUILD="X86;AArch64;NVPTX" \
68+
-DLLVM_ENABLE_RTTI=ON \
69+
-DCMAKE_INSTALL_PREFIX=/opt/llvm-20
70+
71+
# Build & install
72+
ninja -j$(nproc)
73+
sudo ninja install
74+
```
75+
Then you can compile Tensorium_lib. If you want to use it in your own projects, simply change the Test rule to Srcs (or another name) and set the recommended options in the CMakeLists.txt file in the `
76+
Tests` folder, or add a src rule and create a src folder :
77+
then
78+
```cmake
79+
### inside the main CMakeLists.txt
80+
if(BUILD_SRCS)
81+
add_subdirectory(SRCS)
82+
endif()
83+
```
84+
### Build the lib
85+
86+
```bash
87+
git clone https://github.com/TensoriumCore/Tensorium_lib.git && cd Tensorium_lib
88+
mkdir build && cd build
89+
cmake .. (options if you need them; documentation is coming soon)
90+
make -j
91+
```
92+
The Python module will be created as a .so file in the pybuild/ directory.
93+
2794
## Highlights
2895
2996
- Optimized `Tensor`, `Vector` and `Matrix` classes with aligned memory
@@ -45,43 +112,8 @@ This library is built with the goal of empowering projects that require both spe
45112
- Some (several) optimizations
46113
- Plug Tensorium_MLIR and externalize Compiler plugins (subdependencies)
47114
- ARM support
48-
## Build Instructions
49115
50-
### Requirements
51-
- !!! USE CLANG/LLVM if you want to use the max performances of this lib !!!
52-
- C++17/20 compiler with AVX2/FMA support or AVX512 if avalaible on your plateform (Intel compilers will be added later)
53-
- fopenmp
54-
- MPI
55-
- libmemkind-dev (if you are using Xeon Phi knight landing CPU)
56-
- CMake ≥ 3.16
57-
- Python ≥ 3.10 (for Python bindings)
58-
- `pybind11` installed (`pacman -S python-pybind11` on Arch, or `pip install pybind11 --user`)
59-
- OpenBLAS (optional, for benchmarking with BLAS)
60116
61-
## Build over Nix for pythton binding
62-
63-
```bash
64-
./build_linux.sh && pip install --user -e .
65-
```
66-
if you are on Macos :
67-
```bash
68-
nix --extra-experimental-features 'nix-command flakes' develop && ./build_macos && pip install --user -e .
69-
```
70-
71-
Then you can use it as the .ipynb show
72-
### Build C++ only for special targets and options
73-
74-
```bash
75-
make # Default AVX2
76-
make help # Show differents compile options
77-
make AVX512=true # AVX512
78-
make USE_KNL=true # MCDRAM Memkind HBW (Xeon phi KNL)
79-
make DEBUG=true # debug symbols
80-
make VERBOSE=true # VERBOSE log
81-
make benchmark # BLAS vs Tensorium mat_mult benchmark
82-
```
83-
84-
The Python module will be created as a .so file in the pybuild/ directory.
85117
### Example usage in C++
86118
```cpp
87119
#include "Tensorium.hpp"

Tests/Matrix/Matrix.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ int matrix_tests() {
294294

295295
tensorium::Vector<double> X(dim);
296296
X(0) = 0.0;
297-
X(1) = 3.0;
297+
X(1) = 10.0;
298298
X(2) = M_PI / 2.0;
299299
X(3) = 0.0;
300300

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#pragma once
2+
3+
#include "../../Core/Tensor.hpp"
4+
#include "../../Core/Vector.hpp"
5+
#include <cassert>
6+
#include <cstddef>
7+
#include <stdexcept>
8+
template <typename T> class BSSNConstraints {
9+
public:
10+
11+
};

includes/Tensorium/DiffGeometry/BSSN/BSSNSetup.hpp

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ namespace tensorium_RG {
4444
* - Extrinsic curvature \f$K_{ij}\f$
4545
* - Trace-free conformal extrinsic curvature \f$\tilde{A}_{ij}\f$
4646
*/
47-
struct alignas(32) BSSNGrid {
47+
struct alignas(64) BSSNGrid {
4848

4949
std::vector<double> alpha; ///< Lapse function \f$\alpha\f$
5050
std::vector<tensorium::Vector<double>> beta; ///< Shift vector \f$\beta^i\f$
@@ -68,6 +68,48 @@ struct alignas(32) BSSNGrid {
6868
contracted_Gamma; ///< Contracted symbols \f$\Gamma^i_{ij} = -\frac{3}{2} \partial_j \ln
6969
std::vector<tensorium::Tensor<double, 5>> ricci_tilde; // [NX, NY, NZ, 3, 3]
7070
};
71+
/**
 * @brief Builds the shift-only part of \f$\partial_t \gamma_{ij}\f$, i.e.
 *        \f$D_i \beta_j + D_j \beta_i\f$.
 *
 * The computation proceeds in three stages, each over the fixed index range 0..2:
 *  1. lower the shift index: \f$\beta_j = \gamma_{jm} \beta^m\f$;
 *  2. form \f$\partial_i \beta_j\f$ with the product rule from the metric and shift derivatives;
 *  3. subtract the connection term for both index orders and add the two results.
 *
 * The \f$-2 \alpha K_{ij}\f$ term of the full evolution equation is NOT included here.
 *
 * @param gamma          metric, read as gamma(j, m)
 * @param beta_u         contravariant shift, read as beta_u(m)
 * @param partial_beta_u shift derivatives, read as partial_beta_u(i, m) = \f$\partial_i \beta^m\f$
 * @param christoffel    connection coefficients, read as christoffel(i, j, k) and contracted
 *                       over k with \f$\beta_k\f$
 * @param dgamma_phys    metric derivatives, read as dgamma_phys(i, j, m) = \f$\partial_i \gamma_{jm}\f$
 * @return 3x3 tensor whose (i, j) entry is \f$D_i \beta_j + D_j \beta_i\f$
 *
 * NOTE(review): the contraction below assumes the layout christoffel(i, j, k) = Γ^k_{ij}
 * (upper index stored last) — confirm against the code that produces this tensor.
 */
template <typename T>
72+
tensorium::Tensor<T, 2>
73+
compute_dt_gamma_from_beta(const tensorium::Tensor<T, 2> &gamma,
74+
const tensorium::Vector<T> &beta_u, // β^i
75+
const tensorium::Tensor<T, 2> &partial_beta_u, // (i,m)=∂_i β^m
76+
const tensorium::Tensor<T, 3> &christoffel, // Γ^k_{ij}
77+
const tensorium::Tensor<T, 3> &dgamma_phys) // (i,j,m)=∂_i γ_{jm}
78+
{
79+
// Stage 1 — lower the shift index: β_j = γ_{jm} β^m
80+
tensorium::Vector<T> beta_d(3);
81+
for (int j = 0; j < 3; ++j) {
82+
T s = 0;
83+
for (int m = 0; m < 3; ++m)
84+
s += gamma(j, m) * beta_u(m);
85+
beta_d(j) = s;
86+
}
87+
88+
// Stage 2 — product rule: ∂_i β_j = (∂_i γ_{j m}) β^m + γ_{j m} (∂_i β^m)
89+
tensorium::Tensor<T, 2> d_beta_d({3, 3}); // (i,j)
90+
for (int i = 0; i < 3; ++i)
91+
for (int j = 0; j < 3; ++j) {
92+
T s = 0;
93+
for (int m = 0; m < 3; ++m)
94+
s += dgamma_phys(i, j, m) * beta_u(m) + gamma(j, m) * partial_beta_u(i, m);
95+
d_beta_d(i, j) = s;
96+
}
97+
98+
// Stage 3 — covariant derivative: D_i β_j = ∂_i β_j - Γ^k_{ij} β_k, then symmetrize
99+
tensorium::Tensor<T, 2> Dt_g({3, 3}); // ∂_t γ_ij = D_i β_j + D_j β_i (without the -2αK_ij term here)
100+
for (int i = 0; i < 3; ++i)
101+
for (int j = 0; j < 3; ++j) {
102+
T Di_bj = d_beta_d(i, j);
103+
for (int k = 0; k < 3; ++k)
104+
Di_bj -= christoffel(i, j, k) * beta_d(k);
105+
T Dj_bi = d_beta_d(j, i);
106+
for (int k = 0; k < 3; ++k)
107+
Dj_bi -= christoffel(j, i, k) * beta_d(k);
108+
Dt_g(i, j) = Di_bj + Dj_bi;
109+
}
110+
return Dt_g;
111+
}
112+
71113
/**
72114
* @class BSSN
73115
* @brief Driver class to initialize and store BSSN variables from an input spacetime metric.
@@ -143,11 +185,24 @@ template <typename T> class BSSN {
143185

144186
tensorium_RG::ExtrinsicCurvature<T> extr;
145187
auto Kij = extr.compute_Kij(dgt, gamma_ij, beta, d_beta, christoffel_phys, alpha);
188+
dgt = compute_dt_gamma_from_beta(gamma_ij, beta, d_beta, christoffel_phys, dgamma_phys);
189+
// Lie(γ) = ∇_i β_j + ∇_j β_i
190+
auto Lie =
191+
compute_dt_gamma_from_beta(gamma_ij, beta, d_beta, christoffel_phys, dgamma_phys);
192+
print_tensor2("Lie_beta(gamma_ij)", Lie);
193+
194+
// Analytic stationarity test: ∂_t γ = 0 ⇒ 2 α K - Lie ≈ 0
195+
tensorium::Tensor<T, 2> resid({3, 3});
196+
for (int i = 0; i < 3; ++i)
197+
for (int j = 0; j < 3; ++j)
198+
resid(i, j) = 2.0 * alpha * Kij(i, j) - Lie(i, j);
199+
200+
print_tensor2("Stationarity residual R_ij = 2α K_ij - Lie_beta(gamma_ij)", resid);
146201

147202
tensorium_RG::BSSNAtildeTensor<T> Aij;
148203
auto AtildeTensor = Aij.compute_Atilde_tensor(Kij, gamma_ij_inv, gamma_ij, chi);
149204

150-
const size_t NX = 32, NY = 32, NZ = 32;
205+
const size_t NX = 64, NY = 64, NZ = 64;
151206

152207
tensorium::Tensor<T, 5> Atilde_full({NX, NY, NZ, 3, 3});
153208
tensorium::Tensor<T, 5> gtilde_inv_full({NX, NY, NZ, 3, 3});
@@ -166,16 +221,15 @@ template <typename T> class BSSN {
166221
}
167222
}
168223

169-
auto chi_ctx = ChiContext<T>::compute(X, dx, dy, dz, gamma_ij, dgamma_phys, metric);
170-
auto Ricci_tilde = RicciTildeTensor<T>::compute_Ricci_Tilde_tensor(
171-
chi_ctx, gamma_tilde_inv, tilde_Gamma, christoffel_tilde, gamma_tilde);
172-
224+
auto chi_ctx = ChiContext<T>::compute(X, dx, dy, dz, gamma_ij, dgamma_phys, metric);
225+
auto Ricci_tilde = RicciTildeTensor<T>::compute_Ricci_Tilde_tensor(
226+
chi_ctx, gamma_tilde_inv, tilde_Gamma, christoffel_tilde, gamma_tilde);
173227

174228
auto Ricci_chi = RicciConformalTensor<T>::compute_Ricci_chi_total(
175229
chi_ctx, gamma_tilde, gamma_tilde_inv, christoffel_tilde);
176230

177-
auto Ricci = RicciPhysicalTensor<T>::compute_Ricci_total(chi_ctx, gamma_tilde, gamma_tilde_inv,
178-
tilde_Gamma, christoffel_tilde);
231+
auto Ricci = RicciPhysicalTensor<T>::compute_Ricci_total(
232+
chi_ctx, gamma_tilde, gamma_tilde_inv, tilde_Gamma, christoffel_tilde);
179233

180234
grid.alpha = {alpha};
181235
grid.beta = {beta};
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#pragma once

0 commit comments

Comments
 (0)