1 change: 0 additions & 1 deletion examples/embeddings/Cargo.toml
@@ -13,7 +13,6 @@ anyhow = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/mtmd/Cargo.toml
@@ -11,7 +11,6 @@ clap = { workspace = true, features = ["derive"] }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/reranker/Cargo.toml
@@ -14,7 +14,6 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/simple/Cargo.toml
@@ -17,7 +17,6 @@ tracing-subscriber = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion llama-cpp-2/Cargo.toml
@@ -26,7 +26,6 @@ cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]
-native = ["llama-cpp-sys-2/native"]
 openmp = ["llama-cpp-sys-2/openmp"]
 sampler = []
 # Only has an impact on Android.
1 change: 0 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
@@ -78,7 +78,6 @@ cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
-native = []
 openmp = []
 # Only has an impact on Android.
 shared-stdcxx = []
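Note: with the `native` cargo feature removed from every manifest above, downstream crates opt into native CPU tuning through rustc flags instead. A minimal sketch of the replacement configuration, assuming Cargo's standard `[build] rustflags` mechanism (the file below is an illustrative downstream config, not part of this diff):

    # .cargo/config.toml of a hypothetical downstream project
    [build]
    # target-cpu=native is what build.rs now looks for to enable GGML_NATIVE.
    rustflags = ["-C", "target-cpu=native"]
    # Or pin a portable baseline; build.rs forwards it to CMake as -march=...
    # and enables only the matching GGML_* feature flags:
    # rustflags = ["-C", "target-cpu=x86-64-v2"]

The same flags can also be passed one-off as `RUSTFLAGS="-C target-cpu=native" cargo build`.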
91 changes: 89 additions & 2 deletions llama-cpp-sys-2/build.rs
@@ -508,6 +508,93 @@ fn main() {
         }
     }
 
+    // In this next bit we select which CPU-specific features to compile for.
+    // First, check for target-cpu=native in the encoded rustflags.
+    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .map(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .any(|f| f.contains("target-cpu=native"))
+        })
+        .unwrap_or(false);
+
+    // Also extract the target-cpu value if one is specified (e.g. x86-64, x86-64-v2).
+    let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .ok()
+        .and_then(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .and_then(|f| f.split("target-cpu=").nth(1))
+                .map(|s| s.to_string())
+        });
+
+    if has_native_target_cpu {
+        debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
+        config.define("GGML_NATIVE", "ON");
+    }
+    // If native isn't specified, enable specific features for ggml based on
+    // the comma-separated target-feature list that Cargo exposes.
+    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
+        debug_log!("Compiling with target features: {}", features);
+        config.define("GGML_NATIVE", "OFF");
+
+        // Set the baseline architecture from target-cpu if specified. This is critical
+        // to prevent the compiler from auto-vectorizing to the build host's capabilities.
+        if let Some(ref cpu) = target_cpu {
+            debug_log!("Setting baseline architecture: -march={}", cpu);
+            // Pass the baseline architecture to CMake's C and CXX compilers.
+            config.cflag(&format!("-march={}", cpu));
+            config.cxxflag(&format!("-march={}", cpu));
+        }
+
+        // The list of Rust target features is documented at:
+        // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
+        // The matching GGML config flags were found by reading:
+        // llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+        for feature in features.split(',') {
+            match feature {
+                "avx" => {
+                    config.define("GGML_AVX", "ON");
+                }
+                "avx2" => {
+                    config.define("GGML_AVX2", "ON");
+                }
+                "avx512bf16" => {
+                    config.define("GGML_AVX512_BF16", "ON");
+                }
+                "avx512vbmi" => {
+                    config.define("GGML_AVX512_VBMI", "ON");
+                }
+                "avx512vnni" => {
+                    config.define("GGML_AVX512_VNNI", "ON");
+                }
+                "avxvnni" => {
+                    config.define("GGML_AVX_VNNI", "ON");
+                }
+                "bmi2" => {
+                    config.define("GGML_BMI2", "ON");
+                }
+                "f16c" => {
+                    config.define("GGML_F16C", "ON");
+                }
+                "fma" => {
+                    config.define("GGML_FMA", "ON");
+                }
+                "sse4.2" => {
+                    config.define("GGML_SSE42", "ON");
+                }
+                _ => {
+                    debug_log!(
+                        "Unrecognized CPU feature: '{}' - skipping GGML config for it.",
+                        feature
+                    );
+                    continue;
+                }
+            };
+        }
+    }
+
     config.define(
         "BUILD_SHARED_LIBS",
         if build_shared_libs { "ON" } else { "OFF" },
@@ -627,9 +714,9 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
+        && !has_native_target_cpu
     {
-        // If the native feature is not enabled, we take off the native ARM64 support.
+        // If target-cpu=native is not specified, we take off native ARM64 support.
         // This is useful in Docker environments, where target-cpu=native is typically not set.
         config.define("GGML_NATIVE", "OFF");
         config.define("GGML_CPU_ARM_ARCH", "armv8-a");