diff --git a/examples/embeddings/Cargo.toml b/examples/embeddings/Cargo.toml
index 13858693..e5866549 100644
--- a/examples/embeddings/Cargo.toml
+++ b/examples/embeddings/Cargo.toml
@@ -13,7 +13,6 @@ anyhow = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/mtmd/Cargo.toml b/examples/mtmd/Cargo.toml
index 426ddaa6..3863fc65 100644
--- a/examples/mtmd/Cargo.toml
+++ b/examples/mtmd/Cargo.toml
@@ -11,7 +11,6 @@ clap = { workspace = true, features = ["derive"] }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/reranker/Cargo.toml b/examples/reranker/Cargo.toml
index dfce8e37..2bedf9fb 100644
--- a/examples/reranker/Cargo.toml
+++ b/examples/reranker/Cargo.toml
@@ -14,7 +14,6 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml
index 964b061e..f8376c65 100644
--- a/examples/simple/Cargo.toml
+++ b/examples/simple/Cargo.toml
@@ -17,7 +17,6 @@ tracing-subscriber = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
index 026487ef..54d41444 100644
--- a/llama-cpp-2/Cargo.toml
+++ b/llama-cpp-2/Cargo.toml
@@ -26,7 +26,6 @@ cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]
-native = ["llama-cpp-sys-2/native"]
 openmp = ["llama-cpp-sys-2/openmp"]
 sampler = []
 # Only has an impact on Android.
diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
index e7f03ef2..e0b17000 100644
--- a/llama-cpp-sys-2/Cargo.toml
+++ b/llama-cpp-sys-2/Cargo.toml
@@ -78,7 +78,6 @@ cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
-native = []
 openmp = []
 # Only has an impact on Android.
 shared-stdcxx = []
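With the `native` cargo feature removed above, downstream crates opt in to host-specific codegen through rustc instead; the build-script change below picks the flag up from `CARGO_ENCODED_RUSTFLAGS`, which Cargo populates from `RUSTFLAGS` and from `rustflags` in Cargo configuration. A minimal sketch of the equivalent setup, assuming a per-project `.cargo/config.toml` (any mechanism that gets the flag into rustc's flags works the same way):

```toml
# .cargo/config.toml -- illustrative only, not part of this diff.
# Equivalent one-off invocation: RUSTFLAGS="-C target-cpu=native" cargo build
[build]
rustflags = ["-C", "target-cpu=native"]
```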
diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
index aa70bee5..ba96a2f1 100644
--- a/llama-cpp-sys-2/build.rs
+++ b/llama-cpp-sys-2/build.rs
@@ -508,6 +508,85 @@ fn main() {
         }
     }
 
+    // extract the target-cpu config value, if specified
+    let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .ok()
+        .and_then(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .find(|f| f.contains("target-cpu="))
+                .and_then(|f| f.split("target-cpu=").nth(1))
+                .map(|s| s.to_string())
+        });
+
+    if target_cpu == Some("native".into()) {
+        debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
+        config.define("GGML_NATIVE", "ON");
+    }
+    // if native isn't specified, enable specific features for ggml instead
+    else {
+        // rust code isn't using `target-cpu=native`, so llama.cpp shouldn't use GGML_NATIVE either
+        config.define("GGML_NATIVE", "OFF");
+
+        // if `target-cpu` is set, also set -march for llama.cpp to the same value
+        if let Some(ref cpu) = target_cpu {
+            debug_log!("Setting baseline architecture: -march={}", cpu);
+            config.cflag(&format!("-march={}", cpu));
+            config.cxxflag(&format!("-march={}", cpu));
+        }
+
+        // I expect this env var to always be present
+        let features = std::env::var("CARGO_CFG_TARGET_FEATURE")
+            .expect("Env var CARGO_CFG_TARGET_FEATURE not found.");
+        debug_log!("Compiling with target features: {}", features);
+
+        // list of rust target_features here:
+        // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
+        // GGML config flags have been found by looking at:
+        // llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+        for feature in features.split(',') {
+            match feature {
+                "avx" => {
+                    config.define("GGML_AVX", "ON");
+                }
+                "avx2" => {
+                    config.define("GGML_AVX2", "ON");
+                }
+                "avx512bf16" => {
+                    config.define("GGML_AVX512_BF16", "ON");
+                }
+                "avx512vbmi" => {
+                    config.define("GGML_AVX512_VBMI", "ON");
+                }
+                "avx512vnni" => {
+                    config.define("GGML_AVX512_VNNI", "ON");
+                }
+                "avxvnni" => {
+                    config.define("GGML_AVX_VNNI", "ON");
+                }
+                "bmi2" => {
+                    config.define("GGML_BMI2", "ON");
+                }
+                "f16c" => {
+                    config.define("GGML_F16C", "ON");
+                }
+                "fma" => {
+                    config.define("GGML_FMA", "ON");
+                }
+                "sse4.2" => {
+                    config.define("GGML_SSE42", "ON");
+                }
+                _ => {
+                    debug_log!(
+                        "Unrecognized cpu feature: '{}' - skipping GGML config for it.",
+                        feature
+                    );
+                    continue;
+                }
+            };
+        }
+    }
+
     config.define(
         "BUILD_SHARED_LIBS",
         if build_shared_libs { "ON" } else { "OFF" },
     );
@@ -627,9 +706,9 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
+        && target_cpu != Some("native".into())
     {
-        // If the native feature is not enabled, we take off the native ARM64 support.
+        // If the target-cpu is not specified as native, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.
         config.define("GGML_NATIVE", "OFF");
         config.define("GGML_CPU_ARM_ARCH", "armv8-a");
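For review, here is the extraction logic added above pulled into a standalone function (the helper name `extract_target_cpu` is hypothetical, not part of the diff). Cargo joins the individual rustc flags in `CARGO_ENCODED_RUSTFLAGS` with the ASCII unit separator `\x1f`, so `RUSTFLAGS="-C target-cpu=native"` arrives as `-C\x1ftarget-cpu=native`, while the fused spelling `-Ctarget-cpu=native` stays a single flag; splitting on `target-cpu=` handles both forms:

```rust
// Minimal sketch of the parsing done in build.rs, with example inputs.
fn extract_target_cpu(encoded_rustflags: &str) -> Option<String> {
    encoded_rustflags
        .split('\x1f') // Cargo's flag separator
        .find(|f| f.contains("target-cpu="))
        .and_then(|f| f.split("target-cpu=").nth(1))
        .map(|s| s.to_string())
}

fn main() {
    // fused form: "-Ctarget-cpu=native" is one encoded flag
    assert_eq!(
        extract_target_cpu("-Ctarget-cpu=native"),
        Some("native".to_string())
    );
    // split form: "-C" and "target-cpu=x86-64-v3" are two encoded flags
    assert_eq!(
        extract_target_cpu("-C\x1ftarget-cpu=x86-64-v3"),
        Some("x86-64-v3".to_string())
    );
    // unrelated flags yield None, leaving GGML_NATIVE off
    assert_eq!(extract_target_cpu("-Copt-level=3"), None);
}
```

In the `None` and non-`native` cases the build script then mirrors Rust's own codegen settings into ggml's CMake flags (`GGML_AVX2`, `GGML_FMA`, etc.) via `CARGO_CFG_TARGET_FEATURE`, so the C/C++ side never exceeds the CPU baseline the Rust side was compiled for.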