1 change: 0 additions & 1 deletion examples/embeddings/Cargo.toml
@@ -13,7 +13,6 @@ anyhow = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/mtmd/Cargo.toml
@@ -11,7 +11,6 @@ clap = { workspace = true, features = ["derive"] }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/reranker/Cargo.toml
@@ -14,7 +14,6 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion examples/simple/Cargo.toml
@@ -17,7 +17,6 @@ tracing-subscriber = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
-native = ["llama-cpp-2/native"]
 vulkan = ["llama-cpp-2/vulkan"]
 
 [lints]
1 change: 0 additions & 1 deletion llama-cpp-2/Cargo.toml
@@ -26,7 +26,6 @@ cuda-no-vmm = ["cuda", "llama-cpp-sys-2/cuda-no-vmm"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
 vulkan = ["llama-cpp-sys-2/vulkan"]
-native = ["llama-cpp-sys-2/native"]
 openmp = ["llama-cpp-sys-2/openmp"]
 sampler = []
 # Only has an impact on Android.
1 change: 0 additions & 1 deletion llama-cpp-sys-2/Cargo.toml
@@ -78,7 +78,6 @@ cuda-no-vmm = ["cuda"]
 metal = []
 dynamic-link = []
 vulkan = []
-native = []
 openmp = []
 # Only has an impact on Android.
 shared-stdcxx = []
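Note: with the `native` cargo feature removed from every manifest above, downstream crates opt into native CPU tuning through rustc flags instead. A minimal sketch of the replacement configuration, assuming Cargo's standard `[build] rustflags` mechanism (the file below is an illustrative downstream config, not part of this diff):

    # .cargo/config.toml of a hypothetical downstream project
    [build]
    # target-cpu=native is what build.rs now looks for to enable GGML_NATIVE.
    rustflags = ["-C", "target-cpu=native"]
    # Or pin a portable baseline; build.rs forwards it to CMake as -march=...
    # and enables only the matching GGML_* feature flags:
    # rustflags = ["-C", "target-cpu=x86-64-v2"]

The same flags can also be passed one-off as `RUSTFLAGS="-C target-cpu=native" cargo build`.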
91 changes: 89 additions & 2 deletions llama-cpp-sys-2/build.rs
@@ -508,6 +508,93 @@ fn main() {
         }
     }
 
+    // In this next bit we select which CPU-specific features to compile for.
+    // First, check for target-cpu=native in the encoded rustflags.
+    let has_native_target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .map(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .any(|f| f.contains("target-cpu=native"))
+        })
+        .unwrap_or(false);
+
+    // Also extract the target-cpu value if one is specified (e.g. x86-64, x86-64-v2).
+    let target_cpu = std::env::var("CARGO_ENCODED_RUSTFLAGS")
+        .ok()
+        .and_then(|rustflags| {
+            rustflags
+                .split('\x1f')
+                .find(|f| f.contains("target-cpu=") && !f.contains("target-cpu=native"))
+                .and_then(|f| f.split("target-cpu=").nth(1))
+                .map(|s| s.to_string())
+        });
+
+    if has_native_target_cpu {
+        debug_log!("Detected target-cpu=native, compiling with GGML_NATIVE");
+        config.define("GGML_NATIVE", "ON");
+    }
+    // If native isn't specified, enable specific features for ggml based on
+    // the comma-separated target-feature list that Cargo exposes.
+    else if let Ok(features) = std::env::var("CARGO_CFG_TARGET_FEATURE") {
+        debug_log!("Compiling with target features: {}", features);
+        config.define("GGML_NATIVE", "OFF");
+
+        // Set the baseline architecture from target-cpu if specified. This is critical
+        // to prevent the compiler from auto-vectorizing to the build host's capabilities.
+        if let Some(ref cpu) = target_cpu {
+            debug_log!("Setting baseline architecture: -march={}", cpu);
+            // Pass the baseline architecture to CMake's C and CXX compilers.
+            config.cflag(&format!("-march={}", cpu));
+            config.cxxflag(&format!("-march={}", cpu));
+        }
+
+        // The list of Rust target features is documented at:
+        // https://doc.rust-lang.org/reference/attributes/codegen.html#the-target_feature-attribute
+        // The matching GGML config flags were found by reading:
+        // llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
+        for feature in features.split(',') {
+            match feature {
+                "avx" => {
+                    config.define("GGML_AVX", "ON");
+                }
+                "avx2" => {
+                    config.define("GGML_AVX2", "ON");
+                }
+                "avx512bf16" => {
+                    config.define("GGML_AVX512_BF16", "ON");
+                }
+                "avx512vbmi" => {
+                    config.define("GGML_AVX512_VBMI", "ON");
+                }
+                "avx512vnni" => {
+                    config.define("GGML_AVX512_VNNI", "ON");
+                }
+                "avxvnni" => {
+                    config.define("GGML_AVX_VNNI", "ON");
+                }
+                "bmi2" => {
+                    config.define("GGML_BMI2", "ON");
+                }
+                "f16c" => {
+                    config.define("GGML_F16C", "ON");
+                }
+                "fma" => {
+                    config.define("GGML_FMA", "ON");
+                }
+                "sse4.2" => {
+                    config.define("GGML_SSE42", "ON");
+                }
+                _ => {
+                    debug_log!(
+                        "Unrecognized CPU feature: '{}' - skipping GGML config for it.",
+                        feature
+                    );
+                    continue;
+                }
+            };
+        }
+    }
+
     config.define(
         "BUILD_SHARED_LIBS",
         if build_shared_libs { "ON" } else { "OFF" },
@@ -627,9 +714,9 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
+        && !has_native_target_cpu
     {
-        // If the native feature is not enabled, we take off the native ARM64 support.
+        // If target-cpu=native is not specified, we take off native ARM64 support.
         // This is useful in Docker environments, where target-cpu=native is typically not set.
         config.define("GGML_NATIVE", "OFF");
         config.define("GGML_CPU_ARM_ARCH", "armv8-a");