Fix most non-pedantic clippy warnings

ysimonson · ysimonson · commit 13f3fe4a38f2 · 2025-11-28T12:20:22.000-05:00
diff --git a/examples/reranker/src/main.rs b/examples/reranker/src/main.rs
@@ -91,13 +91,11 @@ fn main() -> Result<()> {
         .with_n_threads_batch(std::thread::available_parallelism()?.get().try_into()?)
         .with_embeddings(true)
         .with_pooling_type(pooling_type);
-    println!("ctx_params: {:?}", ctx_params);
+    println!("ctx_params: {ctx_params:?}");
     let mut ctx = model
         .new_context(&backend, ctx_params)
         .with_context(|| "unable to create the llama_context")?;
 
-    let n_embd = model.n_embd();
-
     let prompt_lines = {
         let mut lines = Vec::new();
         for doc in documents {
@@ -107,13 +105,13 @@ fn main() -> Result<()> {
         lines
     };
 
-    println!("prompt_lines: {:?}", prompt_lines);
+    println!("prompt_lines: {prompt_lines:?}");
     // tokenize the prompt
     let tokens_lines_list = prompt_lines
         .iter()
         .map(|line| model.str_to_token(line, AddBos::Always))
         .collect::<Result<Vec<_>, _>>()
-        .with_context(|| format!("failed to tokenize {:?}", prompt_lines))?;
+        .with_context(|| format!("failed to tokenize {prompt_lines:?}"))?;
 
     let n_ctx = ctx.n_ctx() as usize;
     let n_ctx_train = model.n_ctx_train();
@@ -169,7 +167,7 @@ fn main() -> Result<()> {
                 max_seq_id_batch,
                 &mut output,
                 normalise,
-                pooling.clone(),
+                &pooling,
             )?;
             max_seq_id_batch = 0;
             batch.clear();
@@ -185,31 +183,21 @@ fn main() -> Result<()> {
         max_seq_id_batch,
         &mut output,
         normalise,
-        pooling.clone(),
+        &pooling,
     )?;
 
     let t_main_end = ggml_time_us();
 
     for (j, embeddings) in output.iter().enumerate() {
-        if pooling == "none" {
-            eprintln!("embedding {j}: ");
-            for i in 0..n_embd as usize {
-                if !normalise {
-                    eprint!("{:6.5} ", embeddings[i]);
-                } else {
-                    eprint!("{:9.6} ", embeddings[i]);
-                }
-            }
-            eprintln!();
-        } else if pooling == "rank" {
+        if pooling == "rank" {
             eprintln!("rerank score {j}: {:8.3}", embeddings[0]);
         } else {
             eprintln!("embedding {j}: ");
-            for i in 0..n_embd as usize {
-                if !normalise {
-                    eprint!("{:6.5} ", embeddings[i]);
+            for embedding in embeddings {
+                if normalise {
+                    eprint!("{embedding:9.6} ");
                 } else {
-                    eprint!("{:9.6} ", embeddings[i]);
+                    eprint!("{embedding:6.5} ");
                 }
             }
             eprintln!();
@@ -236,7 +224,7 @@ fn batch_decode(
     s_batch: i32,
     output: &mut Vec<Vec<f32>>,
     normalise: bool,
-    pooling: String,
+    pooling: &str,
 ) -> Result<()> {
     eprintln!(
         "{}: n_tokens = {}, n_seq = {}",
@@ -256,9 +244,9 @@ fn batch_decode(
             .with_context(|| "Failed to get sequence embeddings")?;
         let normalized = if normalise {
             if pooling == "rank" {
-                normalize_embeddings(&embeddings, -1)
+                normalize_embeddings(embeddings, -1)
             } else {
-                normalize_embeddings(&embeddings, 2)
+                normalize_embeddings(embeddings, 2)
             }
         } else {
             embeddings.to_vec()
@@ -281,27 +269,30 @@ fn normalize_embeddings(input: &[f32], embd_norm: i32) -> Vec<f32> {
         0 => {
             // max absolute
             let max_abs = input.iter().map(|x| x.abs()).fold(0.0f32, f32::max) / 32760.0;
-            max_abs as f64
+            f64::from(max_abs)
         }
         2 => {
             // euclidean norm
             input
                 .iter()
-                .map(|x| (*x as f64).powi(2))
+                .map(|x| f64::from(*x).powi(2))
                 .sum::<f64>()
                 .sqrt()
         }
         p => {
             // p-norm
-            let sum = input.iter().map(|x| (x.abs() as f64).powi(p)).sum::<f64>();
-            sum.powf(1.0 / p as f64)
+            let sum = input
+                .iter()
+                .map(|x| f64::from(x.abs()).powi(p))
+                .sum::<f64>();
+            sum.powf(1.0 / f64::from(p))
         }
     };
 
     let norm = if sum > 0.0 { 1.0 / sum } else { 0.0 };
 
     for i in 0..n {
-        output[i] = (input[i] as f64 * norm) as f32;
+        output[i] = (f64::from(input[i]) * norm) as f32;
     }
 
     output
diff --git a/examples/simple/src/main.rs b/examples/simple/src/main.rs
@@ -97,7 +97,7 @@ struct Args {
 fn parse_key_val(s: &str) -> Result<(String, ParamOverrideValue)> {
     let pos = s
         .find('=')
-        .ok_or_else(|| anyhow!("invalid KEY=value: no `=` found in `{}`", s))?;
+        .ok_or_else(|| anyhow!("invalid KEY=value: no `=` found in `{s}`"))?;
     let key = s[..pos].parse()?;
     let value: String = s[pos + 1..].parse()?;
     let value = i64::from_str(&value)
diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs
@@ -449,6 +449,7 @@ pub struct LogOptions {
 impl LogOptions {
     /// If enabled, logs are sent to tracing. If disabled, all logs are suppressed. Default is for
     /// logs to be sent to tracing.
+    #[must_use]
     pub fn with_logs_enabled(mut self, enabled: bool) -> Self {
         self.disabled = !enabled;
         self
diff --git a/llama-cpp-2/src/model.rs b/llama-cpp-2/src/model.rs
@@ -36,9 +36,9 @@ pub struct LlamaLoraAdapter {
     pub(crate) lora_adapter: NonNull<llama_cpp_sys_2::llama_adapter_lora>,
 }
 
-/// A performance-friendly wrapper around [LlamaModel::chat_template] which is then
-/// fed into [LlamaModel::apply_chat_template] to convert a list of messages into an LLM
-/// prompt. Internally the template is stored as a CString to avoid round-trip conversions
+/// A performance-friendly wrapper around [`LlamaModel::chat_template`] which is then
+/// fed into [`LlamaModel::apply_chat_template`] to convert a list of messages into an LLM
+/// prompt. Internally the template is stored as a `CString` to avoid round-trip conversions
 /// within the FFI.
 #[derive(Eq, PartialEq, Clone, PartialOrd, Ord, Hash)]
 pub struct LlamaChatTemplate(CString);
@@ -55,7 +55,7 @@ impl LlamaChatTemplate {
         &self.0
     }
 
-    /// Attempts to convert the CString into a Rust str reference.
+    /// Attempts to convert the `CString` into a Rust str reference.
     pub fn to_str(&self) -> Result<&str, Utf8Error> {
         self.0.to_str()
     }
@@ -569,7 +569,7 @@ impl LlamaModel {
 
     /// Get chat template from model by name. If the name parameter is None, the default chat template will be returned.
     ///
-    /// You supply this into [Self::apply_chat_template] to get back a string with the appropriate template
+    /// You supply this into [`Self::apply_chat_template`] to get back a string with the appropriate template
     /// substitution applied to convert a list of messages into a prompt the LLM can use to complete
     /// the chat.
     ///
@@ -681,14 +681,14 @@ impl LlamaModel {
     }
 
     /// Apply the models chat template to some messages.
-    /// See https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
+    /// See <https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template>
     ///
-    /// Unlike the llama.cpp apply_chat_template which just randomly uses the ChatML template when given
+    /// Unlike the llama.cpp `apply_chat_template` which just randomly uses the ChatML template when given
     /// a null pointer for the template, this requires an explicit template to be specified. If you want to
     /// use "chatml", then just do `LlamaChatTemplate::new("chatml")` or any other model name or template
     /// string.
     ///
-    /// Use [Self::chat_template] to retrieve the template baked into the model (this is the preferred
+    /// Use [`Self::chat_template`] to retrieve the template baked into the model (this is the preferred
     /// mechanism as using the wrong chat template can result in really unexpected responses from the LLM).
     ///
     /// You probably want to set `add_ass` to true so that the generated template string ends with a the
@@ -764,7 +764,7 @@ where
     let mut buffer = vec![0u8; capacity];
 
     // call the foreign function
-    let result = c_function(buffer.as_mut_ptr() as *mut c_char, buffer.len());
+    let result = c_function(buffer.as_mut_ptr().cast::<c_char>(), buffer.len());
     if result < 0 {
         return Err(MetaValError::NegativeReturn(result));
     }
diff --git a/llama-cpp-2/src/model/params.rs b/llama-cpp-2/src/model/params.rs
@@ -235,7 +235,7 @@ impl LlamaModelParams {
         );
 
         // There should be some way to do this without iterating over everything.
-        for (_i, &c) in key.to_bytes_with_nul().iter().enumerate() {
+        for &c in key.to_bytes_with_nul().iter() {
             c_char::try_from(c).expect("invalid character in key");
         }
 
diff --git a/llama-cpp-2/src/sampling.rs b/llama-cpp-2/src/sampling.rs
@@ -385,7 +385,7 @@ impl LlamaSampler {
 
     /// Penalizes tokens for being present in the context.
     ///
-    /// Parameters:  
+    /// Parameters:
     /// - ``penalty_last_n``: last n tokens to penalize (0 = disable penalty, -1 = context size)
     /// - ``penalty_repeat``: 1.0 = disabled
     /// - ``penalty_freq``: 0.0 = disabled
@@ -415,15 +415,15 @@ impl LlamaSampler {
     /// - ``n_vocab``: [`LlamaModel::n_vocab`]
     /// - ``seed``: Seed to initialize random generation with.
     /// - ``tau``: The target cross-entropy (or surprise) value you want to achieve for the
-    ///     generated text. A higher value corresponds to more surprising or less predictable text,
-    ///     while a lower value corresponds to less surprising or more predictable text.
+    ///   generated text. A higher value corresponds to more surprising or less predictable text,
+    ///   while a lower value corresponds to less surprising or more predictable text.
     /// - ``eta``: The learning rate used to update `mu` based on the error between the target and
-    ///     observed surprisal of the sampled word. A larger learning rate will cause `mu` to be
-    ///     updated more quickly, while a smaller learning rate will result in slower updates.
+    ///   observed surprisal of the sampled word. A larger learning rate will cause `mu` to be
+    ///   updated more quickly, while a smaller learning rate will result in slower updates.
     /// - ``m``: The number of tokens considered in the estimation of `s_hat`. This is an arbitrary
-    ///     value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`.
-    ///     In the paper, they use `m = 100`, but you can experiment with different values to see how
-    ///     it affects the performance of the algorithm.
+    ///   value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`.
+    ///   In the paper, they use `m = 100`, but you can experiment with different values to see how
+    ///   it affects the performance of the algorithm.
     #[must_use]
     pub fn mirostat(n_vocab: i32, seed: u32, tau: f32, eta: f32, m: i32) -> Self {
         let sampler =
@@ -436,11 +436,11 @@ impl LlamaSampler {
     /// # Parameters:
     /// - ``seed``: Seed to initialize random generation with.
     /// - ``tau``: The target cross-entropy (or surprise) value you want to achieve for the
-    ///     generated text. A higher value corresponds to more surprising or less predictable text,
-    ///     while a lower value corresponds to less surprising or more predictable text.
+    ///   generated text. A higher value corresponds to more surprising or less predictable text,
+    ///   while a lower value corresponds to less surprising or more predictable text.
     /// - ``eta``: The learning rate used to update `mu` based on the error between the target and
-    ///     observed surprisal of the sampled word. A larger learning rate will cause `mu` to be
-    ///     updated more quickly, while a smaller learning rate will result in slower updates.
+    ///   observed surprisal of the sampled word. A larger learning rate will cause `mu` to be
+    ///   updated more quickly, while a smaller learning rate will result in slower updates.
     #[must_use]
     pub fn mirostat_v2(seed: u32, tau: f32, eta: f32) -> Self {
         let sampler = unsafe { llama_cpp_sys_2::llama_sampler_init_mirostat_v2(seed, tau, eta) };
diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
@@ -627,7 +627,7 @@ fn main() {
 
     if matches!(target_os, TargetOs::Linux)
         && target_triple.contains("aarch64")
-        && !env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_ok()
+        && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()
     {
         // If the native feature is not enabled, we take off the native ARM64 support.
         // It is useful in docker environments where the native feature is not enabled.

Original file line number	Diff line number	Diff line change
`@@ -235,7 +235,7 @@ impl LlamaModelParams {`
`235`	`235`	`);`
`236`	`236`
`237`	`237`	`// There should be some way to do this without iterating over everything.`
`238`		`- for (_i, &c) in key.to_bytes_with_nul().iter().enumerate() {`
	`238`	`+ for &c in key.to_bytes_with_nul().iter() {`
`239`	`239`	`c_char::try_from(c).expect("invalid character in key");`
`240`	`240`	`}`
`241`	`241`
Original file line number	Diff line number	Diff line change
`@@ -627,7 +627,7 @@ fn main() {`
`627`	`627`
`628`	`628`	`if matches!(target_os, TargetOs::Linux)`
`629`	`629`	`&& target_triple.contains("aarch64")`
`630`		`- && !env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_ok()`
	`630`	`+ && env::var(format!("CARGO_FEATURE_{}", "native".to_uppercase())).is_err()`
`631`	`631`	`{`
`632`	`632`	`// If the native feature is not enabled, we take off the native ARM64 support.`
`633`	`633`	`// It is useful in docker environments where the native feature is not enabled.`