Skip to content

Commit c3f0d38

Browse files
tobilgmlafeldt
authored and committed
Add support for volatile scalar functions
This PR adds the ability to mark scalar functions as volatile, which prevents DuckDB from optimizing them as constants. This is essential for functions that generate random or unique values per row, such as:

- Random number generators
- UUID generators
- Fake data generators
- Current timestamp functions

Added a new method, `set_volatile()`, to `ScalarFunction` that calls the existing FFI binding `duckdb_scalar_function_set_volatile()`. This method follows the builder pattern used by other methods in the API.

Added two new convenience methods to `Connection`:

- `register_volatile_scalar_function<S: VScalar>(name)` - Register with default state
- `register_volatile_scalar_function_with_state<S: VScalar>(name, state)` - Register with custom state

These mirror the existing `register_scalar_function` methods but automatically mark the functions as volatile.

Added comprehensive tests demonstrating:

- `test_volatile_scalar` - Verifies volatile functions are evaluated per row
- `test_non_volatile_scalar` - Verifies non-volatile functions are optimized as constants

```rust
use duckdb::Connection;
use duckdb::vscalar::VScalar;

// Assume RandomUUID implements VScalar
let conn = Connection::open_in_memory()?;
conn.register_volatile_scalar_function::<RandomUUID>("random_uuid")?;

// Each row gets a unique UUID
let mut stmt = conn.prepare("SELECT random_uuid() FROM generate_series(1, 10)")?;
```

By default, DuckDB optimizes zero-argument scalar functions as constants, evaluating them only once and reusing the result. For deterministic functions this is correct, but for non-deterministic functions (random generators, UUIDs, fake data), this produces incorrect results where all rows get the same value.

The VOLATILE flag tells DuckDB's optimizer to re-evaluate the function for each row, which is the correct behavior for non-deterministic functions.

Fixes functionality needed by DuckDB extensions that generate unique data per row.
1 parent b96eda5 commit c3f0d38

File tree

2 files changed

+188
-1
lines changed

2 files changed

+188
-1
lines changed

crates/duckdb/src/vscalar/function.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ use libduckdb_sys::{
5353
duckdb_create_scalar_function_set, duckdb_data_chunk, duckdb_delete_callback_t, duckdb_destroy_scalar_function,
5454
duckdb_function_info, duckdb_scalar_function, duckdb_scalar_function_add_parameter, duckdb_scalar_function_set,
5555
duckdb_scalar_function_set_extra_info, duckdb_scalar_function_set_function, duckdb_scalar_function_set_name,
56-
duckdb_scalar_function_set_return_type, duckdb_scalar_function_set_varargs, duckdb_vector, DuckDBSuccess,
56+
duckdb_scalar_function_set_return_type, duckdb_scalar_function_set_varargs, duckdb_scalar_function_set_volatile,
57+
duckdb_vector, DuckDBSuccess,
5758
};
5859

5960
use crate::{core::LogicalTypeHandle, Error};
@@ -112,6 +113,33 @@ impl ScalarFunction {
112113
self
113114
}
114115

116+
/// Marks the scalar function as volatile.
117+
///
118+
/// Volatile functions are re-evaluated for each row, even if they have no parameters.
119+
/// This is useful for functions that generate random or unique values, such as random
120+
/// number generators, UUID generators, or fake data generators.
121+
///
122+
/// By default, DuckDB optimizes zero-argument scalar functions as constants, evaluating
123+
/// them only once. Setting a function as volatile prevents this optimization.
124+
///
125+
/// # Example
126+
/// ```no_run
127+
/// use duckdb::vscalar::ScalarFunction;
128+
/// use duckdb::core::LogicalTypeHandle;
129+
/// use libduckdb_sys::LogicalTypeId;
130+
///
131+
/// let func = ScalarFunction::new("my_random_func").unwrap();
132+
/// func.set_return_type(&LogicalTypeHandle::from(LogicalTypeId::Varchar))
133+
/// .set_volatile() // Mark as volatile so it's evaluated per row
134+
/// .set_function(Some(my_random_impl));
135+
/// ```
136+
pub fn set_volatile(&self) -> &Self {
137+
unsafe {
138+
duckdb_scalar_function_set_volatile(self.ptr);
139+
}
140+
self
141+
}
142+
115143
/// Assigns extra information to the scalar function using raw pointers.
116144
///
117145
/// For most use cases, prefer [`set_extra_info`](Self::set_extra_info) which handles memory management automatically.

crates/duckdb/src/vscalar/mod.rs

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,73 @@ impl Connection {
168168
}
169169
self.db.borrow_mut().register_scalar_function_set(set)
170170
}
171+
172+
/// Register the given ScalarFunction with default state, marked as volatile.
173+
///
174+
/// Volatile functions are re-evaluated for each row, even if they have no parameters.
175+
/// This is useful for functions that generate random or unique values per row, such as:
176+
/// - Random number generators
177+
/// - UUID generators
178+
/// - Fake data generators
179+
/// - Current timestamp functions
180+
///
181+
/// By default, DuckDB optimizes zero-argument scalar functions as constants.
182+
/// Use this method when you need the function to be evaluated independently for each row.
183+
///
184+
/// # Example
185+
/// ```no_run
186+
/// use duckdb::Connection;
187+
/// // Assume RandomUUID implements VScalar
188+
/// let conn = Connection::open_in_memory()?;
189+
/// conn.register_volatile_scalar_function::<RandomUUID>("random_uuid")?;
190+
///
191+
/// // Each row gets a unique UUID
192+
/// let mut stmt = conn.prepare("SELECT random_uuid() FROM generate_series(1, 10)")?;
193+
/// # Ok::<(), duckdb::Error>(())
194+
/// ```
195+
#[inline]
196+
pub fn register_volatile_scalar_function<S: VScalar>(&self, name: &str) -> crate::Result<()>
197+
where
198+
S::State: Default,
199+
{
200+
let set = ScalarFunctionSet::new(name);
201+
for signature in S::signatures() {
202+
let scalar_function = ScalarFunction::new(name)?;
203+
signature.register_with_scalar(&scalar_function);
204+
scalar_function.set_function(Some(scalar_func::<S>));
205+
scalar_function.set_volatile(); // Mark as volatile
206+
scalar_function.set_extra_info(S::State::default());
207+
set.add_function(scalar_function)?;
208+
}
209+
self.db.borrow_mut().register_scalar_function_set(set)
210+
}
211+
212+
/// Register the given ScalarFunction with custom state, marked as volatile.
213+
///
214+
/// Volatile functions are re-evaluated for each row, even if they have no parameters.
215+
/// This is the volatile variant of `register_scalar_function_with_state`.
216+
///
217+
/// See [`register_volatile_scalar_function`](Self::register_volatile_scalar_function) for more details on volatile functions.
218+
#[inline]
219+
pub fn register_volatile_scalar_function_with_state<S: VScalar>(
220+
&self,
221+
name: &str,
222+
state: &S::State,
223+
) -> crate::Result<()>
224+
where
225+
S::State: Clone,
226+
{
227+
let set = ScalarFunctionSet::new(name);
228+
for signature in S::signatures() {
229+
let scalar_function = ScalarFunction::new(name)?;
230+
signature.register_with_scalar(&scalar_function);
231+
scalar_function.set_function(Some(scalar_func::<S>));
232+
scalar_function.set_volatile(); // Mark as volatile
233+
scalar_function.set_extra_info(state.clone());
234+
set.add_function(scalar_function)?;
235+
}
236+
self.db.borrow_mut().register_scalar_function_set(set)
237+
}
171238
}
172239

173240
impl InnerConnection {
@@ -374,4 +441,96 @@ mod test {
374441

375442
Ok(())
376443
}
444+
445+
// Counter for testing volatile functions
446+
use std::sync::atomic::{AtomicU64, Ordering};
447+
static COUNTER: AtomicU64 = AtomicU64::new(0);
448+
449+
struct CounterScalar {}
450+
451+
impl VScalar for CounterScalar {
452+
type State = ();
453+
454+
unsafe fn invoke(
455+
_: &Self::State,
456+
input: &mut DataChunkHandle,
457+
output: &mut dyn WritableVector,
458+
) -> Result<(), Box<dyn std::error::Error>> {
459+
let len = input.len();
460+
let mut output_vec = output.flat_vector();
461+
let data = output_vec.as_mut_slice::<i64>();
462+
463+
for i in 0..len {
464+
let count = COUNTER.fetch_add(1, Ordering::SeqCst);
465+
data[i] = count as i64;
466+
}
467+
Ok(())
468+
}
469+
470+
fn signatures() -> Vec<ScalarFunctionSignature> {
471+
vec![ScalarFunctionSignature::exact(
472+
vec![],
473+
LogicalTypeHandle::from(LogicalTypeId::Bigint),
474+
)]
475+
}
476+
}
477+
478+
/// A volatile zero-argument function must be evaluated once per row.
///
/// `COUNTER` is a global that `test_non_volatile_scalar` also writes to, and the test
/// harness may run both tests at the same time. The assertions therefore do not pin
/// absolute counter values; they only require that every row saw a different value,
/// which is the property the volatile flag is supposed to guarantee.
#[test]
fn test_volatile_scalar() -> Result<(), Box<dyn Error>> {
    let conn = Connection::open_in_memory()?;

    // Register as volatile
    conn.register_volatile_scalar_function::<CounterScalar>("volatile_counter")?;

    let mut stmt = conn.prepare("SELECT volatile_counter() FROM generate_series(1, 5)")?;
    let mut rows = stmt.query([])?;

    let mut values: Vec<i64> = Vec::new();
    while let Some(row) = rows.next()? {
        values.push(row.get(0)?);
    }

    assert_eq!(values.len(), 5);

    // Per-row evaluation means no two rows can share a counter value. A function that
    // was folded into a constant would produce five identical values instead.
    let distinct: std::collections::HashSet<i64> = values.iter().copied().collect();
    assert_eq!(
        distinct.len(),
        values.len(),
        "Each row should have a unique counter value, got {:?}",
        values
    );

    Ok(())
}
506+
507+
/// Without the volatile flag, a zero-argument function is expected to be treated as a
/// constant, so every row of the result carries the same value.
#[test]
fn test_non_volatile_scalar() -> Result<(), Box<dyn Error>> {
    let conn = Connection::open_in_memory()?;

    // Start from a recognisable counter value.
    COUNTER.store(100, Ordering::SeqCst);

    // Plain registration: the volatile flag is NOT set.
    conn.register_scalar_function::<CounterScalar>("non_volatile_counter")?;

    let mut stmt = conn.prepare("SELECT non_volatile_counter() FROM generate_series(1, 5)")?;
    let mut rows = stmt.query([])?;

    let mut values: Vec<i64> = Vec::new();
    while let Some(row) = rows.next()? {
        values.push(row.get(0)?);
    }

    assert_eq!(values.len(), 5);

    // A single evaluation shared by all rows means every value equals the first one.
    let expected = values[0];
    values.iter().for_each(|&seen| {
        assert_eq!(seen, expected, "All rows should have the same value when not volatile");
    });

    Ok(())
}
377536
}

0 commit comments

Comments
 (0)