Skip to content

Commit d79a185

Browse files
authored
Align scalar state semantics and revert TableFunctionOptions (#638)
- Aligns the State associated type in VScalar and VArrowScalar traits with DuckDB's extra_info semantics (Adding `+ 'static` is technically a breaking change, but practically unlikely to break anything - most state types are owned data anyway) - Reverts the TableFunctionOptions wrapper from #637 in favor of keeping the simpler register_table_function_with_extra_info API - Renamed (private) APIs are now aligned across table and scalar functions
2 parents cc3efce + b96eda5 commit d79a185

File tree

5 files changed

+55
-65
lines changed

5 files changed

+55
-65
lines changed

crates/duckdb/src/vscalar/arrow.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,13 @@ impl ArrowFunctionSignature {
7373

7474
/// A trait for scalar functions that accept and return arrow types that can be registered with DuckDB
7575
pub trait VArrowScalar: Sized {
76-
/// State that persists across invocations of the scalar function (the lifetime of the connection)
77-
/// The state can be accessed by multiple threads, so it must be `Send + Sync`.
78-
type State: Default + Sized + Send + Sync;
76+
/// State set at registration time. Persists for the lifetime of the catalog entry.
77+
/// Shared across worker threads and invocations — must not be modified during execution.
78+
/// Must be `'static` as it is stored in DuckDB and may outlive the current stack frame.
79+
type State: Default + Sized + Send + Sync + 'static;
7980

8081
/// The actual function that is called by DuckDB
81-
fn invoke(info: &Self::State, input: RecordBatch) -> Result<Arc<dyn Array>, Box<dyn std::error::Error>>;
82+
fn invoke(state: &Self::State, input: RecordBatch) -> Result<Arc<dyn Array>, Box<dyn std::error::Error>>;
8283

8384
/// The possible signatures of the scalar function. These will result in DuckDB scalar function overloads.
8485
/// The invoke method should be able to handle all of these signatures.
@@ -92,11 +93,11 @@ where
9293
type State = T::State;
9394

9495
unsafe fn invoke(
95-
info: &Self::State,
96+
state: &Self::State,
9697
input: &mut DataChunkHandle,
9798
out: &mut dyn WritableVector,
9899
) -> Result<(), Box<dyn std::error::Error>> {
99-
let array = T::invoke(info, data_chunk_to_arrow(input)?)?;
100+
let array = T::invoke(state, data_chunk_to_arrow(input)?)?;
100101
write_arrow_array_to_vector(&array, out)
101102
}
102103

@@ -200,8 +201,8 @@ mod test {
200201
impl VArrowScalar for ArrowOverloaded {
201202
type State = MockState;
202203

203-
fn invoke(s: &Self::State, input: RecordBatch) -> Result<Arc<dyn Array>, Box<dyn std::error::Error>> {
204-
assert_eq!("some meta", s.info);
204+
fn invoke(state: &Self::State, input: RecordBatch) -> Result<Arc<dyn Array>, Box<dyn std::error::Error>> {
205+
assert_eq!("some meta", state.info);
205206

206207
let a = input.column(0);
207208
let b = input.column(1);

crates/duckdb/src/vscalar/function.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,25 +112,33 @@ impl ScalarFunction {
112112
self
113113
}
114114

115-
/// Assigns extra information to the scalar function that can be fetched during binding, etc.
115+
/// Assigns extra information to the scalar function using raw pointers.
116+
///
117+
/// For most use cases, prefer [`set_extra_info`](Self::set_extra_info) which handles memory management automatically.
116118
///
117119
/// # Arguments
118-
/// * `extra_info`: The extra information
119-
/// * `destroy`: The callback that will be called to destroy the bind data (if any)
120+
/// * `extra_info`: The extra information as a raw pointer
121+
/// * `destroy`: The callback that will be called to destroy the data (if any)
120122
///
121123
/// # Safety
122-
unsafe fn set_extra_info_impl(&self, extra_info: *mut c_void, destroy: duckdb_delete_callback_t) {
124+
/// The caller must ensure that `extra_info` is a valid pointer and that `destroy`
125+
/// properly cleans up the data when called.
126+
pub unsafe fn set_extra_info_raw(&self, extra_info: *mut c_void, destroy: duckdb_delete_callback_t) {
123127
duckdb_scalar_function_set_extra_info(self.ptr, extra_info, destroy);
124128
}
125129

130+
/// Assigns extra information to the scalar function that can be fetched during execution.
131+
///
132+
/// # Arguments
133+
/// * `info`: The extra information to store
126134
pub fn set_extra_info<T>(&self, info: T) -> &Self
127135
where
128-
T: Send + Sync,
136+
T: Send + Sync + 'static,
129137
{
130138
unsafe {
131139
let t = Box::new(info);
132140
let c_void = Box::into_raw(t) as *mut c_void;
133-
self.set_extra_info_impl(c_void, Some(drop_ptr::<T>));
141+
self.set_extra_info_raw(c_void, Some(drop_ptr::<T>));
134142
}
135143
self
136144
}

crates/duckdb/src/vscalar/mod.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ pub use arrow::{ArrowFunctionSignature, ArrowScalarParams, VArrowScalar};
2323

2424
/// Duckdb scalar function trait
2525
pub trait VScalar: Sized {
26-
/// State that persists across invocations of the scalar function (the lifetime of the connection)
27-
/// The state can be accessed by multiple threads, so it must be `Send + Sync`.
28-
type State: Sized + Send + Sync;
26+
/// State set at registration time. Persists for the lifetime of the catalog entry.
27+
/// Shared across worker threads and invocations — must not be modified during execution.
28+
/// Must be `'static` as it is stored in DuckDB and may outlive the current stack frame.
29+
type State: Sized + Send + Sync + 'static;
2930
/// The actual function
3031
///
3132
/// # Safety
@@ -109,7 +110,7 @@ impl From<duckdb_function_info> for ScalarFunctionInfo {
109110
}
110111

111112
impl ScalarFunctionInfo {
112-
pub unsafe fn get_scalar_extra_info<T>(&self) -> &T {
113+
pub unsafe fn get_extra_info<T>(&self) -> &T {
113114
&*(duckdb_scalar_function_get_extra_info(self.0).cast())
114115
}
115116

@@ -125,14 +126,14 @@ where
125126
{
126127
let info = ScalarFunctionInfo::from(info);
127128
let mut input = DataChunkHandle::new_unowned(input);
128-
let result = T::invoke(info.get_scalar_extra_info(), &mut input, &mut output);
129+
let result = T::invoke(info.get_extra_info(), &mut input, &mut output);
129130
if let Err(e) = result {
130131
info.set_error(&e.to_string());
131132
}
132133
}
133134

134135
impl Connection {
135-
/// Register the given ScalarFunction with default state
136+
/// Register the given ScalarFunction with default state.
136137
#[inline]
137138
pub fn register_scalar_function<S: VScalar>(&self, name: &str) -> crate::Result<()>
138139
where
@@ -149,7 +150,9 @@ impl Connection {
149150
self.db.borrow_mut().register_scalar_function_set(set)
150151
}
151152

152-
/// Register the given ScalarFunction with custom state
153+
/// Register the given ScalarFunction with custom state.
154+
///
155+
/// The state is cloned once per function signature (overload) and stored in DuckDB's catalog.
153156
#[inline]
154157
pub fn register_scalar_function_with_state<S: VScalar>(&self, name: &str, state: &S::State) -> crate::Result<()>
155158
where

crates/duckdb/src/vtab/function.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ impl BindInfo {
111111
pub fn set_cardinality(&self, cardinality: idx_t, is_exact: bool) {
112112
unsafe { duckdb_bind_set_cardinality(self.ptr, cardinality, is_exact) }
113113
}
114-
/// Retrieves the extra info of the function as set in [`TableFunction::with_extra_info`].
114+
/// Retrieves the extra info of the function as set in [`TableFunction::set_extra_info`].
115115
pub fn get_extra_info<T>(&self) -> *const T {
116116
unsafe { duckdb_bind_get_extra_info(self.ptr).cast() }
117117
}
@@ -163,7 +163,7 @@ impl InitInfo {
163163
indices
164164
}
165165

166-
/// Retrieves the extra info of the function as set in [`TableFunction::with_extra_info`].
166+
/// Retrieves the extra info of the function as set in [`TableFunction::set_extra_info`].
167167
pub fn get_extra_info<T>(&self) -> *const T {
168168
unsafe { duckdb_init_get_extra_info(self.0).cast() }
169169
}
@@ -306,35 +306,33 @@ impl TableFunction {
306306
self
307307
}
308308

309-
/// Assigns extra information to the table function that can be fetched during binding, etc.
309+
/// Assigns extra information to the table function using raw pointers.
310310
///
311-
/// For most use cases, prefer [`with_extra_info`](Self::with_extra_info) which handles memory management automatically.
311+
/// For most use cases, prefer [`set_extra_info`](Self::set_extra_info) which handles memory management automatically.
312312
///
313313
/// # Arguments
314-
/// * `extra_info`: The extra information
315-
/// * `destroy`: The callback that will be called to destroy the bind data (if any)
314+
/// * `extra_info`: The extra information as a raw pointer
315+
/// * `destroy`: The callback that will be called to destroy the data (if any)
316316
///
317317
/// # Safety
318318
/// The caller must ensure that `extra_info` is a valid pointer and that `destroy`
319319
/// properly cleans up the data when called.
320-
pub unsafe fn set_extra_info(&self, extra_info: *mut c_void, destroy: duckdb_delete_callback_t) {
320+
pub unsafe fn set_extra_info_raw(&self, extra_info: *mut c_void, destroy: duckdb_delete_callback_t) {
321321
duckdb_table_function_set_extra_info(self.ptr, extra_info, destroy);
322322
}
323323

324324
/// Assigns extra information to the table function that can be fetched during binding, init, and execution.
325325
///
326-
/// This is a safe wrapper around [`set_extra_info`](Self::set_extra_info) that handles memory management automatically.
327-
///
328326
/// # Arguments
329327
/// * `info`: The extra information to store
330-
pub fn with_extra_info<T>(&self, info: T) -> &Self
328+
pub fn set_extra_info<T>(&self, info: T) -> &Self
331329
where
332330
T: Send + Sync + 'static,
333331
{
334332
unsafe {
335333
let boxed = Box::new(info);
336334
let ptr = Box::into_raw(boxed) as *mut c_void;
337-
self.set_extra_info(ptr, Some(drop_boxed::<T>));
335+
self.set_extra_info_raw(ptr, Some(drop_boxed::<T>));
338336
}
339337
self
340338
}
@@ -404,7 +402,7 @@ impl<V: VTab> TableFunctionInfo<V> {
404402
}
405403
}
406404

407-
/// Retrieves the extra info of the function as set in [`TableFunction::with_extra_info`].
405+
/// Retrieves the extra info of the function as set in [`TableFunction::set_extra_info`].
408406
pub fn get_extra_info<T>(&self) -> *mut T {
409407
unsafe { duckdb_function_get_extra_info(self.ptr).cast() }
410408
}

crates/duckdb/src/vtab/mod.rs

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,6 @@ mod excel;
2323
pub use function::{BindInfo, InitInfo, TableFunction, TableFunctionInfo};
2424
pub use value::Value;
2525

26-
/// Options for registering a table function.
27-
pub struct TableFunctionOptions<E> {
28-
/// Extra info passed to the function at runtime.
29-
/// Accessible via `BindInfo::get_extra_info`, `InitInfo::get_extra_info`,
30-
/// or `TableFunctionInfo::get_extra_info`.
31-
pub extra_info: Option<E>,
32-
}
33-
34-
impl<E> Default for TableFunctionOptions<E> {
35-
fn default() -> Self {
36-
Self { extra_info: None }
37-
}
38-
}
39-
4026
use crate::core::{DataChunkHandle, LogicalTypeHandle};
4127
use ffi::{duckdb_bind_info, duckdb_data_chunk, duckdb_function_info, duckdb_init_info};
4228

@@ -165,26 +151,25 @@ impl Connection {
165151
self.db.borrow_mut().register_table_function(table_function)
166152
}
167153

168-
/// Register the given TableFunction with options.
154+
/// Register the given TableFunction with custom extra info.
155+
///
156+
/// This allows you to pass extra info that can be accessed during bind, init, and execution
157+
/// via `BindInfo::get_extra_info`, `InitInfo::get_extra_info`, or `TableFunctionInfo::get_extra_info`.
158+
///
159+
/// The extra info is cloned once during registration and stored in DuckDB's catalog.
169160
#[inline]
170-
pub fn register_table_function_with_options<T: VTab, E>(
171-
&self,
172-
name: &str,
173-
options: TableFunctionOptions<E>,
174-
) -> Result<()>
161+
pub fn register_table_function_with_extra_info<T: VTab, E>(&self, name: &str, extra_info: &E) -> Result<()>
175162
where
176-
E: Send + Sync + 'static,
163+
E: Clone + Send + Sync + 'static,
177164
{
178165
let table_function = TableFunction::default();
179166
table_function
180167
.set_name(name)
181168
.supports_pushdown(T::supports_pushdown())
182169
.set_bind(Some(bind::<T>))
183170
.set_init(Some(init::<T>))
184-
.set_function(Some(func::<T>));
185-
if let Some(extra_info) = options.extra_info {
186-
table_function.with_extra_info(extra_info);
187-
}
171+
.set_function(Some(func::<T>))
172+
.set_extra_info(extra_info.clone());
188173
for ty in T::parameters().unwrap_or_default() {
189174
table_function.add_parameter(&ty);
190175
}
@@ -371,14 +356,9 @@ mod test {
371356
}
372357

373358
#[test]
374-
fn test_table_function_with_options() -> Result<(), Box<dyn Error>> {
359+
fn test_table_function_with_extra_info() -> Result<(), Box<dyn Error>> {
375360
let conn = Connection::open_in_memory()?;
376-
conn.register_table_function_with_options::<PrefixVTab, _>(
377-
"greet",
378-
TableFunctionOptions {
379-
extra_info: Some("Howdy".to_string()),
380-
},
381-
)?;
361+
conn.register_table_function_with_extra_info::<PrefixVTab, _>("greet", &"Howdy".to_string())?;
382362

383363
let val = conn.query_row("select * from greet('partner')", [], |row| <(String,)>::try_from(row))?;
384364
assert_eq!(val, ("Howdy partner".to_string(),));

0 commit comments

Comments
 (0)