From 5a78ea2a0a38256847fec7d0d20f0fec34824e36 Mon Sep 17 00:00:00 2001 From: Omer Ozarslan Date: Thu, 4 Dec 2025 18:26:00 -0800 Subject: [PATCH 1/5] Add a test for ToSql support with a list The end goal is to make binding a list to a prepared statement work. --- crates/duckdb/src/types/to_sql.rs | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/crates/duckdb/src/types/to_sql.rs b/crates/duckdb/src/types/to_sql.rs index 14264881..a0ed51e0 100644 --- a/crates/duckdb/src/types/to_sql.rs +++ b/crates/duckdb/src/types/to_sql.rs @@ -361,4 +361,54 @@ mod test { assert_eq!(found_label, "target"); Ok(()) } + + #[test] + fn test_list() -> crate::Result<()> { + use crate::{ + params, + types::{FromSql, FromSqlError, FromSqlResult, ToSqlOutput, Value, ValueRef}, + Connection, + }; + + #[derive(Debug, PartialEq, Eq)] + struct MyList(Vec); + + impl ToSql for MyList { + fn to_sql(&self) -> crate::Result> { + Ok(ToSqlOutput::Owned(Value::List( + self.0.iter().map(|&x| Value::Int(x)).collect(), + ))) + } + } + + impl FromSql for MyList { + fn column_result(value_ref: ValueRef<'_>) -> FromSqlResult { + match value_ref.to_owned() { + Value::List(values) => values + .into_iter() + .map(|v| { + if let Value::Int(i) = v { + Ok(i) + } else { + Err(FromSqlError::InvalidType) + } + }) + .collect::, _>>() + .map(MyList), + _ => return FromSqlResult::Err(FromSqlError::InvalidType), + } + } + } + + let db = Connection::open_in_memory()?; + db.execute_batch("CREATE TABLE foo (numbers INT[]);")?; + + let list = MyList(vec![1, 2, 3, 4, 5]); + db.execute("INSERT INTO foo (numbers) VALUES (?)", params![&list])?; + + let found_numbers: MyList = db.prepare("SELECT numbers FROM foo")?.query_one([], |r| r.get(0))?; + assert_eq!(found_numbers, MyList(vec![1, 2, 3, 4, 5])); + + Ok(()) + } } From 8d84c6ddc9b67a922ae24b614d01da2caadb72be Mon Sep 17 00:00:00 2001 From: Omer Ozarslan Date: Thu, 4 Dec 2025 18:28:21 -0800 Subject: [PATCH 2/5] Implement querying 
LogicalTypeId/Handle from Type This will be useful later for converting duckdb-rs types to FFI logical type handles, in particular, for arguments of duckdb_create_list_value. --- crates/duckdb/src/core/logical_type.rs | 2 + crates/duckdb/src/types/mod.rs | 76 ++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/crates/duckdb/src/core/logical_type.rs b/crates/duckdb/src/core/logical_type.rs index 34d74acd..237b1146 100644 --- a/crates/duckdb/src/core/logical_type.rs +++ b/crates/duckdb/src/core/logical_type.rs @@ -168,6 +168,7 @@ impl Debug for LogicalTypeHandle { } write!(f, ">") } + LogicalTypeId::List => write!(f, "List<{:?}>", self.child(0)), _ => write!(f, "{:?}", id), } } @@ -340,6 +341,7 @@ impl LogicalTypeHandle { pub fn child(&self, idx: usize) -> Self { let c_logical_type = unsafe { match self.id() { + LogicalTypeId::List => duckdb_list_type_child_type(self.ptr), LogicalTypeId::Struct => duckdb_struct_type_child_type(self.ptr, idx as u64), LogicalTypeId::Union => duckdb_union_type_member_type(self.ptr, idx as u64), LogicalTypeId::Array => duckdb_array_type_child_type(self.ptr), diff --git a/crates/duckdb/src/types/mod.rs b/crates/duckdb/src/types/mod.rs index 87227143..ab614be6 100644 --- a/crates/duckdb/src/types/mod.rs +++ b/crates/duckdb/src/types/mod.rs @@ -2,6 +2,8 @@ //! implements [`ToSql`] or [`FromSql`] for the cases where you want to know if //! a value was NULL (which gets translated to `None`). +use crate::core::{LogicalTypeHandle, LogicalTypeId}; + pub use self::{ from_sql::{FromSql, FromSqlError, FromSqlResult}, ordered_map::OrderedMap, @@ -194,6 +196,80 @@ impl fmt::Display for Type { } } +impl Type { + /// Returns the inner type of a list, if this type is a list. + pub fn inner_type(&self) -> Option<&Type> { + match self { + Type::List(inner_type) => Some(inner_type), + _ => None, + } + } + + /// Returns the logical type ID for this type. 
+ pub fn logical_type_id(&self) -> LogicalTypeId { + match self { + Type::Null => LogicalTypeId::SqlNull, + Type::Boolean => LogicalTypeId::Boolean, + Type::TinyInt => LogicalTypeId::Tinyint, + Type::SmallInt => LogicalTypeId::Smallint, + Type::Int => LogicalTypeId::Integer, + Type::BigInt => LogicalTypeId::Bigint, + Type::HugeInt => LogicalTypeId::Hugeint, + Type::UTinyInt => LogicalTypeId::UTinyint, + Type::USmallInt => LogicalTypeId::USmallint, + Type::UInt => LogicalTypeId::UInteger, + Type::UBigInt => LogicalTypeId::UBigint, + Type::Float => LogicalTypeId::Float, + Type::Double => LogicalTypeId::Double, + Type::Timestamp => LogicalTypeId::Timestamp, + Type::Text => LogicalTypeId::Varchar, + Type::Blob => LogicalTypeId::Blob, + Type::Date32 => LogicalTypeId::Date, + Type::Time64 => LogicalTypeId::Time, + Type::Interval => LogicalTypeId::Interval, + // Complex types + Type::Decimal => LogicalTypeId::Decimal, + Type::Enum => LogicalTypeId::Enum, + Type::List(_) => LogicalTypeId::List, + Type::Struct(_) => LogicalTypeId::Struct, + Type::Map(_, _) => LogicalTypeId::Map, + Type::Array(_, _) => LogicalTypeId::Array, + Type::Union => LogicalTypeId::Union, + Type::Any => LogicalTypeId::Any, + } + } + + /// Returns the logical type handle for this type. 
+ pub fn logical_type_handle(&self) -> LogicalTypeHandle { + match self { + Type::Null + | Type::Boolean + | Type::TinyInt + | Type::SmallInt + | Type::Int + | Type::BigInt + | Type::HugeInt + | Type::UTinyInt + | Type::USmallInt + | Type::UInt + | Type::UBigInt + | Type::Float + | Type::Double + | Type::Timestamp + | Type::Text + | Type::Blob + | Type::Date32 + | Type::Time64 + | Type::Interval + | Type::Any => self.logical_type_id().into(), + Type::List(inner_type) => LogicalTypeHandle::list(&inner_type.logical_type_handle()), + Type::Decimal | Type::Enum | Type::Struct(_) | Type::Map(_, _) | Type::Array(_, _) | Type::Union => { + unimplemented!("Logical type handle conversion not implemented for {:?}", self) + } + } + } +} + #[cfg(test)] mod test { use super::Value; From 8d142ba93fa98dd26f743cb9c12cfe647e2dca7d Mon Sep 17 00:00:00 2001 From: Omer Ozarslan Date: Thu, 4 Dec 2025 18:41:57 -0800 Subject: [PATCH 3/5] Add ValueHandle, a wrapper around duckdb_value This will be useful later when calling FFI, in particular, for duckdb_create_list_value and duckdb_bind_value. --- crates/duckdb/src/core/mod.rs | 2 ++ crates/duckdb/src/core/value.rs | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 crates/duckdb/src/core/value.rs diff --git a/crates/duckdb/src/core/mod.rs b/crates/duckdb/src/core/mod.rs index 21e630fd..92d9b049 100644 --- a/crates/duckdb/src/core/mod.rs +++ b/crates/duckdb/src/core/mod.rs @@ -1,7 +1,9 @@ mod data_chunk; mod logical_type; +mod value; mod vector; pub use data_chunk::DataChunkHandle; pub use logical_type::{LogicalTypeHandle, LogicalTypeId}; +pub use value::ValueHandle; pub use vector::*; diff --git a/crates/duckdb/src/core/value.rs b/crates/duckdb/src/core/value.rs new file mode 100644 index 00000000..75ccadd7 --- /dev/null +++ b/crates/duckdb/src/core/value.rs @@ -0,0 +1,62 @@ +use crate::{ + ffi::*, + types::{ListType, ValueRef}, +}; + +/// A wrapper around a DuckDB value handle. 
+#[derive(Debug)] +#[repr(C)] +pub struct ValueHandle { + pub(crate) ptr: duckdb_value, + // Do not add members so that array of this type can be used in FFI. +} + +impl Drop for ValueHandle { + fn drop(&mut self) { + if !self.ptr.is_null() { + unsafe { + duckdb_destroy_value(&mut self.ptr); + } + } + self.ptr = std::ptr::null_mut(); + } +} + +impl<'a> From> for ValueHandle { + fn from(value_ref: ValueRef<'a>) -> Self { + let ptr = match value_ref { + ValueRef::Null => unsafe { duckdb_create_null_value() }, + ValueRef::Boolean(v) => unsafe { duckdb_create_bool(v) }, + ValueRef::TinyInt(v) => unsafe { duckdb_create_int8(v) }, + ValueRef::SmallInt(v) => unsafe { duckdb_create_int16(v) }, + ValueRef::Int(v) => unsafe { duckdb_create_int32(v) }, + ValueRef::BigInt(v) => unsafe { duckdb_create_int64(v) }, + ValueRef::HugeInt(v) => { + let lower = v.cast_unsigned() as u64; + let upper = (v >> 64) as i64; + unsafe { duckdb_create_hugeint(duckdb_hugeint { lower, upper }) } + } + ValueRef::UTinyInt(v) => unsafe { duckdb_create_uint8(v) }, + ValueRef::USmallInt(v) => unsafe { duckdb_create_uint16(v) }, + ValueRef::UInt(v) => unsafe { duckdb_create_uint32(v) }, + ValueRef::UBigInt(v) => unsafe { duckdb_create_uint64(v) }, + ValueRef::Float(v) => unsafe { duckdb_create_float(v) }, + ValueRef::Double(v) => unsafe { duckdb_create_double(v) }, + ValueRef::Text(v) => unsafe { duckdb_create_varchar_length(v.as_ptr() as *const i8, v.len() as u64) }, + ValueRef::Blob(v) => unsafe { duckdb_create_blob(v.as_ptr() as *const u8, v.len() as u64) }, + ValueRef::Timestamp(..) + | ValueRef::List(..) + | ValueRef::Date32(..) + | ValueRef::Time64(..) + | ValueRef::Interval { .. } + | ValueRef::Decimal(..) + | ValueRef::Enum(..) + | ValueRef::Struct(..) + | ValueRef::Array(..) + | ValueRef::Map(..) + | ValueRef::Union(..) 
=> unimplemented!("Not implemented for {:?}", value_ref), + }; + assert!(!ptr.is_null(), "Failed to create DuckDB value for {:?}", value_ref); + Self { ptr } + } +} From 5cb9d7c98e785938055ce5bf55a511f5c872eaab Mon Sep 17 00:00:00 2001 From: Omer Ozarslan Date: Thu, 4 Dec 2025 18:50:34 -0800 Subject: [PATCH 4/5] Implement conversion to ValueRef for list values `Value::List` is backed by a `Vec`, however, `ValueRef::List` can't refer to it because `ValueRef` assumes the referent is backed by Arrow list arrays. This commit extends `ValueRef::List` to allow referring to native Rust values that are not backed by Arrow list arrays. Note that this is a user-facing breaking change in `ValueRef::List` and `ListType` API. --- crates/duckdb/src/core/value.rs | 21 +++++++++++++++ crates/duckdb/src/row.rs | 4 +-- crates/duckdb/src/statement.rs | 7 ++++- crates/duckdb/src/types/value.rs | 5 +++- crates/duckdb/src/types/value_ref.rs | 38 ++++++++++++++++++---------- 5 files changed, 58 insertions(+), 17 deletions(-) diff --git a/crates/duckdb/src/core/value.rs b/crates/duckdb/src/core/value.rs index 75ccadd7..c9cdafb2 100644 --- a/crates/duckdb/src/core/value.rs +++ b/crates/duckdb/src/core/value.rs @@ -44,6 +44,27 @@ impl<'a> From> for ValueHandle { ValueRef::Double(v) => unsafe { duckdb_create_double(v) }, ValueRef::Text(v) => unsafe { duckdb_create_varchar_length(v.as_ptr() as *const i8, v.len() as u64) }, ValueRef::Blob(v) => unsafe { duckdb_create_blob(v.as_ptr() as *const u8, v.len() as u64) }, + ValueRef::List(ListType::Native(arr)) => { + let logical_type = value_ref + .data_type() + .inner_type() + .expect("List type doesn't have an inner type") + .logical_type_handle(); + // Underlying DuckDB API isn't marked const unfortunately, so we have to use a mutable pointer. 
+ let mut values = arr + .iter() + .map(ValueRef::from) + .map(ValueHandle::from) + .collect::>(); + let value_count = arr.len() as u64; + unsafe { + duckdb_create_list_value( + logical_type.ptr, + values[..].as_mut_ptr() as *mut duckdb_value, + value_count, + ) + } + } ValueRef::Timestamp(..) | ValueRef::List(..) | ValueRef::Date32(..) diff --git a/crates/duckdb/src/row.rs b/crates/duckdb/src/row.rs index 19e027d3..069847d1 100644 --- a/crates/duckdb/src/row.rs +++ b/crates/duckdb/src/row.rs @@ -621,12 +621,12 @@ impl<'stmt> Row<'stmt> { DataType::LargeList(..) => { let arr = column.as_any().downcast_ref::().unwrap(); - ValueRef::List(ListType::Large(arr), row) + ValueRef::List(ListType::Large(arr, row)) } DataType::List(..) => { let arr = column.as_any().downcast_ref::().unwrap(); - ValueRef::List(ListType::Regular(arr), row) + ValueRef::List(ListType::Regular(arr, row)) } DataType::Dictionary(key_type, ..) => { let column = column.as_any(); diff --git a/crates/duckdb/src/statement.rs b/crates/duckdb/src/statement.rs index af8dc7b4..24df2621 100644 --- a/crates/duckdb/src/statement.rs +++ b/crates/duckdb/src/statement.rs @@ -7,8 +7,9 @@ use super::{ffi, AndThenRows, Connection, Error, MappedRows, Params, RawStatemen use crate::{arrow2, polars_dataframe::Polars}; use crate::{ arrow_batch::{Arrow, ArrowStream}, + core::ValueHandle, error::result_from_duckdb_prepare, - types::{TimeUnit, ToSql, ToSqlOutput}, + types::{ListType, TimeUnit, ToSql, ToSqlOutput}, }; /// A prepared statement. 
@@ -608,6 +609,10 @@ impl Statement<'_> { let micros = nanos / 1_000; ffi::duckdb_bind_interval(ptr, col as u64, ffi::duckdb_interval { months, days, micros }) }, + ValueRef::List(ListType::Native(_)) => { + let value = ValueHandle::from(value); + unsafe { ffi::duckdb_bind_value(ptr, col as u64, value.ptr) } + } _ => unreachable!("not supported: {}", value.data_type()), }; result_from_duckdb_prepare(rc, ptr) diff --git a/crates/duckdb/src/types/value.rs b/crates/duckdb/src/types/value.rs index 4d532457..b915bd8a 100644 --- a/crates/duckdb/src/types/value.rs +++ b/crates/duckdb/src/types/value.rs @@ -1,3 +1,5 @@ +use crate::types::{value_ref::ListType, ValueRef}; + use super::{Null, OrderedMap, TimeUnit, Type}; use rust_decimal::prelude::*; @@ -238,7 +240,8 @@ impl Value { Self::Date32(_) => Type::Date32, Self::Time64(..) => Type::Time64, Self::Interval { .. } => Type::Interval, - Self::Union(..) | Self::Struct(..) | Self::List(..) | Self::Array(..) | Self::Map(..) => todo!(), + Self::List(ref arr) => ValueRef::List(ListType::Native(arr)).data_type(), + Self::Union(..) | Self::Struct(..) | Self::Array(..) | Self::Map(..) => todo!(), Self::Enum(..) 
=> Type::Enum, } } diff --git a/crates/duckdb/src/types/value_ref.rs b/crates/duckdb/src/types/value_ref.rs index 0bab7c34..5a3d82f3 100644 --- a/crates/duckdb/src/types/value_ref.rs +++ b/crates/duckdb/src/types/value_ref.rs @@ -1,7 +1,8 @@ use super::{Type, Value}; -use crate::types::{FromSqlError, FromSqlResult, OrderedMap}; - -use crate::Row; +use crate::{ + types::{FromSqlError, FromSqlResult, OrderedMap}, + Row, +}; use rust_decimal::prelude::*; use arrow::{ @@ -92,7 +93,7 @@ pub enum ValueRef<'a> { nanos: i64, }, /// The value is a list - List(ListType<'a>, usize), + List(ListType<'a>), /// The value is an enum Enum(EnumType<'a>, usize), /// The value is a struct @@ -109,9 +110,11 @@ pub enum ValueRef<'a> { #[derive(Debug, Copy, Clone, PartialEq)] pub enum ListType<'a> { /// The underlying list is a `ListArray` - Regular(&'a ListArray), + Regular(&'a ListArray, usize), /// The underlying list is a `LargeListArray` - Large(&'a LargeListArray), + Large(&'a LargeListArray, usize), + /// The underlying list type is not backed by an array. + Native(&'a Vec), } /// Wrapper type for different enum sizes @@ -153,9 +156,16 @@ impl ValueRef<'_> { ValueRef::Struct(arr, _) => arr.data_type().into(), ValueRef::Map(arr, _) => arr.data_type().into(), ValueRef::Array(arr, _) => arr.data_type().into(), - ValueRef::List(arr, _) => match arr { - ListType::Large(arr) => arr.data_type().into(), - ListType::Regular(arr) => arr.data_type().into(), + ValueRef::List(arr) => match arr { + ListType::Large(arr, _) => arr.data_type().into(), + ListType::Regular(arr, _) => arr.data_type().into(), + ListType::Native(arr) => { + // This could be improved for nested lists like [[],[1]] by not relying only + // on the first element. Let the caller handle Any resolution for now to keep + // things simpler. + let element_type = arr.get(0).map(|v| v.data_type()).unwrap_or(Type::Any); + Type::List(Box::new(element_type)) + } }, ValueRef::Enum(..) 
=> Type::Enum, ValueRef::Union(arr, _) => arr.data_type().into(), @@ -218,8 +228,8 @@ impl From> for Value { ValueRef::Date32(d) => Self::Date32(d), ValueRef::Time64(t, d) => Self::Time64(t, d), ValueRef::Interval { months, days, nanos } => Self::Interval { months, days, nanos }, - ValueRef::List(items, idx) => match items { - ListType::Regular(items) => { + ValueRef::List(items) => match items { + ListType::Regular(items, idx) => { let offsets = items.offsets(); from_list( offsets[idx].try_into().unwrap(), @@ -228,7 +238,7 @@ impl From> for Value { items.values(), ) } - ListType::Large(items) => { + ListType::Large(items, idx) => { let offsets = items.offsets(); from_list( offsets[idx].try_into().unwrap(), @@ -237,6 +247,7 @@ impl From> for Value { items.values(), ) } + ListType::Native(items) => Self::List(items.clone()), }, ValueRef::Enum(items, idx) => { let dict_values = match items { @@ -349,8 +360,9 @@ impl<'a> From<&'a Value> for ValueRef<'a> { Value::Date32(d) => ValueRef::Date32(d), Value::Time64(t, d) => ValueRef::Time64(t, d), Value::Interval { months, days, nanos } => ValueRef::Interval { months, days, nanos }, + Value::List(ref items) => ValueRef::List(ListType::Native(items)), Value::Enum(..) => todo!(), - Value::List(..) | Value::Struct(..) | Value::Map(..) | Value::Array(..) | Value::Union(..) => { + Value::Struct(..) | Value::Map(..) | Value::Array(..) | Value::Union(..) 
=> { unimplemented!() } } From 5788f8f8274dc34da699dbfade176b7b2a2ec186 Mon Sep 17 00:00:00 2001 From: Omer Ozarslan Date: Thu, 4 Dec 2025 19:52:48 -0800 Subject: [PATCH 5/5] Add test for empty lists not working for binding --- crates/duckdb/src/types/to_sql.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/crates/duckdb/src/types/to_sql.rs b/crates/duckdb/src/types/to_sql.rs index a0ed51e0..cd64cdf2 100644 --- a/crates/duckdb/src/types/to_sql.rs +++ b/crates/duckdb/src/types/to_sql.rs @@ -411,4 +411,33 @@ mod test { Ok(()) } + + #[test] + #[should_panic = "Failed to create DuckDB value for List(Native([]))"] + fn test_empty_list() -> () { + use crate::{ + params, + types::{ToSqlOutput, Value}, + Connection, + }; + + #[derive(Debug, PartialEq, Eq)] + struct MyList(Vec); + + impl ToSql for MyList { + fn to_sql(&self) -> crate::Result> { + Ok(ToSqlOutput::Owned(Value::List( + self.0.iter().map(|&x| Value::Int(x)).collect(), + ))) + } + } + + let db = Connection::open_in_memory().unwrap(); + db.execute_batch("CREATE TABLE foo (numbers INT[]);").unwrap(); + + let list = MyList(vec![]); + + // This should panic because the list is empty and DuckDB cannot determine the type of the list. + _ = db.execute("INSERT INTO foo (numbers) VALUES (?)", params![&list]); + } }