From 1588acf6abcf1bc7071af0756081a6c4eb6221aa Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 13:32:12 +0000 Subject: [PATCH 1/8] chore[duckdb]: clean up the duckdb exporters Signed-off-by: Joe Isaacs --- .../src/arrays/chunked/compute/filter.rs | 1 - vortex-array/src/arrays/decimal/array.rs | 22 +++ vortex-array/src/arrays/decimal/mod.rs | 1 + .../src/arrays/fixed_size_list/array.rs | 4 +- vortex-array/src/arrays/list/array.rs | 17 ++ vortex-array/src/arrays/list/mod.rs | 1 + vortex-array/src/arrays/struct_/array.rs | 21 ++- vortex-array/src/arrays/struct_/mod.rs | 1 + .../src/arrow/compute/to_arrow/mod.rs | 4 +- vortex-array/src/arrow/executor/byte.rs | 2 +- vortex-array/src/arrow/executor/byte_view.rs | 2 +- .../src/arrow/executor/fixed_size_list.rs | 2 +- vortex-array/src/arrow/executor/list.rs | 6 +- vortex-array/src/arrow/executor/list_view.rs | 2 +- vortex-array/src/arrow/executor/struct_.rs | 45 +++-- vortex-array/src/arrow/executor/validity.rs | 4 +- vortex-duckdb/src/convert/dtype.rs | 59 ++++++- vortex-duckdb/src/duckdb/logical_type.rs | 49 ------ vortex-duckdb/src/exporter/constant.rs | 14 +- vortex-duckdb/src/exporter/decimal.rs | 25 ++- vortex-duckdb/src/exporter/dict.rs | 166 +++--------------- vortex-duckdb/src/exporter/fixed_size_list.rs | 54 ++++-- vortex-duckdb/src/exporter/list.rs | 120 ++++--------- vortex-duckdb/src/exporter/list_view.rs | 127 +++++--------- vortex-duckdb/src/exporter/mod.rs | 87 ++------- vortex-duckdb/src/exporter/run_end.rs | 24 +-- vortex-duckdb/src/exporter/struct_.rs | 96 +++++----- vortex-duckdb/src/exporter/temporal.rs | 19 +- vortex-duckdb/src/lib.rs | 1 + vortex-error/src/lib.rs | 1 + vortex-python/src/arrays/mod.rs | 2 +- 31 files changed, 402 insertions(+), 577 deletions(-) diff --git a/vortex-array/src/arrays/chunked/compute/filter.rs b/vortex-array/src/arrays/chunked/compute/filter.rs index b0ccb5ab07d..14849413517 100644 --- a/vortex-array/src/arrays/chunked/compute/filter.rs +++ b/vortex-array/src/arrays/chunked/compute/filter.rs @@ -142,7 +142,6 @@ pub(crate) fn chunk_filters( } /// Filter the chunks using indices. -#[allow(deprecated)] fn filter_indices( array: &ChunkedArray, indices: impl Iterator, diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index 46084dfac95..4bc9ead5067 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -12,6 +12,7 @@ use vortex_dtype::DecimalDType; use vortex_dtype::DecimalType; use vortex_dtype::IntegerPType; use vortex_dtype::NativeDecimalType; +use vortex_dtype::Nullability; use vortex_dtype::match_each_decimal_value_type; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; @@ -91,6 +92,14 @@ pub struct DecimalArray { pub(super) stats_set: ArrayStats, } +pub struct DecimalArrayParts { + pub decimal_dtype: DecimalDType, + pub nullability: Nullability, + pub values: ByteBuffer, + pub values_type: DecimalType, + pub validity: Validity, +} + impl DecimalArray { /// Creates a new [`DecimalArray`]. /// @@ -221,6 +230,19 @@ impl DecimalArray { } } + pub fn into_parts(self) -> DecimalArrayParts { + let nullability = self.dtype.nullability(); + let dtype = self.dtype.into_decimal_opt().vortex_expect("cannot fail"); + + DecimalArrayParts { + dtype, + nullability, + values: self.values, + values_type: self.values_type, + validity: self.validity, + } + } + /// Returns the underlying [`ByteBuffer`] of the array. pub fn byte_buffer(&self) -> ByteBuffer { self.values.clone() diff --git a/vortex-array/src/arrays/decimal/mod.rs b/vortex-array/src/arrays/decimal/mod.rs index d04b3829752..0e57832baad 100644 --- a/vortex-array/src/arrays/decimal/mod.rs +++ b/vortex-array/src/arrays/decimal/mod.rs @@ -3,6 +3,7 @@ mod array; pub use array::DecimalArray; +pub use array::DecimalArrayParts; mod compute; diff --git a/vortex-array/src/arrays/fixed_size_list/array.rs b/vortex-array/src/arrays/fixed_size_list/array.rs index 9728633a7bc..9f1f4a5e736 100644 --- a/vortex-array/src/arrays/fixed_size_list/array.rs +++ b/vortex-array/src/arrays/fixed_size_list/array.rs @@ -166,8 +166,8 @@ impl FixedSizeListArray { } } - pub fn into_parts(self) -> (ArrayRef, Validity) { - (self.elements, self.validity) + pub fn into_parts(self) -> (ArrayRef, Validity, DType) { + (self.elements, self.validity, self.dtype) } /// Validates the components that would be used to create a [`FixedSizeListArray`]. diff --git a/vortex-array/src/arrays/list/array.rs b/vortex-array/src/arrays/list/array.rs index 4d0a7e51bda..606cd88e599 100644 --- a/vortex-array/src/arrays/list/array.rs +++ b/vortex-array/src/arrays/list/array.rs @@ -85,6 +85,13 @@ pub struct ListArray { pub(super) stats_set: ArrayStats, } +pub struct ListArrayParts { + pub dtype: DType, + pub elements: ArrayRef, + pub offsets: ArrayRef, + pub validity: Validity, +} + impl ListArray { /// Creates a new [`ListArray`]. /// @@ -229,6 +236,16 @@ impl ListArray { Ok(()) } + /// Splits an array into its parts + pub fn into_parts(self) -> ListArrayParts { + ListArrayParts { + dtype: self.dtype, + elements: self.elements, + offsets: self.offsets, + validity: self.validity, + } + } + /// Returns the offset at the given index from the list array. /// /// Panics if the index is out of bounds. diff --git a/vortex-array/src/arrays/list/mod.rs b/vortex-array/src/arrays/list/mod.rs index 19f927ddfb8..3b68de43460 100644 --- a/vortex-array/src/arrays/list/mod.rs +++ b/vortex-array/src/arrays/list/mod.rs @@ -3,6 +3,7 @@ mod array; pub use array::ListArray; +pub use array::ListArrayParts; mod compute; diff --git a/vortex-array/src/arrays/struct_/array.rs b/vortex-array/src/arrays/struct_/array.rs index a0b2418b903..7b39f30c7bb 100644 --- a/vortex-array/src/arrays/struct_/array.rs +++ b/vortex-array/src/arrays/struct_/array.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use vortex_dtype::DType; use vortex_dtype::FieldName; use vortex_dtype::FieldNames; +use vortex_dtype::Nullability; use vortex_dtype::StructFields; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -148,6 +149,13 @@ pub struct StructArray { pub(super) stats_set: ArrayStats, } +pub struct StructArrayParts { + pub struct_fields: StructFields, + pub nullability: Nullability, + pub fields: Arc<[ArrayRef]>, + pub validity: Validity, +} + impl StructArray { pub fn fields(&self) -> &Arc<[ArrayRef]> { &self.fields @@ -344,8 +352,19 @@ impl StructArray { Ok(unsafe { Self::new_unchecked(fields, dtype, length, validity) }) } + pub fn into_parts(self) -> StructArrayParts { + let nullability = self.dtype.nullability(); + let struct_fields = self.dtype.into_struct_fields(); + StructArrayParts { + struct_fields, + nullability, + fields: self.fields, + validity: self.validity, + } + } + pub fn into_fields(self) -> Vec { - self.fields.to_vec() + self.into_parts().fields.to_vec() } pub fn from_fields>(items: &[(N, ArrayRef)]) -> VortexResult { diff --git a/vortex-array/src/arrays/struct_/mod.rs b/vortex-array/src/arrays/struct_/mod.rs index 47d90317bc8..85123d8053a 100644 --- a/vortex-array/src/arrays/struct_/mod.rs +++ b/vortex-array/src/arrays/struct_/mod.rs @@ -3,6 +3,7 @@ mod array; pub use array::StructArray; +pub use array::StructArrayParts; mod compute; mod vtable; diff --git a/vortex-array/src/arrow/compute/to_arrow/mod.rs b/vortex-array/src/arrow/compute/to_arrow/mod.rs index 71ac3de959c..cd955f43060 100644 --- a/vortex-array/src/arrow/compute/to_arrow/mod.rs +++ b/vortex-array/src/arrow/compute/to_arrow/mod.rs @@ -22,7 +22,7 @@ use crate::compute::Options; /// Warning: do not use this to convert a Vortex [`crate::stream::ArrayStream`] since each array /// may have a different preferred Arrow type. Use [`to_arrow`] instead. #[deprecated(note = "Use ArrowArrayExecutor::execute_arrow instead")] -#[allow(deprecated)] +#[expect(deprecated)] pub fn to_arrow_preferred(array: &dyn Array) -> VortexResult { to_arrow_opts(array, &ToArrowOptions { arrow_type: None }) } @@ -35,7 +35,7 @@ pub fn to_arrow(array: &dyn Array, arrow_type: &DataType) -> VortexResult VortexResult { let data_type = if let Some(data_type) = &options.arrow_type { data_type.clone() diff --git a/vortex-array/src/arrow/executor/byte.rs b/vortex-array/src/arrow/executor/byte.rs index 2b1d85b8bd3..e48aba31434 100644 --- a/vortex-array/src/arrow/executor/byte.rs +++ b/vortex-array/src/arrow/executor/byte.rs @@ -68,7 +68,7 @@ where let data = array.bytes().clone().into_arrow_buffer(); - let null_buffer = to_arrow_null_buffer(array.validity(), array.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(array.validity().clone(), array.len(), ctx)?; Ok(Arc::new(unsafe { GenericByteArray::::new_unchecked(offsets, data, null_buffer) })) diff --git a/vortex-array/src/arrow/executor/byte_view.rs b/vortex-array/src/arrow/executor/byte_view.rs index a1a7d00c07b..ec6aea2fe97 100644 --- a/vortex-array/src/arrow/executor/byte_view.rs +++ b/vortex-array/src/arrow/executor/byte_view.rs @@ -46,7 +46,7 @@ pub fn execute_varbinview_to_arrow( .iter() .map(|buffer| buffer.clone().into_arrow_buffer()) .collect(); - let nulls = to_arrow_null_buffer(array.validity(), array.len(), ctx)?; + let nulls = to_arrow_null_buffer(array.validity().clone(), array.len(), ctx)?; // SAFETY: our own VarBinView array is considered safe. Ok(Arc::new(unsafe { diff --git a/vortex-array/src/arrow/executor/fixed_size_list.rs b/vortex-array/src/arrow/executor/fixed_size_list.rs index 8c8834ebc53..6da7c40aa58 100644 --- a/vortex-array/src/arrow/executor/fixed_size_list.rs +++ b/vortex-array/src/arrow/executor/fixed_size_list.rs @@ -53,7 +53,7 @@ fn list_to_list( "Cannot convert FixedSizeListArray to non-nullable Arrow array when elements are nullable" ); - let null_buffer = to_arrow_null_buffer(array.validity(), array.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(array.validity().clone(), array.len(), ctx)?; Ok(Arc::new(arrow_array::FixedSizeListArray::new( elements_field.clone(), diff --git a/vortex-array/src/arrow/executor/list.rs b/vortex-array/src/arrow/executor/list.rs index 145f0d5770b..597135fdab0 100644 --- a/vortex-array/src/arrow/executor/list.rs +++ b/vortex-array/src/arrow/executor/list.rs @@ -97,7 +97,7 @@ fn list_to_list( "Cannot convert to non-nullable Arrow array with null elements" ); - let null_buffer = to_arrow_null_buffer(array.validity(), array.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(array.validity().clone(), array.len(), ctx)?; // TODO(ngates): use new_unchecked when it is added to arrow-rs. Ok(Arc::new(GenericListArray::::new( @@ -154,7 +154,7 @@ fn list_view_zctl( "Cannot convert to non-nullable Arrow array with null elements" ); - let null_buffer = to_arrow_null_buffer(&validity, sizes.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(validity, sizes.len(), ctx)?; Ok(Arc::new(GenericListArray::::new( elements_field.clone(), @@ -212,7 +212,7 @@ fn list_view_to_list( "Cannot convert to non-nullable Arrow array with null elements" ); - let null_buffer = to_arrow_null_buffer(&validity, sizes.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(validity, sizes.len(), ctx)?; Ok(Arc::new(GenericListArray::::new( elements_field.clone(), diff --git a/vortex-array/src/arrow/executor/list_view.rs b/vortex-array/src/arrow/executor/list_view.rs index cfd7e994d4b..96e7fb791f9 100644 --- a/vortex-array/src/arrow/executor/list_view.rs +++ b/vortex-array/src/arrow/executor/list_view.rs @@ -61,7 +61,7 @@ fn list_view_to_list_view( .to_buffer::() .into_arrow_scalar_buffer(); - let null_buffer = to_arrow_null_buffer(&validity, offsets.len(), ctx)?; + let null_buffer = to_arrow_null_buffer(validity, offsets.len(), ctx)?; Ok(Arc::new(GenericListViewArray::::new( elements_field.clone(), diff --git a/vortex-array/src/arrow/executor/struct_.rs b/vortex-array/src/arrow/executor/struct_.rs index 105dd4debda..a33843bd4b9 100644 --- a/vortex-array/src/arrow/executor/struct_.rs +++ b/vortex-array/src/arrow/executor/struct_.rs @@ -24,16 +24,16 @@ use crate::ToCanonical; use crate::arrays::ChunkedVTable; use crate::arrays::ScalarFnVTable; use crate::arrays::StructArray; +use crate::arrays::StructArrayParts; use crate::arrays::StructVTable; use crate::arrow::ArrowArrayExecutor; use crate::arrow::executor::validity::to_arrow_null_buffer; use crate::builtins::ArrayBuiltins; use crate::expr::Pack; -use crate::vtable::ValidityHelper; pub(super) fn to_arrow_struct( array: ArrayRef, - fields: Option<&Fields>, + target_fields: Option<&Fields>, ctx: &mut ExecutionCtx, ) -> VortexResult { let len = array.len(); @@ -51,10 +51,17 @@ pub(super) fn to_arrow_struct( // Attempt to short-circuit if the array is already a StructVTable: let array = match array.try_into::() { Ok(array) => { - let validity = to_arrow_null_buffer(array.validity(), array.len(), ctx)?; + let len = array.len(); + let StructArrayParts { + validity, + fields, + struct_fields, + .. + } = array.into_parts(); + let validity = to_arrow_null_buffer(validity, len, ctx)?; return create_from_fields( - fields.ok_or_else(|| array.names().clone()), - array.into_fields(), + target_fields.ok_or_else(|| struct_fields.names().clone()), + &fields, validity, len, ctx, @@ -71,8 +78,8 @@ pub(super) fn to_arrow_struct( unreachable!("Pack must have Struct dtype"); }; return create_from_fields( - fields.ok_or_else(|| struct_fields.names().clone()), - array.children().to_vec(), + target_fields.ok_or_else(|| struct_fields.names().clone()), + array.children(), None, // Pack is never null, len, ctx, @@ -80,7 +87,7 @@ pub(super) fn to_arrow_struct( } // Otherwise, we fall back to executing to a StructArray. - let array = if let Some(fields) = fields { + let array = if let Some(fields) = target_fields { let vx_fields = StructFields::from_arrow(fields); // We apply a cast to ensure we push down casting where possible into the struct fields. array.cast(DType::Struct( @@ -92,10 +99,18 @@ pub(super) fn to_arrow_struct( }; let struct_array = array.execute::(ctx)?; - let validity = to_arrow_null_buffer(struct_array.validity(), struct_array.len(), ctx)?; + let len = struct_array.len(); + let StructArrayParts { + validity, + fields, + struct_fields, + .. + } = struct_array.into_parts(); + + let validity = to_arrow_null_buffer(validity, len, ctx)?; create_from_fields( - fields.ok_or_else(|| struct_array.names().clone()), - struct_array.into_fields(), + target_fields.ok_or_else(|| struct_fields.names().clone()), + &fields, validity, len, ctx, @@ -104,7 +119,7 @@ pub(super) fn to_arrow_struct( fn create_from_fields( fields: Result<&Fields, FieldNames>, - vortex_fields: Vec, + vortex_fields: &[ArrayRef], null_buffer: Option, len: usize, ctx: &mut ExecutionCtx, @@ -120,7 +135,9 @@ fn create_from_fields( let mut arrow_arrays = Vec::with_capacity(vortex_fields.len()); for (field, vx_field) in fields.iter().zip_eq(vortex_fields.into_iter()) { - let arrow_field = vx_field.execute_arrow(Some(field.data_type()), ctx)?; + let arrow_field = vx_field + .clone() + .execute_arrow(Some(field.data_type()), ctx)?; vortex_ensure!( field.is_nullable() || arrow_field.null_count() == 0, "Cannot convert field '{}' to non-nullable Arrow field because it contains nulls", @@ -142,7 +159,7 @@ fn create_from_fields( // No target fields specified - use preferred types for each child let mut arrow_arrays = Vec::with_capacity(vortex_fields.len()); for vx_field in vortex_fields.into_iter() { - let arrow_array = vx_field.execute_arrow(None, ctx)?; + let arrow_array = vx_field.clone().execute_arrow(None, ctx)?; arrow_arrays.push(arrow_array); } diff --git a/vortex-array/src/arrow/executor/validity.rs b/vortex-array/src/arrow/executor/validity.rs index 6bf27a1e2db..27a29a58783 100644 --- a/vortex-array/src/arrow/executor/validity.rs +++ b/vortex-array/src/arrow/executor/validity.rs @@ -10,13 +10,13 @@ use crate::arrow::null_buffer::to_null_buffer; use crate::validity::Validity; pub(super) fn to_arrow_null_buffer( - validity: &Validity, + validity: Validity, len: usize, ctx: &mut ExecutionCtx, ) -> VortexResult> { Ok(match validity { Validity::NonNullable | Validity::AllValid => None, Validity::AllInvalid => Some(NullBuffer::new_null(len)), - Validity::Array(array) => to_null_buffer(array.clone().execute::(ctx)?), + Validity::Array(array) => to_null_buffer(array.execute::(ctx)?), }) } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 436e0c68ede..e9db0d5669b 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -199,6 +199,14 @@ where .collect::>() } +impl TryFrom for LogicalType { + type Error = VortexError; + + fn try_from(value: DType) -> Result { + LogicalType::try_from(&value) + } +} + impl TryFrom<&DType> for LogicalType { type Error = VortexError; @@ -254,7 +262,7 @@ impl TryFrom<&DType> for LogicalType { } DType::Extension(ext_dtype) => { if datetime::is_temporal_ext_type(ext_dtype.id()) { - return LogicalType::temporal_type(ext_dtype); + return LogicalType::try_from(ext_dtype.as_ref()); } else { vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id()) } @@ -265,6 +273,55 @@ impl TryFrom<&DType> for LogicalType { } } +/// Converts temporal extension types to corresponding DuckDB types. +/// +/// # Arguments +/// +/// * `ext_dtype` - A reference to the extension data type containing temporal metadata. +/// +/// # Supported Temporal Types +/// +/// - **Date**: Must use `TimeUnit::D` +/// - **Time**: Must use `TimeUnit::Us` +/// - **Timestamp**: Supports `TimeUnit::Ns`, `Us`, `Ms`, `S` +impl TryFrom<&ExtDType> for LogicalType { + type Error = VortexError; + + fn try_from(ext_dtype: &ExtDType) -> Result { + let temporal_metadata = TemporalMetadata::try_from(ext_dtype) + .map_err(|e| vortex_err!("Failed to extract temporal metadata: {}", e))?; + + let duckdb_type = match temporal_metadata { + TemporalMetadata::Date(TimeUnit::Days) => DUCKDB_TYPE::DUCKDB_TYPE_DATE, + TemporalMetadata::Date(time_unit) => { + vortex_bail!("Invalid TimeUnit {} for date", time_unit); + } + TemporalMetadata::Time(TimeUnit::Microseconds) => DUCKDB_TYPE::DUCKDB_TYPE_TIME, + TemporalMetadata::Time(time_unit) => { + vortex_bail!("Invalid TimeUnit {} for time", time_unit); + } + TemporalMetadata::Timestamp(time_unit, tz) => match time_unit { + TimeUnit::Nanoseconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_NS, + TimeUnit::Microseconds => { + if let Some(tz) = tz { + if tz != "UTC" { + vortex_bail!("Invalid timezone for timestamp: {tz}"); + } + DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ + } else { + DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP + } + } + TimeUnit::Milliseconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS, + TimeUnit::Seconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S, + _ => vortex_bail!("Invalid TimeUnit {} for timestamp", time_unit), + }, + }; + + Ok(Self::new(duckdb_type)) + } +} + impl TryFrom for LogicalType { type Error = VortexError; diff --git a/vortex-duckdb/src/duckdb/logical_type.rs b/vortex-duckdb/src/duckdb/logical_type.rs index 375034d0da2..61d393a1a9d 100644 --- a/vortex-duckdb/src/duckdb/logical_type.rs +++ b/vortex-duckdb/src/duckdb/logical_type.rs @@ -7,7 +7,6 @@ use std::fmt::Debug; use std::fmt::Display; use std::fmt::Formatter; -use vortex::dtype::ExtDType; use vortex::dtype::FieldName; use vortex::error::VortexExpect; use vortex::error::VortexResult; @@ -107,54 +106,6 @@ impl LogicalType { Ok(unsafe { Self::own(ptr) }) } - /// Converts temporal extension types to corresponding DuckDB types. - /// - /// # Arguments - /// - /// * `ext_dtype` - A reference to the extension data type containing temporal metadata. - /// - /// # Supported Temporal Types - /// - /// - **Date**: Must use `TimeUnit::D` - /// - **Time**: Must use `TimeUnit::Us` - /// - **Timestamp**: Supports `TimeUnit::Ns`, `Us`, `Ms`, `S` - pub fn temporal_type(ext_dtype: &ExtDType) -> VortexResult { - use vortex::dtype::datetime::TemporalMetadata; - use vortex::dtype::datetime::TimeUnit; - - let temporal_metadata = TemporalMetadata::try_from(ext_dtype) - .map_err(|e| vortex_err!("Failed to extract temporal metadata: {}", e))?; - - let duckdb_type = match temporal_metadata { - TemporalMetadata::Date(TimeUnit::Days) => DUCKDB_TYPE::DUCKDB_TYPE_DATE, - TemporalMetadata::Date(time_unit) => { - vortex_bail!("Invalid TimeUnit {} for date", time_unit); - } - TemporalMetadata::Time(TimeUnit::Microseconds) => DUCKDB_TYPE::DUCKDB_TYPE_TIME, - TemporalMetadata::Time(time_unit) => { - vortex_bail!("Invalid TimeUnit {} for time", time_unit); - } - TemporalMetadata::Timestamp(time_unit, tz) => match time_unit { - TimeUnit::Nanoseconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_NS, - TimeUnit::Microseconds => { - if let Some(tz) = tz { - if tz != "UTC" { - vortex_bail!("Invalid timezone for timestamp: {tz}"); - } - DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ - } else { - DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP - } - } - TimeUnit::Milliseconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS, - TimeUnit::Seconds => DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S, - _ => vortex_bail!("Invalid TimeUnit {} for timestamp", time_unit), - }, - }; - - Ok(Self::new(duckdb_type)) - } - pub fn new_array(element_dtype: DUCKDB_TYPE, array_size: u32) -> Self { let element_dtype = Self::new(element_dtype); diff --git a/vortex-duckdb/src/exporter/constant.rs b/vortex-duckdb/src/exporter/constant.rs index df7aca92c1c..15d659ee4be 100644 --- a/vortex-duckdb/src/exporter/constant.rs +++ b/vortex-duckdb/src/exporter/constant.rs @@ -1,6 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex::array::Canonical; +use vortex::array::ExecutionCtx; +use vortex::array::IntoArray; use vortex::array::arrays::ConstantArray; use vortex::error::VortexResult; use vortex::mask::Mask; @@ -18,9 +21,10 @@ struct ConstantExporter { } pub fn new_exporter_with_mask( - array: &ConstantArray, + array: ConstantArray, mask: Mask, cache: &ConversionCache, + ctx: &mut ExecutionCtx, ) -> VortexResult> { if mask.all_false() { return Ok(Box::new(ConstantExporter { value: None })); @@ -31,14 +35,18 @@ pub fn new_exporter_with_mask( // TODO(joe): we can splat the constant in a specific exporter and save a copy. return Ok(validity::new_exporter( mask, - new_array_exporter(array.to_canonical()?.as_ref(), cache)?, + new_array_exporter( + array.into_array().execute::(ctx)?.into_array(), + cache, + ctx, + )?, )); } new_exporter(array) } -pub(crate) fn new_exporter(array: &ConstantArray) -> VortexResult> { +pub(crate) fn new_exporter(array: ConstantArray) -> VortexResult> { let value = if array.scalar().is_null() { // If the scalar is null and _not_ of type Null, then we cannot assign a null DuckDB value // to a constant vector since DuckDB will complain about a type-mismatch. In these cases, diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index 0bbed37d5f0..b5c50aec012 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -15,6 +15,7 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::mask::Mask; use vortex::scalar::DecimalType; +use vortex_array::arrays::DecimalArrayParts; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; @@ -34,12 +35,20 @@ struct DecimalZeroCopyExporter { } pub(crate) fn new_exporter(array: DecimalArray) -> VortexResult> { - let validity = array.validity_mask(); - let dest_values_type = precision_to_duckdb_storage_size(&array.decimal_dtype())?; - - if array.values_type() == dest_values_type { - match_each_decimal_value_type!(array.values_type(), |D| { - let buffer = array.buffer::(); + let len = array.len(); + let DecimalArrayParts { + validity, + decimal_dtype, + values_type, + values, + .. + } = array.into_parts(); + let dest_values_type = precision_to_duckdb_storage_size(&decimal_dtype)?; + let validity = validity.to_mask(len); + + if values_type == dest_values_type { + match_each_decimal_value_type!(values_type, |D| { + let buffer = Buffer::::from_byte_buffer(values) return Ok(Box::new(DecimalZeroCopyExporter { values: buffer.clone(), shared_buffer: VectorBuffer::new(buffer), @@ -48,10 +57,10 @@ pub(crate) fn new_exporter(array: DecimalArray) -> VortexResult(), + values: Buffer::::from_byte_buffer(values), validity, dest_value_type: PhantomData::, })) diff --git a/vortex-duckdb/src/exporter/dict.rs b/vortex-duckdb/src/exporter/dict.rs index 1868ed3a9e8..be045c6eed9 100644 --- a/vortex-duckdb/src/exporter/dict.rs +++ b/vortex-duckdb/src/exporter/dict.rs @@ -16,10 +16,6 @@ use vortex::array::arrays::ConstantArray; use vortex::array::arrays::ConstantVTable; use vortex::array::arrays::DictArray; use vortex::array::arrays::PrimitiveArray; -use vortex::array::builtins::ArrayBuiltins; -use vortex::array::validity::Validity; -use vortex::array::vtable::ValidityHelper; -use vortex::compute; use vortex::dtype::IntegerPType; use vortex::dtype::match_each_integer_ptype; use vortex::error::VortexResult; @@ -33,7 +29,6 @@ use crate::exporter::all_invalid; use crate::exporter::cache::ConversionCache; use crate::exporter::constant; use crate::exporter::new_array_exporter; -use crate::exporter::new_operator_array_exporter; struct DictExporter { // Store the dictionary values once and export the same dictionary with each codes chunk. @@ -48,6 +43,7 @@ struct DictExporter { pub(crate) fn new_exporter_with_flatten( array: &DictArray, cache: &ConversionCache, + ctx: &mut ExecutionCtx, // Whether to return a duckdb flat vector or not. mut flatten: bool, ) -> VortexResult> { @@ -56,9 +52,10 @@ pub(crate) fn new_exporter_with_flatten( let values_type: LogicalType = values.dtype().try_into()?; if let Some(constant) = values.as_opt::() { return constant::new_exporter_with_mask( - &ConstantArray::new(constant.scalar().clone(), array.codes().len()), + ConstantArray::new(constant.scalar().clone(), array.codes().len()), array.codes().validity_mask(), cache, + ctx, ); } @@ -92,7 +89,14 @@ pub(crate) fn new_exporter_with_flatten( canonical } }; - return new_array_exporter(&compute::take(canonical.as_ref(), codes.as_ref())?, cache); + return new_array_exporter( + DictArray::new(array.codes().clone(), canonical.into_array()) + .into_array() + .execute::(ctx)? + .into_array(), + cache, + ctx, + ); } else { // Check if we have a cached vector and extract it if we do. let cached_vector = cache @@ -105,7 +109,11 @@ pub(crate) fn new_exporter_with_flatten( None => { // Create a new DuckDB vector for the values. let mut vector = Vector::with_capacity(values.dtype().try_into()?, values.len()); - new_array_exporter(values, cache)?.export(0, values.len(), &mut vector)?; + new_array_exporter(values.clone(), cache, ctx)?.export( + 0, + values.len(), + &mut vector, + )?; let vector = Arc::new(Mutex::new(vector)); cache @@ -163,109 +171,6 @@ impl> ColumnExporter for DictExporter { } } -pub(crate) fn new_operator_exporter_with_flatten( - array: &DictArray, - cache: &ConversionCache, - ctx: &mut ExecutionCtx, - // Whether to return a duckdb flat vector or not. - mut flatten: bool, -) -> VortexResult> { - // Grab the cache dictionary values. - let values = array.values(); - let values_type: LogicalType = values.dtype().try_into()?; - if let Some(constant) = values.as_opt::() { - return constant::new_exporter_with_mask( - &ConstantArray::new(constant.scalar().clone(), array.codes().len()), - array.codes().is_null()?.not()?.execute::(ctx)?, - cache, - ); - } - - let codes = array - .codes() - .clone() - .execute::(ctx)? - .into_primitive(); - - match codes.validity() { - Validity::AllValid | Validity::NonNullable => {} - Validity::AllInvalid => { - return Ok(all_invalid::new_exporter(array.len(), &values_type)); - } - Validity::Array(_) => { - // duckdb cannot have a dictionary with validity in the codes, so flatten the array and - // apply the validity mask there. - flatten = true; - } - } - - let values_key = Arc::as_ptr(values).addr(); - - let exporter_values = if flatten { - let canonical = cache - .canonical_cache - .get(&values_key) - .map(|entry| entry.value().1.clone()); - let canonical = match canonical { - Some(c) => c, - None => { - let canonical = values.to_canonical()?; - cache - .canonical_cache - .insert(values_key, (values.clone(), canonical.clone())); - canonical - } - }; - - return new_operator_array_exporter( - unsafe { DictArray::new_unchecked(codes.into_array(), canonical.into_array()) } - .into_array() - .execute::(ctx)? - .into_array(), - // Take::take(values, &codes).into_array(array.dtype()), - cache, - ctx, - ); - } else { - // Check if we have a cached vector and extract it if we do. - let cached_vector = cache - .values_cache - .get(&values_key) - .map(|entry| entry.value().1.clone()); - - match cached_vector { - Some(vector) => vector, - None => { - // Create a new DuckDB vector for the values. - let mut vector = Vector::with_capacity(values.dtype().try_into()?, values.len()); - new_operator_array_exporter(values.clone(), cache, ctx)?.export( - 0, - values.len(), - &mut vector, - )?; - - let vector = Arc::new(Mutex::new(vector)); - cache - .values_cache - .insert(values_key, (values.clone(), vector.clone())); - - vector - } - } - }; - - match_each_integer_ptype!(codes.ptype(), |I| { - Ok(Box::new(DictExporter { - values_vector: exporter_values, - values_len: values.len().as_u32(), - codes, - codes_type: PhantomData::, - cache_id: cache.instance_id(), - value_id: values_key, - })) - }) -} - #[cfg(test)] mod tests { use vortex::VortexSessionDefault; @@ -275,24 +180,24 @@ mod tests { use vortex::array::arrays::DictArray; use vortex::array::arrays::PrimitiveArray; use vortex::buffer::Buffer; - use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::session::VortexSession; + use vortex_array::VortexSessionExecute; + use crate::SESSION; use crate::cpp; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; use crate::exporter::dict::new_exporter_with_flatten; - use crate::exporter::dict::new_operator_exporter_with_flatten; use crate::exporter::new_array_exporter; pub(crate) fn new_exporter( array: &DictArray, cache: &ConversionCache, ) -> VortexResult> { - new_exporter_with_flatten(array, cache, false) + new_exporter_with_flatten(array, cache, &mut SESSION.create_execution_ctx(), false) } #[test] @@ -319,31 +224,7 @@ mod tests { } #[test] - fn test_constant_dict_vector() { - let arr = DictArray::new( - PrimitiveArray::from_option_iter([None, Some(0u32)]).into_array(), - ConstantArray::new(10, 1).into_array(), - ); - - let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); - - let mut ctx = ExecutionCtx::new(VortexSession::default()); - new_operator_exporter_with_flatten(&arr, &ConversionCache::default(), &mut ctx, false) - .unwrap() - .export(0, 2, &mut chunk.get_vector(0)) - .unwrap(); - chunk.set_len(2); - - assert_eq!( - format!("{}", String::try_from(&chunk).unwrap()), - r#"Chunk - [1 Columns] -- FLAT INTEGER: 2 = [ NULL, 10] -"# - ); - } - - #[test] - fn test_constant_dict_vector_null() { + fn test_constant_dict_null() { let arr = DictArray::new( PrimitiveArray::from_option_iter([None::, None]).into_array(), ConstantArray::new(10, 1).into_array(), @@ -352,7 +233,7 @@ mod tests { let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); let mut ctx = ExecutionCtx::new(VortexSession::default()); - new_operator_exporter_with_flatten(&arr, &ConversionCache::default(), &mut ctx, false) + new_exporter_with_flatten(&arr, &ConversionCache::default(), &mut ctx, false) .unwrap() .export(0, 2, &mut chunk.get_vector(0)) .unwrap(); @@ -393,10 +274,9 @@ mod tests { DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); new_array_exporter( - arr.to_canonical() - .vortex_expect("to_canonical failed") - .as_ref(), + arr.into_array(), &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), ) .unwrap() .export(0, 3, &mut flat_chunk.get_vector(0)) diff --git a/vortex-duckdb/src/exporter/fixed_size_list.rs b/vortex-duckdb/src/exporter/fixed_size_list.rs index 467e7caf0f4..dfdd1d8976b 100644 --- a/vortex-duckdb/src/exporter/fixed_size_list.rs +++ b/vortex-duckdb/src/exporter/fixed_size_list.rs @@ -8,6 +8,7 @@ //! lists have the same number of elements. //! //! [`DType::FixedSizeList`]: vortex_dtype::DType::FixedSizeList +use vortex::array::ExecutionCtx; use vortex::array::arrays::FixedSizeListArray; use vortex::error::VortexResult; use vortex::mask::Mask; @@ -33,19 +34,24 @@ struct FixedSizeListExporter { pub(crate) fn new_exporter( array: FixedSizeListArray, cache: &ConversionCache, + ctx: &mut ExecutionCtx, ) -> VortexResult> { - let elements_exporter = new_array_exporter_with_flatten(array.elements(), cache, true)?; + let list_size = array.list_size(); + let len = array.len(); + let (elements, validity, dtype) = array.into_parts(); + let mask = validity.to_mask(len); + let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?; - let ltype: LogicalType = array.dtype().try_into()?; + let ltype: LogicalType = (&dtype).try_into()?; - if let Mask::AllFalse(len) = array.validity_mask() { + if mask.all_false() { return Ok(all_invalid::new_exporter(len, <ype)); } Ok(Box::new(FixedSizeListExporter { - validity: array.validity_mask(), + validity: mask, elements_exporter, - list_size: array.list_size(), + list_size, })) } @@ -93,8 +99,10 @@ mod tests { use vortex::array::validity::Validity; use vortex::buffer::buffer; use vortex::error::VortexExpect; + use vortex_array::VortexSessionExecute; use super::*; + use crate::SESSION; use crate::cpp; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; @@ -112,10 +120,14 @@ mod tests { // TODO(connor): This mutable API is brittle. Maybe bundle this logic? let mut chunk = DataChunk::new([array_type]); - new_exporter(fsl, &ConversionCache::default()) - .unwrap() - .export(offset, len, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + fsl, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(offset, len, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(len); chunk @@ -323,10 +335,14 @@ mod tests { let outer_array_type = create_nested_array_type(2, 3); let mut chunk = DataChunk::new([outer_array_type]); - new_exporter(outer_fsl, &ConversionCache::default()) - .unwrap() - .export(0, 2, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + outer_fsl, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 2, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(2); assert_eq!(chunk.len(), 2); @@ -376,10 +392,14 @@ mod tests { let outer_array_type = create_nested_array_type(2, 3); let mut chunk = DataChunk::new([outer_array_type]); - new_exporter(outer_fsl, &ConversionCache::default()) - .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + outer_fsl, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 3, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(3); assert_eq!(chunk.len(), 3); diff --git a/vortex-duckdb/src/exporter/list.rs b/vortex-duckdb/src/exporter/list.rs index d365954cf41..80e86ccabd2 100644 --- a/vortex-duckdb/src/exporter/list.rs +++ b/vortex-duckdb/src/exporter/list.rs @@ -5,10 +5,10 @@ use std::marker::PhantomData; use std::sync::Arc; use parking_lot::Mutex; -use vortex::array::Canonical; +use vortex::array::Array; use vortex::array::ExecutionCtx; -use vortex::array::ToCanonical; use vortex::array::arrays::ListArray; +use vortex::array::arrays::ListArrayParts; use vortex::array::arrays::PrimitiveArray; use vortex::dtype::IntegerPType; use vortex::dtype::match_each_integer_ptype; @@ -18,7 +18,6 @@ use vortex::mask::Mask; use super::ConversionCache; use super::new_array_exporter_with_flatten; -use super::new_array_operator_exporter_with_flatten; use crate::cpp; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -38,14 +37,21 @@ struct ListExporter { } pub(crate) fn new_exporter( - array: &ListArray, + array: ListArray, cache: &ConversionCache, + ctx: &mut ExecutionCtx, ) -> VortexResult> { + let array_len = array.len(); // Cache an `elements` vector up front so that future exports can reference it. - let elements = array.elements(); + let ListArrayParts { + elements, + offsets, + validity, + .. + } = array.into_parts(); let num_elements = elements.len(); - let values_key = Arc::as_ptr(elements).addr(); + let values_key = Arc::as_ptr(&elements).addr(); // Check if we have a cached vector and extract it if we do. let cached_elements = cache .values_cache @@ -57,27 +63,28 @@ pub(crate) fn new_exporter( None => { // We have no cached the vector yet, so create a new DuckDB vector for the elements. let mut duckdb_elements = - Vector::with_capacity(elements.dtype().try_into()?, elements.len()); - let elements_exporter = new_array_exporter_with_flatten(array.elements(), cache, true)?; + Vector::with_capacity(elements.dtype().try_into()?, num_elements); + let elements_exporter = + new_array_exporter_with_flatten(elements.clone(), cache, ctx, true)?; - if !elements.is_empty() { - elements_exporter.export(0, elements.len(), &mut duckdb_elements)?; + if num_elements != 0 { + elements_exporter.export(0, num_elements, &mut duckdb_elements)?; } let shared_elements = Arc::new(Mutex::new(duckdb_elements)); cache .values_cache - .insert(values_key, (elements.clone(), shared_elements.clone())); + .insert(values_key, (elements, shared_elements.clone())); shared_elements } }; - let offsets = array.offsets().to_primitive(); + let offsets = offsets.execute::(ctx)?; let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { Box::new(ListExporter { - validity: array.validity_mask(), + validity: validity.to_mask(array_len), duckdb_elements: shared_elements, offsets, num_elements, @@ -135,67 +142,6 @@ impl ColumnExporter for ListExporter { } } -pub(crate) fn new_operator_exporter( - array: &ListArray, - cache: &ConversionCache, - ctx: &mut ExecutionCtx, -) -> VortexResult> { - // Cache an `elements` vector up front so that future exports can reference it. - let elements = array.elements(); - let num_elements = elements.len(); - - let values_key = Arc::as_ptr(elements).addr(); - // Check if we have a cached vector and extract it if we do. - let cached_elements = cache - .values_cache - .get(&values_key) - .map(|entry| entry.value().1.clone()); - - let shared_elements = match cached_elements { - Some(elements) => elements, - None => { - // We have no cached the vector yet, so create a new DuckDB vector for the elements. - let mut duckdb_elements = - Vector::with_capacity(elements.dtype().try_into()?, elements.len()); - let elements_exporter = new_array_operator_exporter_with_flatten( - array.elements().clone(), - cache, - ctx, - true, - )?; - - if !elements.is_empty() { - elements_exporter.export(0, elements.len(), &mut duckdb_elements)?; - } - - let shared_elements = Arc::new(Mutex::new(duckdb_elements)); - cache - .values_cache - .insert(values_key, (elements.clone(), shared_elements.clone())); - - shared_elements - } - }; - - let offsets = array - .offsets() - .clone() - .execute::(ctx)? - .into_primitive(); - - let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { - Box::new(ListExporter { - validity: array.validity_mask(), - duckdb_elements: shared_elements, - offsets, - num_elements, - offset_type: PhantomData::, - }) as Box - }); - - Ok(boxed) -} - #[cfg(test)] mod tests { use vortex::array::IntoArray as _; @@ -204,8 +150,10 @@ mod tests { use vortex::buffer::Buffer; use vortex::buffer::buffer; use vortex::error::VortexExpect; + use vortex_array::VortexSessionExecute; use super::*; + use crate::SESSION; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; use crate::exporter::new_array_exporter; @@ -226,10 +174,14 @@ mod tests { .vortex_expect("LogicalType creation should succeed for test data"); let mut chunk = DataChunk::new([list_type]); - new_array_exporter(&list, &ConversionCache::default()) - .unwrap() - .export(0, 0, &mut chunk.get_vector(0)) - .unwrap(); + new_array_exporter( + list, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 0, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(0); assert_eq!( @@ -261,10 +213,14 @@ mod tests { .vortex_expect("LogicalType creation should succeed for test data"); let mut chunk = DataChunk::new([list_type]); - new_array_exporter(&list, &ConversionCache::default()) - .unwrap() - .export(0, 4, &mut chunk.get_vector(0)) - .unwrap(); + new_array_exporter( + list, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 4, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(4); assert_eq!( diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index 2ca9c03b929..42b5a23c1c9 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -8,7 +8,6 @@ use parking_lot::Mutex; use vortex::array::Array; use vortex::array::Canonical; use vortex::array::ExecutionCtx; -use vortex::array::ToCanonical; use vortex::array::arrays::ListViewArray; use vortex::array::arrays::PrimitiveArray; use vortex::dtype::IntegerPType; @@ -19,7 +18,6 @@ use vortex::mask::Mask; use super::ConversionCache; use super::new_array_exporter_with_flatten; -use super::new_array_operator_exporter_with_flatten; use crate::cpp; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -40,9 +38,11 @@ struct ListViewExporter { size_type: PhantomData, } +// TODO(joe): into parts pub(crate) fn new_exporter( array: ListViewArray, cache: &ConversionCache, + ctx: &mut ExecutionCtx, ) -> VortexResult> { // Cache an `elements` vector up front so that future exports can reference it. let elements = array.elements(); @@ -61,7 +61,8 @@ pub(crate) fn new_exporter( // We have no cached the vector yet, so create a new DuckDB vector for the elements. let mut duckdb_elements = Vector::with_capacity(elements.dtype().try_into()?, elements.len()); - let elements_exporter = new_array_exporter_with_flatten(array.elements(), cache, true)?; + let elements_exporter = + new_array_exporter_with_flatten(array.elements().clone(), cache, ctx, true)?; if !elements.is_empty() { elements_exporter.export(0, elements.len(), &mut duckdb_elements)?; @@ -76,8 +77,16 @@ pub(crate) fn new_exporter( } }; - let offsets = array.offsets().to_primitive(); - let sizes = array.sizes().to_primitive(); + let offsets = array + .offsets() + .clone() + .execute::(ctx)? + .into_primitive(); + let sizes = array + .sizes() + .clone() + .execute::(ctx)? + .into_primitive(); let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { match_each_integer_ptype!(sizes.ptype(), |S| { @@ -145,76 +154,6 @@ impl ColumnExporter for ListViewExporter } } -pub(crate) fn new_operator_exporter( - array: ListViewArray, - cache: &ConversionCache, - ctx: &mut ExecutionCtx, -) -> VortexResult> { - // Cache an `elements` vector up front so that future exports can reference it. - let elements = array.elements(); - let num_elements = elements.len(); - - let values_key = Arc::as_ptr(elements).addr(); - // Check if we have a cached vector and extract it if we do. - let cached_elements = cache - .values_cache - .get(&values_key) - .map(|entry| entry.value().1.clone()); - - let shared_elements = match cached_elements { - Some(elements) => elements, - None => { - // We have no cached the vector yet, so create a new DuckDB vector for the elements. - let mut duckdb_elements = - Vector::with_capacity(elements.dtype().try_into()?, elements.len()); - let elements_exporter = new_array_operator_exporter_with_flatten( - array.elements().clone(), - cache, - ctx, - true, - )?; - - if !elements.is_empty() { - elements_exporter.export(0, elements.len(), &mut duckdb_elements)?; - } - - let shared_elements = Arc::new(Mutex::new(duckdb_elements)); - cache - .values_cache - .insert(values_key, (elements.clone(), shared_elements.clone())); - - shared_elements - } - }; - - let offsets = array - .offsets() - .clone() - .execute::(ctx)? - .into_primitive(); - let sizes = array - .sizes() - .clone() - .execute::(ctx)? - .into_primitive(); - - let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { - match_each_integer_ptype!(sizes.ptype(), |S| { - Box::new(ListViewExporter { - validity: array.validity_mask(), - duckdb_elements: shared_elements, - offsets, - sizes, - num_elements, - offset_type: PhantomData::, - size_type: PhantomData::, - }) as Box - }) - }); - - Ok(boxed) -} - #[cfg(test)] mod tests { use vortex::array::IntoArray as _; @@ -223,8 +162,10 @@ mod tests { use vortex::buffer::Buffer; use vortex::buffer::buffer; use vortex::error::VortexExpect; + use vortex_array::VortexSessionExecute; use super::*; + use crate::SESSION; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; use crate::exporter::new_array_exporter; @@ -247,10 +188,14 @@ mod tests { .vortex_expect("LogicalType creation should succeed for test data"); let mut chunk = DataChunk::new([list_type]); - new_array_exporter(&list, &ConversionCache::default()) - .unwrap() - .export(0, 0, &mut chunk.get_vector(0)) - .unwrap(); + new_array_exporter( + list, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 0, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(0); assert_eq!( @@ -278,10 +223,14 @@ mod tests { .vortex_expect("LogicalType creation should succeed for test data"); let mut chunk = DataChunk::new([list_type]); - new_array_exporter(&list, &ConversionCache::default()) - .unwrap() - .export(0, 3, &mut chunk.get_vector(0)) - .unwrap(); + new_array_exporter( + list, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 3, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(3); assert_eq!( @@ -315,10 +264,14 @@ mod tests { .vortex_expect("LogicalType creation should succeed for test data"); let mut chunk = DataChunk::new([list_type]); - new_array_exporter(&list, &ConversionCache::default()) - .unwrap() - .export(0, 4, &mut chunk.get_vector(0)) - .unwrap(); + new_array_exporter( + list, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 4, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(4); assert_eq!( diff --git a/vortex-duckdb/src/exporter/mod.rs b/vortex-duckdb/src/exporter/mod.rs index 987f7832d26..3ac5d1e9025 100644 --- a/vortex-duckdb/src/exporter/mod.rs +++ b/vortex-duckdb/src/exporter/mod.rs @@ -23,7 +23,6 @@ use bitvec::prelude::Lsb0; use bitvec::view::BitView; pub use cache::ConversionCache; pub use decimal::precision_to_duckdb_storage_size; -use vortex::array::Array; use vortex::array::ArrayRef; use vortex::array::Canonical; use vortex::array::ExecutionCtx; @@ -61,7 +60,7 @@ impl ArrayExporter { let fields = array .fields() .iter() - .map(|field| new_operator_array_exporter(field.clone(), cache, ctx)) + .map(|field| new_array_exporter(field.clone(), cache, ctx)) .collect::>>()?; Ok(Self { fields, @@ -114,93 +113,41 @@ pub trait ColumnExporter { } fn new_array_exporter( - array: &dyn Array, - cache: &ConversionCache, -) -> VortexResult> { - new_array_exporter_with_flatten(array, cache, false) -} - -/// Create a DuckDB exporter for the given Vortex array. -fn new_array_exporter_with_flatten( - array: &dyn Array, - cache: &ConversionCache, - flatten: bool, -) -> VortexResult> { - if let Some(array) = array.as_opt::() { - return constant::new_exporter(array); - } - - if let Some(array) = array.as_opt::() { - return run_end::new_exporter(array, cache); - } - - if let Some(array) = array.as_opt::() { - return dict::new_exporter_with_flatten(array, cache, flatten); - } - - if let Some(array) = array.as_opt::() { - return sequence::new_exporter(array); - } - - if let Some(array) = array.as_opt::() { - return list::new_exporter(array, cache); - } - - // Otherwise, we fall back to canonical - match array.to_canonical()? { - Canonical::Null(_) => Ok(all_invalid::new_exporter(array.len(), &LogicalType::null())), - Canonical::Bool(array) => bool::new_exporter(array), - Canonical::Primitive(array) => primitive::new_exporter(array), - Canonical::Decimal(array) => decimal::new_exporter(array), - Canonical::Struct(array) => struct_::new_exporter(array, cache), - Canonical::List(array) => list_view::new_exporter(array, cache), - Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache), - Canonical::VarBinView(array) => varbinview::new_exporter(array), - Canonical::Extension(ext) => { - if is_temporal_ext_type(ext.id()) { - let temporal_array = - TemporalArray::try_from(ext).vortex_expect("id is a temporal array"); - return temporal::new_exporter(&temporal_array); - } - todo!("no non-temporal extension exporter") - } - } -} - -fn new_operator_array_exporter( array: ArrayRef, cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { - new_array_operator_exporter_with_flatten(array, cache, ctx, false) + new_array_exporter_with_flatten(array, cache, ctx, false) } /// Create a DuckDB exporter for the given Vortex array. -fn new_array_operator_exporter_with_flatten( +fn new_array_exporter_with_flatten( array: ArrayRef, cache: &ConversionCache, ctx: &mut ExecutionCtx, flatten: bool, ) -> VortexResult> { - if let Some(array) = array.as_opt::() { - return constant::new_exporter(array); - } + let array = match array.try_into::() { + Ok(array) => return constant::new_exporter(array), + Err(array) => array, + }; if let Some(array) = array.as_opt::() { return sequence::new_exporter(array); } if let Some(array) = array.as_opt::() { - return run_end::new_operator_exporter(array, cache, ctx); + return run_end::new_exporter(array, cache, ctx); } if let Some(array) = array.as_opt::() { - return dict::new_operator_exporter_with_flatten(array, cache, ctx, flatten); + return dict::new_exporter_with_flatten(array, cache, ctx, flatten); } - if let Some(array) = array.as_opt::() { - return list::new_operator_exporter(array, cache, ctx); - } + let array = match array.try_into::() { + Ok(array) => return list::new_exporter(array, cache, ctx), + Err(array) => array, + }; // Otherwise, we fall back to canonical match array.execute::(ctx)? { @@ -209,14 +156,14 @@ fn new_array_operator_exporter_with_flatten( Canonical::Primitive(array) => primitive::new_exporter(array), Canonical::Decimal(array) => decimal::new_exporter(array), Canonical::VarBinView(array) => varbinview::new_exporter(array), - Canonical::List(array) => list_view::new_operator_exporter(array, cache, ctx), - Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache), - Canonical::Struct(array) => struct_::new_operator_exporter(array, cache, ctx), + Canonical::List(array) => list_view::new_exporter(array, cache, ctx), + Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache, ctx), + Canonical::Struct(array) => struct_::new_exporter(array, cache, ctx), Canonical::Extension(ext) => { if is_temporal_ext_type(ext.id()) { let temporal_array = TemporalArray::try_from(ext).vortex_expect("id is a temporal array"); - return temporal::new_operator_exporter(temporal_array, ctx); + return temporal::new_exporter(temporal_array, ctx); } vortex_bail!("no non-temporal extension exporter") } diff --git a/vortex-duckdb/src/exporter/run_end.rs b/vortex-duckdb/src/exporter/run_end.rs index 20010703a49..c576ceb0904 100644 --- a/vortex-duckdb/src/exporter/run_end.rs +++ b/vortex-duckdb/src/exporter/run_end.rs @@ -6,7 +6,6 @@ use std::marker::PhantomData; use vortex::array::ArrayRef; use vortex::array::Canonical; use vortex::array::ExecutionCtx; -use vortex::array::ToCanonical; use vortex::array::arrays::PrimitiveArray; use vortex::array::search_sorted::SearchSorted; use vortex::array::search_sorted::SearchSortedSide; @@ -22,7 +21,6 @@ use crate::duckdb::Vector; use crate::exporter::ColumnExporter; use crate::exporter::cache::ConversionCache; use crate::exporter::new_array_exporter; -use crate::exporter::new_operator_array_exporter; /// We export run-end arrays to a DuckDB dictionary vector, using a selection vector to /// repeat the values in the run-end array. @@ -34,28 +32,10 @@ struct RunEndExporter { run_end_offset: usize, } +// TODO(joe): into_parts. pub(crate) fn new_exporter( array: &RunEndArray, cache: &ConversionCache, -) -> VortexResult> { - let ends = array.ends().to_primitive(); - let values = array.values().clone(); - let values_exporter = new_array_exporter(array.values(), cache)?; - - match_each_integer_ptype!(ends.ptype(), |E| { - Ok(Box::new(RunEndExporter { - ends, - ends_type: PhantomData::, - values, - values_exporter, - run_end_offset: array.offset(), - })) - }) -} - -pub(crate) fn new_operator_exporter( - array: &RunEndArray, - cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { let ends = array @@ -64,7 +44,7 @@ pub(crate) fn new_operator_exporter( .execute::(ctx)? .into_primitive(); let values = array.values().clone(); - let values_exporter = new_operator_array_exporter(values.clone(), cache, ctx)?; + let values_exporter = new_array_exporter(values.clone(), cache, ctx)?; match_each_integer_ptype!(ends.ptype(), |E| { Ok(Box::new(RunEndExporter { diff --git a/vortex-duckdb/src/exporter/struct_.rs b/vortex-duckdb/src/exporter/struct_.rs index a6a6dc26b8d..4eaec938176 100644 --- a/vortex-duckdb/src/exporter/struct_.rs +++ b/vortex-duckdb/src/exporter/struct_.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use std::ops::Not; + use vortex::array::ExecutionCtx; use vortex::array::arrays::StructArray; use vortex::array::optimizer::ArrayOptimizer; @@ -14,53 +16,16 @@ use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; use crate::exporter::all_invalid; use crate::exporter::new_array_exporter; -use crate::exporter::new_operator_array_exporter; use crate::exporter::validity; struct StructExporter { children: Vec>, } +// // TODO(joe): into parts pub(crate) fn new_exporter( array: StructArray, cache: &ConversionCache, -) -> VortexResult> { - let validity = array.validity_mask(); - // DuckDB requires that the validity of the child be a subset of the parent struct so we mask out children with - // parents nullability - let validity_for_mask = array.dtype().is_nullable().then(|| !&validity); - - let children = array - .fields() - .iter() - .map(|child| { - if let Some(mv) = validity_for_mask.as_ref() { - new_array_exporter(&mask(child, mv)?, cache) - } else { - new_array_exporter(child, cache) - } - }) - .collect::>>()?; - let struct_exporter = Box::new(StructExporter { children }); - Ok(if array.dtype().is_nullable() { - validity::new_exporter(validity, struct_exporter) - } else { - struct_exporter - }) -} - -impl ColumnExporter for StructExporter { - fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { - for (idx, child) in self.children.iter().enumerate() { - child.export(offset, len, &mut vector.struct_vector_get_child(idx))?; - } - Ok(()) - } -} - -pub(crate) fn new_operator_exporter( - array: StructArray, - cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { let validity = array.validity_mask(); @@ -78,9 +43,13 @@ pub(crate) fn new_operator_exporter( .map(|child| { if matches!(validity, Mask::Values(_)) { // TODO(joe): use new mask. - new_operator_array_exporter(mask(child, &validity)?.optimize()?, cache, ctx) + new_array_exporter( + mask(child, &validity.clone().not())?.optimize()?, + cache, + ctx, + ) } else { - new_operator_array_exporter(child.clone(), cache, ctx) + new_array_exporter(child.clone(), cache, ctx) } }) .collect::>>()?; @@ -90,6 +59,15 @@ pub(crate) fn new_operator_exporter( )) } +impl ColumnExporter for StructExporter { + fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { + for (idx, child) in self.children.iter().enumerate() { + child.export(offset, len, &mut vector.struct_vector_get_child(idx))?; + } + Ok(()) + } +} + #[cfg(test)] mod tests { use std::ffi::CString; @@ -103,8 +81,10 @@ mod tests { use vortex::buffer::BitBuffer; use vortex::buffer::buffer; use vortex::error::VortexExpect; + use vortex_array::VortexSessionExecute; use super::*; + use crate::SESSION; use crate::cpp; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; @@ -126,10 +106,14 @@ mod tests { ) .vortex_expect("LogicalType creation should succeed for test data")]); - new_exporter(arr, &ConversionCache::default()) - .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + arr, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 10, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(10); assert_eq!( @@ -186,10 +170,14 @@ mod tests { ) .vortex_expect("LogicalType creation should succeed for test data")]); - new_exporter(arr, &ConversionCache::default()) - .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + arr, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 10, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(10); assert_eq!( @@ -227,10 +215,14 @@ mod tests { ) .vortex_expect("LogicalType creation should succeed for test data")]); - new_exporter(arr, &ConversionCache::default()) - .unwrap() - .export(0, 10, &mut chunk.get_vector(0)) - .unwrap(); + new_exporter( + arr, + &ConversionCache::default(), + &mut SESSION.create_execution_ctx(), + ) + .unwrap() + .export(0, 10, &mut chunk.get_vector(0)) + .unwrap(); chunk.set_len(10); assert_eq!( diff --git a/vortex-duckdb/src/exporter/temporal.rs b/vortex-duckdb/src/exporter/temporal.rs index 503e3b3dc7d..8e4fd0d3e2f 100644 --- a/vortex-duckdb/src/exporter/temporal.rs +++ b/vortex-duckdb/src/exporter/temporal.rs @@ -3,7 +3,6 @@ use vortex::array::Canonical; use vortex::array::ExecutionCtx; -use vortex::array::ToCanonical; use vortex::array::arrays::TemporalArray; use vortex::error::VortexResult; @@ -15,21 +14,13 @@ struct TemporalExporter { storage_type_exporter: Box, } -pub(crate) fn new_exporter(array: &TemporalArray) -> VortexResult> { - Ok(Box::new(TemporalExporter { - storage_type_exporter: primitive::new_exporter( - array.temporal_values().clone().to_primitive(), - )?, - })) -} - impl ColumnExporter for TemporalExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { self.storage_type_exporter.export(offset, len, vector) } } -pub(crate) fn new_operator_exporter( +pub(crate) fn new_exporter( array: TemporalArray, ctx: &mut ExecutionCtx, ) -> VortexResult> { @@ -51,7 +42,9 @@ mod tests { use vortex::array::arrays::TemporalArray; use vortex::buffer::buffer; use vortex::dtype::datetime::TimeUnit; + use vortex_array::VortexSessionExecute; + use crate::SESSION; use crate::cpp; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; @@ -67,7 +60,7 @@ mod tests { let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP_S)]); - new_exporter(&arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(1, 5, &mut chunk.get_vector(0)) .unwrap(); @@ -91,7 +84,7 @@ mod tests { ); let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_TIMESTAMP)]); - new_exporter(&arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(1, 5, &mut chunk.get_vector(0)) .unwrap(); @@ -114,7 +107,7 @@ mod tests { let mut chunk = DataChunk::new([LogicalType::try_from(arr.dtype()).unwrap()]); - new_exporter(&arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(1, 5, &mut chunk.get_vector(0)) .unwrap(); diff --git a/vortex-duckdb/src/lib.rs b/vortex-duckdb/src/lib.rs index 5d12cf19b8e..108dd3dfbd1 100644 --- a/vortex-duckdb/src/lib.rs +++ b/vortex-duckdb/src/lib.rs @@ -98,6 +98,7 @@ pub unsafe extern "C" fn vortex_init_rust(db: cpp::duckdb_database) { let conn = database .connect() + .inspect_err(|e| println!("err {e}")) .vortex_expect("Failed to connect to DuckDB database"); register_table_functions(&conn).vortex_expect("Failed to initialize Vortex extension"); } diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 50e59b0e02e..0bd7d00dceb 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -331,6 +331,7 @@ where #[inline(always)] fn vortex_expect(self, msg: &'static str) -> Self::Output { self.map_err(|err| err.into()) + .inspect_err(|e| println!("got a big ERROR {e}")) .unwrap_or_else(|e| vortex_panic!(e.with_context(msg.to_string()))) } } diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index ec0776c7380..e0560cd860e 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -622,7 +622,7 @@ impl PyArray { ))); } - let inner = take(&slf, &*indices)?; + let inner = &slf.take(&*indices)?; Ok(PyArrayRef::from(inner)) } From 8271e6c85764d02ab93be9fb8a85bd9fcb6f5b02 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 14:25:28 +0000 Subject: [PATCH 2/8] clean Signed-off-by: Joe Isaacs --- encodings/runend/src/array.rs | 14 ++++ vortex-array/src/arrays/bool/array.rs | 16 ++++ vortex-array/src/arrays/bool/mod.rs | 3 +- vortex-array/src/arrays/decimal/array.rs | 4 +- vortex-array/src/arrays/listview/array.rs | 32 +++++++- vortex-array/src/arrays/listview/mod.rs | 1 + vortex-array/src/arrays/varbinview/array.rs | 17 ++++ vortex-array/src/arrays/varbinview/mod.rs | 1 + vortex-array/src/arrow/executor/list.rs | 17 +++- vortex-array/src/arrow/executor/list_view.rs | 9 ++- vortex-array/src/arrow/executor/struct_.rs | 4 +- vortex-array/src/canonical_to_vector.rs | 11 ++- vortex-duckdb/src/convert/dtype.rs | 46 +++++++---- vortex-duckdb/src/exporter/bool.rs | 81 ++++++++++++++------ vortex-duckdb/src/exporter/decimal.rs | 12 ++- vortex-duckdb/src/exporter/list_view.rs | 34 ++++---- vortex-duckdb/src/exporter/mod.rs | 15 ++-- vortex-duckdb/src/exporter/primitive.rs | 22 +++++- vortex-duckdb/src/exporter/run_end.rs | 16 ++-- vortex-duckdb/src/exporter/struct_.rs | 21 +++-- vortex-duckdb/src/exporter/temporal.rs | 2 + vortex-duckdb/src/exporter/validity.rs | 4 + vortex-duckdb/src/exporter/varbinview.rs | 39 ++++++---- vortex-duckdb/src/exporter/vector.rs | 2 +- 24 files changed, 309 insertions(+), 114 deletions(-) diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 01446be7dc7..eb6fa744362 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -165,6 +165,11 @@ pub struct RunEndArray { stats_set: ArrayStats, } +pub struct RunEndArrayParts { + pub ends: ArrayRef, + pub values: ArrayRef, +} + #[derive(Debug)] pub struct RunEndVTable; @@ -393,6 +398,15 @@ impl RunEndArray { pub fn values(&self) -> &ArrayRef { &self.values } + + /// Split an `RunEndArray` into parts. + #[inline] + pub fn into_parts(self) -> RunEndArrayParts { + RunEndArrayParts { + values: self.values, + ends: self.ends, + } + } } impl BaseArrayVTable for RunEndVTable { diff --git a/vortex-array/src/arrays/bool/array.rs b/vortex-array/src/arrays/bool/array.rs index d6e6e61bac0..28850af39d8 100644 --- a/vortex-array/src/arrays/bool/array.rs +++ b/vortex-array/src/arrays/bool/array.rs @@ -53,6 +53,12 @@ pub struct BoolArray { pub(super) stats_set: ArrayStats, } +pub struct BoolArrayParts { + pub dtype: DType, + pub bits: BitBuffer, + pub validity: Validity, +} + impl BoolArray { /// Constructs a new `BoolArray`. /// @@ -122,6 +128,16 @@ impl BoolArray { Ok(()) } + /// Splits into owned parts + #[inline] + pub fn into_parts(self) -> BoolArrayParts { + BoolArrayParts { + dtype: self.dtype, + bits: self.bits, + validity: self.validity, + } + } + /// Creates a new [`BoolArray`] from a [`BitBuffer`] and [`Validity`] directly. /// /// # Panics diff --git a/vortex-array/src/arrays/bool/mod.rs b/vortex-array/src/arrays/bool/mod.rs index 16a0cde3bf9..96c9673eb52 100644 --- a/vortex-array/src/arrays/bool/mod.rs +++ b/vortex-array/src/arrays/bool/mod.rs @@ -4,7 +4,8 @@ mod array; mod patch; -pub use array::*; +pub use array::BoolArray; +pub use array::BoolArrayParts; pub mod compute; diff --git a/vortex-array/src/arrays/decimal/array.rs b/vortex-array/src/arrays/decimal/array.rs index 4bc9ead5067..6308fc57c82 100644 --- a/vortex-array/src/arrays/decimal/array.rs +++ b/vortex-array/src/arrays/decimal/array.rs @@ -232,10 +232,10 @@ impl DecimalArray { pub fn into_parts(self) -> DecimalArrayParts { let nullability = self.dtype.nullability(); - let dtype = self.dtype.into_decimal_opt().vortex_expect("cannot fail"); + let decimal_dtype = self.dtype.into_decimal_opt().vortex_expect("cannot fail"); DecimalArrayParts { - dtype, + decimal_dtype, nullability, values: self.values, values_type: self.values_type, diff --git a/vortex-array/src/arrays/listview/array.rs b/vortex-array/src/arrays/listview/array.rs index dcffc007d27..1c9389a9984 100644 --- a/vortex-array/src/arrays/listview/array.rs +++ b/vortex-array/src/arrays/listview/array.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use num_traits::AsPrimitive; use vortex_dtype::DType; use vortex_dtype::IntegerPType; +use vortex_dtype::Nullability; use vortex_dtype::match_each_integer_ptype; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -124,6 +125,24 @@ pub struct ListViewArray { pub(super) stats_set: ArrayStats, } +pub struct ListViewArrayParts { + pub nullability: Nullability, + + pub elements_dtype: Arc, + + /// See `ListViewArray::elements` + pub elements: ArrayRef, + + /// See `ListViewArray::offsets` + pub offsets: ArrayRef, + + /// See `ListViewArray::sizes` + pub sizes: ArrayRef, + + /// See `ListViewArray::validity` + pub validity: Validity, +} + impl ListViewArray { /// Creates a new [`ListViewArray`]. /// @@ -319,8 +338,17 @@ impl ListViewArray { .is_ok() } - pub fn into_parts(self) -> (ArrayRef, ArrayRef, ArrayRef, Validity) { - (self.elements, self.offsets, self.sizes, self.validity) + pub fn into_parts(self) -> ListViewArrayParts { + let nullability = self.dtype.nullability(); + let dtype = self.dtype.into_list_element_opt().vortex_expect("is list"); + ListViewArrayParts { + nullability, + elements_dtype: dtype, + elements: self.elements, + offsets: self.offsets, + sizes: self.sizes, + validity: self.validity, + } } /// Returns the offset at the given index. diff --git a/vortex-array/src/arrays/listview/mod.rs b/vortex-array/src/arrays/listview/mod.rs index 0292a2c8c84..7ed668bd6e9 100644 --- a/vortex-array/src/arrays/listview/mod.rs +++ b/vortex-array/src/arrays/listview/mod.rs @@ -3,6 +3,7 @@ mod array; pub use array::ListViewArray; +pub use array::ListViewArrayParts; mod compute; diff --git a/vortex-array/src/arrays/varbinview/array.rs b/vortex-array/src/arrays/varbinview/array.rs index 1c7aacc45d0..42639d40f0a 100644 --- a/vortex-array/src/arrays/varbinview/array.rs +++ b/vortex-array/src/arrays/varbinview/array.rs @@ -88,6 +88,13 @@ pub struct VarBinViewArray { pub(super) stats_set: ArrayStats, } +pub struct VarBinViewArrayParts { + pub dtype: DType, + pub buffers: Arc<[ByteBuffer]>, + pub views: Buffer, + pub validity: Validity, +} + impl VarBinViewArray { /// Creates a new [`VarBinViewArray`]. /// @@ -262,6 +269,16 @@ impl VarBinViewArray { Ok(()) } + /// Splits the array into owned parts + pub fn into_parts(self) -> VarBinViewArrayParts { + VarBinViewArrayParts { + dtype: self.dtype, + buffers: self.buffers, + views: self.views, + validity: self.validity, + } + } + /// Number of raw string data buffers held by this array. pub fn nbuffers(&self) -> usize { self.buffers.len() diff --git a/vortex-array/src/arrays/varbinview/mod.rs b/vortex-array/src/arrays/varbinview/mod.rs index 9cc3a883f21..af6be468c18 100644 --- a/vortex-array/src/arrays/varbinview/mod.rs +++ b/vortex-array/src/arrays/varbinview/mod.rs @@ -3,6 +3,7 @@ mod array; pub use array::VarBinViewArray; +pub use array::VarBinViewArrayParts; mod accessor; pub(crate) mod compact; diff --git a/vortex-array/src/arrow/executor/list.rs b/vortex-array/src/arrow/executor/list.rs index 597135fdab0..adab81da60c 100644 --- a/vortex-array/src/arrow/executor/list.rs +++ b/vortex-array/src/arrow/executor/list.rs @@ -25,6 +25,7 @@ use crate::IntoArray; use crate::arrays::ListArray; use crate::arrays::ListVTable; use crate::arrays::ListViewArray; +use crate::arrays::ListViewArrayParts; use crate::arrays::ListViewVTable; use crate::arrays::PrimitiveArray; use crate::arrow::ArrowArrayExecutor; @@ -115,7 +116,13 @@ fn list_view_zctl( ) -> VortexResult { assert!(array.is_zero_copy_to_list()); - let (elements, offsets, sizes, validity) = array.into_parts(); + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = array.into_parts(); // For ZCTL, we know that we only care about the final size. let final_size = sizes @@ -169,7 +176,13 @@ fn list_view_to_list( elements_field: &FieldRef, ctx: &mut ExecutionCtx, ) -> VortexResult { - let (elements, offsets, sizes, validity) = array.into_parts(); + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = array.into_parts(); let offsets = offsets .cast(DType::Primitive(O::PTYPE, Nullability::NonNullable))? diff --git a/vortex-array/src/arrow/executor/list_view.rs b/vortex-array/src/arrow/executor/list_view.rs index 96e7fb791f9..dc8edd8efc5 100644 --- a/vortex-array/src/arrow/executor/list_view.rs +++ b/vortex-array/src/arrow/executor/list_view.rs @@ -15,6 +15,7 @@ use vortex_error::vortex_ensure; use crate::ArrayRef; use crate::ExecutionCtx; use crate::arrays::ListViewArray; +use crate::arrays::ListViewArrayParts; use crate::arrays::ListViewVTable; use crate::arrays::PrimitiveArray; use crate::arrow::ArrowArrayExecutor; @@ -42,7 +43,13 @@ fn list_view_to_list_view( elements_field: &FieldRef, ctx: &mut ExecutionCtx, ) -> VortexResult { - let (elements, offsets, sizes, validity) = array.into_parts(); + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = array.into_parts(); let elements = elements.execute_arrow(Some(elements_field.data_type()), ctx)?; vortex_ensure!( diff --git a/vortex-array/src/arrow/executor/struct_.rs b/vortex-array/src/arrow/executor/struct_.rs index a33843bd4b9..6b7b4581cba 100644 --- a/vortex-array/src/arrow/executor/struct_.rs +++ b/vortex-array/src/arrow/executor/struct_.rs @@ -134,7 +134,7 @@ fn create_from_fields( ); let mut arrow_arrays = Vec::with_capacity(vortex_fields.len()); - for (field, vx_field) in fields.iter().zip_eq(vortex_fields.into_iter()) { + for (field, vx_field) in fields.iter().zip_eq(vortex_fields.iter()) { let arrow_field = vx_field .clone() .execute_arrow(Some(field.data_type()), ctx)?; @@ -158,7 +158,7 @@ fn create_from_fields( Err(names) => { // No target fields specified - use preferred types for each child let mut arrow_arrays = Vec::with_capacity(vortex_fields.len()); - for vx_field in vortex_fields.into_iter() { + for vx_field in vortex_fields.iter() { let arrow_array = vx_field.clone().execute_arrow(None, ctx)?; arrow_arrays.push(arrow_array); } diff --git a/vortex-array/src/canonical_to_vector.rs b/vortex-array/src/canonical_to_vector.rs index c6cf44da215..9e8bcd04d7a 100644 --- a/vortex-array/src/canonical_to_vector.rs +++ b/vortex-array/src/canonical_to_vector.rs @@ -29,9 +29,11 @@ use crate::ArrayRef; use crate::Canonical; use crate::Executable; use crate::ExecutionCtx; +use crate::arrays::ListViewArrayParts; use crate::arrays::PrimitiveArray; impl Executable for Vector { + #[expect(deprecated)] fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { let canonical = array.execute::(ctx)?; canonical.to_vector(ctx) @@ -44,6 +46,7 @@ impl Canonical { /// This is the reverse of `VectorIntoArray` - it takes a fully materialized /// canonical array and converts it into the corresponding vector type. /// TODO(joe): move over the execute_mask + #[deprecated] pub fn to_vector(self, ctx: &mut ExecutionCtx) -> VortexResult { Ok(match self { Canonical::Null(a) => Vector::Null(NullVector::new(a.len())), @@ -116,7 +119,13 @@ impl Canonical { } } Canonical::List(a) => { - let (elements, offsets, sizes, validity) = a.into_parts(); + let ListViewArrayParts { + elements, + offsets, + sizes, + validity, + .. + } = a.into_parts(); let validity = validity.to_array(offsets.len()).execute::(ctx)?; let elements_vector = elements.execute::(ctx)?; diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index e9db0d5669b..756c87ca7d4 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -230,21 +230,7 @@ impl TryFrom<&DType> for LogicalType { DType::Utf8(_) => DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR, DType::Binary(_) => DUCKDB_TYPE::DUCKDB_TYPE_BLOB, DType::Struct(struct_type, _) => { - let child_types: Vec = struct_type - .fields() - .map(|field_dtype| LogicalType::try_from(&field_dtype)) - .collect::>()?; - - let child_names: Vec = struct_type - .names() - .iter() - .map(|field_name| { - CString::new(field_name.as_ref()) - .map_err(|e| vortex_err!("Invalid field name '{field_name}': {e}")) - }) - .collect::>()?; - - return LogicalType::struct_type(child_types, child_names); + return LogicalType::try_from(struct_type); } DType::Decimal(decimal_dtype, _) => { return LogicalType::decimal_type( @@ -273,6 +259,36 @@ impl TryFrom<&DType> for LogicalType { } } +impl TryFrom for LogicalType { + type Error = VortexError; + + fn try_from(struct_type: StructFields) -> Result { + LogicalType::try_from(&struct_type) + } +} + +impl TryFrom<&StructFields> for LogicalType { + type Error = VortexError; + + fn try_from(struct_type: &StructFields) -> Result { + let child_types: Vec = struct_type + .fields() + .map(|field_dtype| LogicalType::try_from(&field_dtype)) + .collect::>()?; + + let child_names: Vec = struct_type + .names() + .iter() + .map(|field_name| { + CString::new(field_name.as_ref()) + .map_err(|e| vortex_err!("Invalid field name '{field_name}': {e}")) + }) + .collect::>()?; + + LogicalType::struct_type(child_types, child_names) + } +} + /// Converts temporal extension types to corresponding DuckDB types. /// /// # Arguments diff --git a/vortex-duckdb/src/exporter/bool.rs b/vortex-duckdb/src/exporter/bool.rs index be2d4bc09a8..0ca88e12c63 100644 --- a/vortex-duckdb/src/exporter/bool.rs +++ b/vortex-duckdb/src/exporter/bool.rs @@ -2,42 +2,36 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use itertools::Itertools; +use vortex::array::ExecutionCtx; use vortex::array::arrays::BoolArray; +use vortex::array::arrays::BoolArrayParts; use vortex::buffer::BitBuffer; use vortex::error::VortexResult; use vortex::mask::Mask; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; -use crate::exporter::all_invalid; +use crate::exporter::validity; struct BoolExporter { bit_buffer: BitBuffer, - validity_mask: Mask, } -pub(crate) fn new_exporter(array: BoolArray) -> VortexResult> { - let validity_mask = array.validity_mask(); - if validity_mask.all_false() { - return Ok(all_invalid::new_exporter( - array.len(), - &array.dtype().try_into()?, - )); - } - Ok(Box::new(BoolExporter { - bit_buffer: array.bit_buffer().clone(), - validity_mask, - })) +pub(crate) fn new_exporter( + array: BoolArray, + ctx: &mut ExecutionCtx, +) -> VortexResult> { + let len = array.len(); + let BoolArrayParts { validity, bits, .. } = array.into_parts(); + let validity = validity.to_array(len).execute::(ctx)?; + Ok(validity::new_exporter( + validity, + Box::new(BoolExporter { bit_buffer: bits }), + )) } impl ColumnExporter for BoolExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { - // Set validity if necessary. - if unsafe { vector.set_validity(&self.validity_mask, offset, len) } { - // All values are null, so no point copying the data. - return Ok(()); - } - // DuckDB uses byte bools, not bit bools. // maybe we can convert into these from a compressed array sometimes?. unsafe { vector.as_slice_mut(len) }.copy_from_slice( @@ -56,7 +50,10 @@ impl ColumnExporter for BoolExporter { mod tests { use std::iter; + use vortex_array::VortexSessionExecute; + use super::*; + use crate::SESSION; use crate::cpp; use crate::duckdb::DataChunk; use crate::duckdb::LogicalType; @@ -66,7 +63,7 @@ mod tests { let arr = BoolArray::from_iter([true, false, true]); let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); - new_exporter(arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(1, 2, &mut chunk.get_vector(0)) .unwrap(); @@ -86,7 +83,7 @@ mod tests { let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); - new_exporter(arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(1, 66, &mut chunk.get_vector(0)) .unwrap(); @@ -102,4 +99,44 @@ mod tests { ) ); } + + #[test] + fn test_bool_nullable() { + let arr = BoolArray::from_iter([Some(true), None, Some(false)]); + + let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + + new_exporter(arr, &mut SESSION.create_execution_ctx()) + .unwrap() + .export(1, 2, &mut chunk.get_vector(0)) + .unwrap(); + chunk.set_len(2); + + assert_eq!( + format!("{}", String::try_from(&chunk).unwrap()), + r#"Chunk - [1 Columns] +- FLAT BOOLEAN: 2 = [ NULL, false] +"# + ); + } + + #[test] + fn test_bool_all_invalid() { + let arr = BoolArray::from_iter([None; 3]); + + let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_BOOLEAN)]); + + new_exporter(arr, &mut SESSION.create_execution_ctx()) + .unwrap() + .export(1, 2, &mut chunk.get_vector(0)) + .unwrap(); + chunk.set_len(2); + + assert_eq!( + format!("{}", String::try_from(&chunk).unwrap()), + r#"Chunk - [1 Columns] +- CONSTANT BOOLEAN: 2 = [ NULL] +"# + ); + } } diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index b5c50aec012..f0dadb1199e 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -5,6 +5,7 @@ use std::marker::PhantomData; use num_traits::ToPrimitive; use vortex::array::arrays::DecimalArray; +use vortex::array::arrays::DecimalArrayParts; use vortex::buffer::Buffer; use vortex::dtype::BigCast; use vortex::dtype::DecimalDType; @@ -15,7 +16,7 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::mask::Mask; use vortex::scalar::DecimalType; -use vortex_array::arrays::DecimalArrayParts; +use vortex_array::ExecutionCtx; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; @@ -34,7 +35,10 @@ struct DecimalZeroCopyExporter { validity: Mask, } -pub(crate) fn new_exporter(array: DecimalArray) -> VortexResult> { +pub(crate) fn new_exporter( + array: DecimalArray, + ctx: &mut ExecutionCtx, +) -> VortexResult> { let len = array.len(); let DecimalArrayParts { validity, @@ -44,11 +48,11 @@ pub(crate) fn new_exporter(array: DecimalArray) -> VortexResult(ctx)?; if values_type == dest_values_type { match_each_decimal_value_type!(values_type, |D| { - let buffer = Buffer::::from_byte_buffer(values) + let buffer = Buffer::::from_byte_buffer(values); return Ok(Box::new(DecimalZeroCopyExporter { values: buffer.clone(), shared_buffer: VectorBuffer::new(buffer), diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index 42b5a23c1c9..96c2abed739 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -6,9 +6,9 @@ use std::sync::Arc; use parking_lot::Mutex; use vortex::array::Array; -use vortex::array::Canonical; use vortex::array::ExecutionCtx; use vortex::array::arrays::ListViewArray; +use vortex::array::arrays::ListViewArrayParts; use vortex::array::arrays::PrimitiveArray; use vortex::dtype::IntegerPType; use vortex::dtype::match_each_integer_ptype; @@ -38,17 +38,24 @@ struct ListViewExporter { size_type: PhantomData, } -// TODO(joe): into parts pub(crate) fn new_exporter( array: ListViewArray, cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { + let len = array.len(); + let ListViewArrayParts { + elements_dtype, + elements, + offsets, + sizes, + validity, + .. + } = array.into_parts(); // Cache an `elements` vector up front so that future exports can reference it. - let elements = array.elements(); let num_elements = elements.len(); - let values_key = Arc::as_ptr(elements).addr(); + let values_key = Arc::as_ptr(&elements).addr(); // Check if we have a cached vector and extract it if we do. let cached_elements = cache .values_cache @@ -60,9 +67,9 @@ pub(crate) fn new_exporter( None => { // We have no cached the vector yet, so create a new DuckDB vector for the elements. let mut duckdb_elements = - Vector::with_capacity(elements.dtype().try_into()?, elements.len()); + Vector::with_capacity(elements_dtype.as_ref().try_into()?, elements.len()); let elements_exporter = - new_array_exporter_with_flatten(array.elements().clone(), cache, ctx, true)?; + new_array_exporter_with_flatten(elements.clone(), cache, ctx, true)?; if !elements.is_empty() { elements_exporter.export(0, elements.len(), &mut duckdb_elements)?; @@ -77,21 +84,14 @@ pub(crate) fn new_exporter( } }; - let offsets = array - .offsets() - .clone() - .execute::(ctx)? - .into_primitive(); - let sizes = array - .sizes() - .clone() - .execute::(ctx)? - .into_primitive(); + let offsets = offsets.execute::(ctx)?; + let sizes = sizes.clone().execute::(ctx)?; + let validity = validity.to_array(len).execute::(ctx)?; let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { match_each_integer_ptype!(sizes.ptype(), |S| { Box::new(ListViewExporter { - validity: array.validity_mask(), + validity, duckdb_elements: shared_elements, offsets, sizes, diff --git a/vortex-duckdb/src/exporter/mod.rs b/vortex-duckdb/src/exporter/mod.rs index 3ac5d1e9025..d913748e976 100644 --- a/vortex-duckdb/src/exporter/mod.rs +++ b/vortex-duckdb/src/exporter/mod.rs @@ -136,9 +136,10 @@ fn new_array_exporter_with_flatten( return sequence::new_exporter(array); } - if let Some(array) = array.as_opt::() { - return run_end::new_exporter(array, cache, ctx); - } + let array = match array.try_into::() { + Ok(array) => return run_end::new_exporter(array, cache, ctx), + Err(array) => array, + }; if let Some(array) = array.as_opt::() { return dict::new_exporter_with_flatten(array, cache, ctx, flatten); @@ -152,10 +153,10 @@ fn new_array_exporter_with_flatten( // Otherwise, we fall back to canonical match array.execute::(ctx)? { Canonical::Null(array) => Ok(all_invalid::new_exporter(array.len(), &LogicalType::null())), - Canonical::Bool(array) => bool::new_exporter(array), - Canonical::Primitive(array) => primitive::new_exporter(array), - Canonical::Decimal(array) => decimal::new_exporter(array), - Canonical::VarBinView(array) => varbinview::new_exporter(array), + Canonical::Bool(array) => bool::new_exporter(array, ctx), + Canonical::Primitive(array) => primitive::new_exporter(array, ctx), + Canonical::Decimal(array) => decimal::new_exporter(array, ctx), + Canonical::VarBinView(array) => varbinview::new_exporter(array, ctx), Canonical::List(array) => list_view::new_exporter(array, cache, ctx), Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache, ctx), Canonical::Struct(array) => struct_::new_exporter(array, cache, ctx), diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 2beb61cdea7..0ba88d4afce 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -7,6 +7,9 @@ use vortex::array::arrays::PrimitiveArray; use vortex::dtype::NativePType; use vortex::dtype::match_each_native_ptype; use vortex::error::VortexResult; +use vortex::mask::Mask; +use vortex_array::ExecutionCtx; +use vortex_array::vtable::ValidityHelper; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; @@ -20,7 +23,10 @@ struct PrimitiveExporter { _phantom_type: PhantomData, } -pub fn new_exporter(array: PrimitiveArray) -> VortexResult> { +pub fn new_exporter( + array: PrimitiveArray, + ctx: &mut ExecutionCtx, +) -> VortexResult> { match_each_native_ptype!(array.ptype(), |T| { let buffer = array.to_buffer::(); let prim = Box::new(PrimitiveExporter { @@ -29,7 +35,13 @@ pub fn new_exporter(array: PrimitiveArray) -> VortexResult(ctx)?, + prim, + )) }) } @@ -50,8 +62,10 @@ impl ColumnExporter for PrimitiveExporter { mod tests { use itertools::Itertools; use vortex::error::VortexExpect; + use vortex_array::VortexSessionExecute; use super::*; + use crate::SESSION; use crate::cpp; use crate::duckdb::DUCKDB_STANDARD_VECTOR_SIZE; use crate::duckdb::DataChunk; @@ -63,7 +77,7 @@ mod tests { let mut chunk = DataChunk::new([LogicalType::new(cpp::duckdb_type::DUCKDB_TYPE_INTEGER)]); - new_exporter(arr) + new_exporter(arr, &mut SESSION.create_execution_ctx()) .unwrap() .export(0, 3, &mut chunk.get_vector(0)) .unwrap(); @@ -89,7 +103,7 @@ mod tests { .collect_vec(); for i in 0..ARRAY_COUNT { - new_exporter(arr.clone()) + new_exporter(arr.clone(), &mut SESSION.create_execution_ctx()) .unwrap() .export( i * DUCKDB_STANDARD_VECTOR_SIZE, diff --git a/vortex-duckdb/src/exporter/run_end.rs b/vortex-duckdb/src/exporter/run_end.rs index c576ceb0904..f37e5a1146f 100644 --- a/vortex-duckdb/src/exporter/run_end.rs +++ b/vortex-duckdb/src/exporter/run_end.rs @@ -4,7 +4,6 @@ use std::marker::PhantomData; use vortex::array::ArrayRef; -use vortex::array::Canonical; use vortex::array::ExecutionCtx; use vortex::array::arrays::PrimitiveArray; use vortex::array::search_sorted::SearchSorted; @@ -12,6 +11,7 @@ use vortex::array::search_sorted::SearchSortedSide; use vortex::dtype::IntegerPType; use vortex::dtype::match_each_integer_ptype; use vortex::encodings::runend::RunEndArray; +use vortex::encodings::runend::RunEndArrayParts; use vortex::error::VortexExpect; use vortex::error::VortexResult; @@ -32,18 +32,14 @@ struct RunEndExporter { run_end_offset: usize, } -// TODO(joe): into_parts. pub(crate) fn new_exporter( - array: &RunEndArray, + array: RunEndArray, cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let ends = array - .ends() - .clone() - .execute::(ctx)? - .into_primitive(); - let values = array.values().clone(); + let offset = array.offset(); + let RunEndArrayParts { ends, values } = array.into_parts(); + let ends = ends.execute::(ctx)?; let values_exporter = new_array_exporter(values.clone(), cache, ctx)?; match_each_integer_ptype!(ends.ptype(), |E| { @@ -52,7 +48,7 @@ pub(crate) fn new_exporter( ends_type: PhantomData::, values, values_exporter, - run_end_offset: array.offset(), + run_end_offset: offset, })) }) } diff --git a/vortex-duckdb/src/exporter/struct_.rs b/vortex-duckdb/src/exporter/struct_.rs index 4eaec938176..140a4ded143 100644 --- a/vortex-duckdb/src/exporter/struct_.rs +++ b/vortex-duckdb/src/exporter/struct_.rs @@ -4,7 +4,9 @@ use std::ops::Not; use vortex::array::ExecutionCtx; +use vortex::array::IntoArray; use vortex::array::arrays::StructArray; +use vortex::array::arrays::StructArrayParts; use vortex::array::optimizer::ArrayOptimizer; use vortex::compute::mask; use vortex::error::VortexResult; @@ -22,23 +24,28 @@ struct StructExporter { children: Vec>, } -// // TODO(joe): into parts pub(crate) fn new_exporter( array: StructArray, cache: &ConversionCache, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let validity = array.validity_mask(); + let len = array.len(); + let StructArrayParts { + validity, + struct_fields, + fields, + .. + } = array.into_parts(); + let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { return Ok(all_invalid::new_exporter( - array.len(), - &LogicalType::try_from(array.dtype())?, + len, + &LogicalType::try_from(struct_fields)?, )); } - let children = array - .fields() + let children = fields .iter() .map(|child| { if matches!(validity, Mask::Values(_)) { @@ -49,7 +56,7 @@ pub(crate) fn new_exporter( ctx, ) } else { - new_array_exporter(child.clone(), cache, ctx) + new_array_exporter(child.clone().into_array(), cache, ctx) } }) .collect::>>()?; diff --git a/vortex-duckdb/src/exporter/temporal.rs b/vortex-duckdb/src/exporter/temporal.rs index 8e4fd0d3e2f..3beac12def3 100644 --- a/vortex-duckdb/src/exporter/temporal.rs +++ b/vortex-duckdb/src/exporter/temporal.rs @@ -20,6 +20,7 @@ impl ColumnExporter for TemporalExporter { } } +// TODO(joe): into_parts pub(crate) fn new_exporter( array: TemporalArray, ctx: &mut ExecutionCtx, @@ -31,6 +32,7 @@ pub(crate) fn new_exporter( .clone() .execute::(ctx)? .into_primitive(), + ctx, )?, })) } diff --git a/vortex-duckdb/src/exporter/validity.rs b/vortex-duckdb/src/exporter/validity.rs index 16f6683c43f..1babce79b70 100644 --- a/vortex-duckdb/src/exporter/validity.rs +++ b/vortex-duckdb/src/exporter/validity.rs @@ -25,6 +25,10 @@ pub(crate) fn new_exporter( impl ColumnExporter for ValidityExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { + assert!( + offset + len <= self.mask.len(), + "cannot access outside of array" + ); if unsafe { vector.set_validity(&self.mask, offset, len) } { // All values are null, so no point copying the data. return Ok(()); diff --git a/vortex-duckdb/src/exporter/varbinview.rs b/vortex-duckdb/src/exporter/varbinview.rs index 77451629e17..5d088a8c59e 100644 --- a/vortex-duckdb/src/exporter/varbinview.rs +++ b/vortex-duckdb/src/exporter/varbinview.rs @@ -2,10 +2,12 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::ffi::c_char; +use std::sync::Arc; use itertools::Itertools; +use vortex::array::ExecutionCtx; use vortex::array::arrays::VarBinViewArray; -use vortex::array::vtable::ValidityHelper; +use vortex::array::arrays::VarBinViewArrayParts; use vortex::buffer::Buffer; use vortex::buffer::ByteBuffer; use vortex::error::VortexResult; @@ -13,6 +15,7 @@ use vortex::mask::Mask; use vortex::vector::binaryview::BinaryView; use vortex::vector::binaryview::Inlined; +use crate::LogicalType; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; @@ -20,30 +23,34 @@ use crate::exporter::all_invalid; struct VarBinViewExporter { views: Buffer, - buffers: Vec, + buffers: Arc<[ByteBuffer]>, vector_buffers: Vec, validity: Mask, } -pub(crate) fn new_exporter(array: VarBinViewArray) -> VortexResult> { - let validity = array.validity().to_mask(array.len()); +pub(crate) fn new_exporter( + array: VarBinViewArray, + ctx: &mut ExecutionCtx, +) -> VortexResult> { + let len = array.len(); + let VarBinViewArrayParts { + validity, + dtype, + views, + buffers, + } = array.into_parts(); + let validity = validity.to_array(len).execute::(ctx)?; if validity.all_false() { return Ok(all_invalid::new_exporter( - array.len(), - &array.dtype().try_into()?, + len, + &LogicalType::try_from(dtype)?, )); } - Ok(Box::new(VarBinViewExporter { - views: array.views().clone(), - buffers: array.buffers().to_vec(), - vector_buffers: array - .buffers() - .iter() - .cloned() - .map(VectorBuffer::new) - .collect_vec(), - validity: array.validity_mask(), + views, + buffers: buffers.clone(), + vector_buffers: buffers.iter().cloned().map(VectorBuffer::new).collect_vec(), + validity, })) } diff --git a/vortex-duckdb/src/exporter/vector.rs b/vortex-duckdb/src/exporter/vector.rs index dcd1e7fc39d..e8f66bb88f8 100644 --- a/vortex-duckdb/src/exporter/vector.rs +++ b/vortex-duckdb/src/exporter/vector.rs @@ -22,7 +22,7 @@ impl Vector { true } Mask::Values(arr) => { - let true_count = arr.bit_buffer().true_count(); + let true_count = arr.bit_buffer().slice(offset..(offset + len)).true_count(); if true_count == len { unsafe { self.set_all_true_validity(len) } } else if true_count == 0 { From 389caaf374f2fd25fbee17c2ecfc30395ac34874 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 14:45:28 +0000 Subject: [PATCH 3/8] clean Signed-off-by: Joe Isaacs --- vortex-duckdb/src/exporter/bool.rs | 7 ++ vortex-duckdb/src/exporter/decimal.rs | 66 ++++++------ vortex-duckdb/src/exporter/fixed_size_list.rs | 40 ++++--- vortex-duckdb/src/exporter/list.rs | 14 ++- vortex-duckdb/src/exporter/list_view.rs | 14 ++- vortex-duckdb/src/exporter/primitive.rs | 22 ++-- vortex-duckdb/src/exporter/varbinview.rs | 102 ++++++++++++++++-- 7 files changed, 190 insertions(+), 75 deletions(-) diff --git a/vortex-duckdb/src/exporter/bool.rs b/vortex-duckdb/src/exporter/bool.rs index 0ca88e12c63..46a223a7ac5 100644 --- a/vortex-duckdb/src/exporter/bool.rs +++ b/vortex-duckdb/src/exporter/bool.rs @@ -9,8 +9,10 @@ use vortex::buffer::BitBuffer; use vortex::error::VortexResult; use vortex::mask::Mask; +use crate::LogicalType; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; +use crate::exporter::all_invalid; use crate::exporter::validity; struct BoolExporter { @@ -24,6 +26,11 @@ pub(crate) fn new_exporter( let len = array.len(); let BoolArrayParts { validity, bits, .. } = array.into_parts(); let validity = validity.to_array(len).execute::(ctx)?; + + if validity.all_false() { + return Ok(all_invalid::new_exporter(len, &LogicalType::bool())); + } + Ok(validity::new_exporter( validity, Box::new(BoolExporter { bit_buffer: bits }), diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index f0dadb1199e..5b9c624bc29 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -4,10 +4,12 @@ use std::marker::PhantomData; use num_traits::ToPrimitive; +use vortex::array::ExecutionCtx; use vortex::array::arrays::DecimalArray; use vortex::array::arrays::DecimalArrayParts; use vortex::buffer::Buffer; use vortex::dtype::BigCast; +use vortex::dtype::DType; use vortex::dtype::DecimalDType; use vortex::dtype::NativeDecimalType; use vortex::dtype::match_each_decimal_value_type; @@ -16,15 +18,16 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::mask::Mask; use vortex::scalar::DecimalType; -use vortex_array::ExecutionCtx; +use crate::LogicalType; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; +use crate::exporter::all_invalid; +use crate::exporter::validity; struct DecimalExporter { values: Buffer, - validity: Mask, /// The DecimalType of the DuckDB column. dest_value_type: PhantomData, } @@ -32,7 +35,6 @@ struct DecimalExporter { struct DecimalZeroCopyExporter { values: Buffer, shared_buffer: VectorBuffer, - validity: Mask, } pub(crate) fn new_exporter( @@ -45,31 +47,38 @@ pub(crate) fn new_exporter( decimal_dtype, values_type, values, - .. + nullability, } = array.into_parts(); let dest_values_type = precision_to_duckdb_storage_size(&decimal_dtype)?; let validity = validity.to_array(len).execute::(ctx)?; - if values_type == dest_values_type { + if validity.all_false() { + return Ok(all_invalid::new_exporter( + len, + &LogicalType::try_from(DType::Decimal(decimal_dtype, nullability))?, + )); + } + + let exporter = if values_type == dest_values_type { match_each_decimal_value_type!(values_type, |D| { let buffer = Buffer::::from_byte_buffer(values); - return Ok(Box::new(DecimalZeroCopyExporter { + Box::new(DecimalZeroCopyExporter { values: buffer.clone(), shared_buffer: VectorBuffer::new(buffer), - validity, - })); + }) as Box }) - } - - match_each_decimal_value_type!(values_type, |D| { - match_each_decimal_value_type!(dest_values_type, |N| { - Ok(Box::new(DecimalExporter { - values: Buffer::::from_byte_buffer(values), - validity, - dest_value_type: PhantomData::, - })) + } else { + match_each_decimal_value_type!(values_type, |D| { + match_each_decimal_value_type!(dest_values_type, |N| { + Box::new(DecimalExporter { + values: Buffer::::from_byte_buffer(values), + dest_value_type: PhantomData::, + }) as Box + }) }) - }) + }; + + Ok(validity::new_exporter(validity, exporter)) } impl ColumnExporter for DecimalExporter @@ -78,12 +87,6 @@ where N: BigCast, { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { - // Set validity if necessary. - if unsafe { vector.set_validity(&self.validity, offset, len) } { - // All values are null, so no point copying the data. - return Ok(()); - } - // Copy the values from the Vortex array to the DuckDB vector. for (src, dst) in self.values[offset..offset + len] .iter() @@ -100,11 +103,6 @@ where impl ColumnExporter for DecimalZeroCopyExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { - if unsafe { vector.set_validity(&self.validity, offset, len) } { - // All values are null, so no point copying the data. - return Ok(()); - } - assert!(self.values.len() >= offset + len); let pos = unsafe { self.values.as_ptr().add(offset) }; @@ -147,11 +145,13 @@ mod tests { assert_eq!(array.values_type(), dest_values_type); match_each_decimal_value_type!(array.values_type(), |D| { let buffer = array.buffer::(); - Ok(Box::new(DecimalZeroCopyExporter { - values: buffer.clone(), - shared_buffer: VectorBuffer::new(buffer), + Ok(validity::new_exporter( validity, - })) + Box::new(DecimalZeroCopyExporter { + values: buffer.clone(), + shared_buffer: VectorBuffer::new(buffer), + }), + )) }) } diff --git a/vortex-duckdb/src/exporter/fixed_size_list.rs b/vortex-duckdb/src/exporter/fixed_size_list.rs index dfdd1d8976b..1caa2f982d0 100644 --- a/vortex-duckdb/src/exporter/fixed_size_list.rs +++ b/vortex-duckdb/src/exporter/fixed_size_list.rs @@ -16,18 +16,18 @@ use vortex::mask::Mask; use super::ConversionCache; use super::all_invalid; use super::new_array_exporter_with_flatten; +use super::validity; use crate::duckdb::LogicalType; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; /// Exporter for converting Vortex [`FixedSizeListArray`] to DuckDB ARRAY vectors. struct FixedSizeListExporter { - /// Validity mask indicating which lists/arrays are null. - validity: Mask, /// Exporter for the underlying elements array. elements_exporter: Box, /// The fixed number of elements in each list. list_size: u32, + len: usize, } /// Creates a new exporter for converting a [`FixedSizeListArray`] to DuckDB ARRAY format. @@ -39,20 +39,24 @@ pub(crate) fn new_exporter( let list_size = array.list_size(); let len = array.len(); let (elements, validity, dtype) = array.into_parts(); - let mask = validity.to_mask(len); + let mask = validity.to_array(len).execute::(ctx)?; let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?; - let ltype: LogicalType = (&dtype).try_into()?; - if mask.all_false() { - return Ok(all_invalid::new_exporter(len, <ype)); + return Ok(all_invalid::new_exporter( + len, + &LogicalType::try_from(dtype)?, + )); } - Ok(Box::new(FixedSizeListExporter { - validity: mask, - elements_exporter, - list_size, - })) + Ok(validity::new_exporter( + mask, + Box::new(FixedSizeListExporter { + elements_exporter, + list_size, + len, + }), + )) } impl ColumnExporter for FixedSizeListExporter { @@ -61,23 +65,15 @@ impl ColumnExporter for FixedSizeListExporter { fn export(&self, offset: usize, len: usize, vector: &mut Vector) -> VortexResult<()> { // Verify that offset + len doesn't exceed the validity mask length. assert!( - offset + len <= self.validity.len(), - "Export range [{}, {}) exceeds validity mask length {}", + offset + len <= self.len, + "Export range [{}, {}) exceeds array length {}", offset, offset + len, - self.validity.len() + self.len ); let list_size = self.list_size as usize; - // If all values are null, then we don't need to worry about exporting values (similar to - // the primitive exporter). - // SAFETY: We've asserted that offset + len <= self.validity.len(), which ensures we won't - // read past the validity mask bounds. - if unsafe { vector.set_validity(&self.validity, offset, len) } { - return Ok(()); - } - // Get the child vector for array elements and export the elements directly. let mut elements_vector = vector.array_vector_get_child(); self.elements_exporter diff --git a/vortex-duckdb/src/exporter/list.rs b/vortex-duckdb/src/exporter/list.rs index 80e86ccabd2..237b84813c5 100644 --- a/vortex-duckdb/src/exporter/list.rs +++ b/vortex-duckdb/src/exporter/list.rs @@ -17,7 +17,9 @@ use vortex::error::vortex_err; use vortex::mask::Mask; use super::ConversionCache; +use super::all_invalid; use super::new_array_exporter_with_flatten; +use crate::LogicalType; use crate::cpp; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -47,9 +49,17 @@ pub(crate) fn new_exporter( elements, offsets, validity, - .. + dtype, } = array.into_parts(); let num_elements = elements.len(); + let validity = validity.to_array(array_len).execute::(ctx)?; + + if validity.all_false() { + return Ok(all_invalid::new_exporter( + array_len, + &LogicalType::try_from(dtype)?, + )); + } let values_key = Arc::as_ptr(&elements).addr(); // Check if we have a cached vector and extract it if we do. @@ -84,7 +94,7 @@ pub(crate) fn new_exporter( let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { Box::new(ListExporter { - validity: validity.to_mask(array_len), + validity, duckdb_elements: shared_elements, offsets, num_elements, diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index 96c2abed739..d529df82eaa 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -10,6 +10,7 @@ use vortex::array::ExecutionCtx; use vortex::array::arrays::ListViewArray; use vortex::array::arrays::ListViewArrayParts; use vortex::array::arrays::PrimitiveArray; +use vortex::dtype::DType; use vortex::dtype::IntegerPType; use vortex::dtype::match_each_integer_ptype; use vortex::error::VortexResult; @@ -17,7 +18,9 @@ use vortex::error::vortex_err; use vortex::mask::Mask; use super::ConversionCache; +use super::all_invalid; use super::new_array_exporter_with_flatten; +use crate::LogicalType; use crate::cpp; use crate::duckdb::Vector; use crate::exporter::ColumnExporter; @@ -50,10 +53,18 @@ pub(crate) fn new_exporter( offsets, sizes, validity, - .. + nullability, } = array.into_parts(); // Cache an `elements` vector up front so that future exports can reference it. let num_elements = elements.len(); + let validity = validity.to_array(len).execute::(ctx)?; + + if validity.all_false() { + return Ok(all_invalid::new_exporter( + len, + &LogicalType::try_from(DType::List(elements_dtype, nullability))?, + )); + } let values_key = Arc::as_ptr(&elements).addr(); // Check if we have a cached vector and extract it if we do. @@ -86,7 +97,6 @@ pub(crate) fn new_exporter( let offsets = offsets.execute::(ctx)?; let sizes = sizes.clone().execute::(ctx)?; - let validity = validity.to_array(len).execute::(ctx)?; let boxed = match_each_integer_ptype!(offsets.ptype(), |O| { match_each_integer_ptype!(sizes.ptype(), |S| { diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 0ba88d4afce..134b25f6389 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -11,9 +11,11 @@ use vortex::mask::Mask; use vortex_array::ExecutionCtx; use vortex_array::vtable::ValidityHelper; +use crate::LogicalType; use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; +use crate::exporter::all_invalid; use crate::exporter::validity; struct PrimitiveExporter { @@ -27,6 +29,18 @@ pub fn new_exporter( array: PrimitiveArray, ctx: &mut ExecutionCtx, ) -> VortexResult> { + let validity = array + .validity() + .to_array(array.len()) + .execute::(ctx)?; + + if validity.all_false() { + return Ok(all_invalid::new_exporter( + array.len(), + &LogicalType::try_from(array.ptype())?, + )); + } + match_each_native_ptype!(array.ptype(), |T| { let buffer = array.to_buffer::(); let prim = Box::new(PrimitiveExporter { @@ -35,13 +49,7 @@ pub fn new_exporter( shared_buffer: VectorBuffer::new(buffer), _phantom_type: Default::default(), }); - Ok(validity::new_exporter( - array - .validity() - .to_array(array.len()) - .execute::(ctx)?, - prim, - )) + Ok(validity::new_exporter(validity, prim)) }) } diff --git a/vortex-duckdb/src/exporter/varbinview.rs b/vortex-duckdb/src/exporter/varbinview.rs index 5d088a8c59e..5068176658c 100644 --- a/vortex-duckdb/src/exporter/varbinview.rs +++ b/vortex-duckdb/src/exporter/varbinview.rs @@ -20,12 +20,12 @@ use crate::duckdb::Vector; use crate::duckdb::VectorBuffer; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; +use crate::exporter::validity; struct VarBinViewExporter { views: Buffer, buffers: Arc<[ByteBuffer]>, vector_buffers: Vec, - validity: Mask, } pub(crate) fn new_exporter( @@ -46,12 +46,14 @@ pub(crate) fn new_exporter( &LogicalType::try_from(dtype)?, )); } - Ok(Box::new(VarBinViewExporter { - views, - buffers: buffers.clone(), - vector_buffers: buffers.iter().cloned().map(VectorBuffer::new).collect_vec(), + Ok(validity::new_exporter( validity, - })) + Box::new(VarBinViewExporter { + views, + buffers: buffers.clone(), + vector_buffers: buffers.iter().cloned().map(VectorBuffer::new).collect_vec(), + }), + )) } impl ColumnExporter for VarBinViewExporter { @@ -67,9 +69,6 @@ impl ColumnExporter for VarBinViewExporter { *mut_view = view; } - // Update the validity mask. - unsafe { vector.set_validity(&self.validity, offset, len) }; - // We register our buffers zero-copy with DuckDB and re-use them in each vector. for buffer in &self.vector_buffers { vector.add_string_vector_buffer(buffer); @@ -129,3 +128,88 @@ fn to_ptr_binary_view<'a>( } }) } + +#[cfg(test)] +mod tests { + use Nullability::Nullable; + use vortex::dtype::DType; + use vortex::dtype::Nullability; + use vortex::error::VortexResult; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::VarBinViewArray; + + use crate::LogicalType; + use crate::SESSION; + use crate::duckdb::DataChunk; + use crate::exporter::varbinview::new_exporter; + + #[test] + fn all_invalid_varbinview() -> VortexResult<()> { + let arr = VarBinViewArray::from_iter([Option::<&str>::None; 4], DType::Utf8(Nullable)); + + let mut chunk = DataChunk::new([LogicalType::varchar()]); + + new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( + 0, + 3, + &mut chunk.get_vector(0), + )?; + chunk.set_len(3); + + assert_eq!( + format!("{}", String::try_from(&chunk).unwrap()), + r#"Chunk - [1 Columns] +- CONSTANT VARCHAR: 3 = [ NULL] +"# + ); + Ok(()) + } + + #[test] + fn all_invalid_varbinview_section() -> VortexResult<()> { + let arr = + VarBinViewArray::from_iter([None, None, None, Some("Hey")], DType::Utf8(Nullable)); + + let mut chunk = DataChunk::new([LogicalType::varchar()]); + + new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( + 0, + 3, + &mut chunk.get_vector(0), + )?; + chunk.set_len(3); + + assert_eq!( + format!("{}", String::try_from(&chunk).unwrap()), + r#"Chunk - [1 Columns] +- CONSTANT VARCHAR: 3 = [ NULL] +"# + ); + Ok(()) + } + + #[test] + fn partial_invalid_varbinview_section() -> VortexResult<()> { + let arr = VarBinViewArray::from_iter( + [None, None, Some("Hi"), Some("Hey")], + DType::Utf8(Nullable), + ); + + let mut chunk = DataChunk::new([LogicalType::varchar()]); + + new_exporter(arr, &mut SESSION.create_execution_ctx())?.export( + 0, + 3, + &mut chunk.get_vector(0), + )?; + chunk.set_len(3); + + assert_eq!( + format!("{}", String::try_from(&chunk).unwrap()), + r#"Chunk - [1 Columns] +- FLAT VARCHAR: 3 = [ NULL, NULL, Hi] +"# + ); + Ok(()) + } +} From e2a45892a44bea3d261e0f2ad55ced75a415266b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 14:48:54 +0000 Subject: [PATCH 4/8] clean Signed-off-by: Joe Isaacs --- vortex-duckdb/src/exporter/primitive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 134b25f6389..51efb6d8438 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -3,12 +3,12 @@ use std::marker::PhantomData; +use vortex::array::ExecutionCtx; use vortex::array::arrays::PrimitiveArray; use vortex::dtype::NativePType; use vortex::dtype::match_each_native_ptype; use vortex::error::VortexResult; use vortex::mask::Mask; -use vortex_array::ExecutionCtx; use vortex_array::vtable::ValidityHelper; use crate::LogicalType; From a21fc0ff37378430dd8e6cc4b8336e710e7db24e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 14:58:34 +0000 Subject: [PATCH 5/8] clean Signed-off-by: Joe Isaacs --- vortex-duckdb/src/exporter/primitive.rs | 2 +- vortex-python/src/arrays/mod.rs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 51efb6d8438..06bcca2af41 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -5,11 +5,11 @@ use std::marker::PhantomData; use vortex::array::ExecutionCtx; use vortex::array::arrays::PrimitiveArray; +use vortex::array::vtable::ValidityHelper; use vortex::dtype::NativePType; use vortex::dtype::match_each_native_ptype; use vortex::error::VortexResult; use vortex::mask::Mask; -use vortex_array::vtable::ValidityHelper; use crate::LogicalType; use crate::duckdb::Vector; diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index e0560cd860e..024bcb1c5c1 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -28,7 +28,6 @@ use vortex::array::arrays::ChunkedVTable; use vortex::array::arrow::IntoArrowArray; use vortex::compute::Operator; use vortex::compute::compare; -use vortex::compute::take; use vortex::dtype::DType; use vortex::dtype::Nullability; use vortex::dtype::PType; @@ -622,9 +621,7 @@ impl PyArray { ))); } - let inner = &slf.take(&*indices)?; - - Ok(PyArrayRef::from(inner)) + Ok(PyArrayRef::from(slf.take(indices.clone())?)) } #[pyo3(signature = (start, end))] From a0c06cf5c1a6b37efcb8015a46b9163606f5f07d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 15:10:11 +0000 Subject: [PATCH 6/8] clean Signed-off-by: Joe Isaacs --- vortex-python/src/arrays/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index 024bcb1c5c1..c23a77a1c16 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -35,7 +35,7 @@ use vortex::dtype::match_each_integer_ptype; use vortex::error::VortexError; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; - +use vortex_array::compute::take; use crate::PyVortex; use crate::arrays::native::PyNativeArray; use crate::arrays::py::PyPythonArray; @@ -621,7 +621,9 @@ impl PyArray { ))); } - Ok(PyArrayRef::from(slf.take(indices.clone())?)) + let inner = take(&slf, &*indices)?; + + Ok(PyArrayRef::from(inner)) } #[pyo3(signature = (start, end))] From 2d53a716f6bc3a7a39038ae59f9979f14dd4d31f Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 15:26:18 +0000 Subject: [PATCH 7/8] clean Signed-off-by: Joe Isaacs --- vortex-python/src/arrays/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index c23a77a1c16..da113edb84c 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -36,6 +36,7 @@ use vortex::error::VortexError; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; use vortex_array::compute::take; + use crate::PyVortex; use crate::arrays::native::PyNativeArray; use crate::arrays::py::PyPythonArray; From 993a3925c4d29a2d8cb3e7c6b3c47aac5518cc98 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Tue, 20 Jan 2026 15:28:48 +0000 Subject: [PATCH 8/8] clean Signed-off-by: Joe Isaacs --- vortex-python/src/arrays/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index da113edb84c..d33af824064 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -26,6 +26,7 @@ use vortex::array::ArrayRef; use vortex::array::ToCanonical; use vortex::array::arrays::ChunkedVTable; use vortex::array::arrow::IntoArrowArray; +use vortex::array::compute::take; use vortex::compute::Operator; use vortex::compute::compare; use vortex::dtype::DType; @@ -35,7 +36,6 @@ use vortex::dtype::match_each_integer_ptype; use vortex::error::VortexError; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; -use vortex_array::compute::take; use crate::PyVortex; use crate::arrays::native::PyNativeArray;