Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 32 additions & 21 deletions datafusion/functions-nested/src/reverse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

use crate::utils::make_scalar_function;
use arrow::array::{
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
Array, ArrayRef, FixedSizeListArray, GenericListArray, GenericListViewArray,
OffsetSizeTrait, UInt32Array, UInt64Array,
};
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
use arrow::compute::take;
Expand Down Expand Up @@ -155,11 +155,8 @@ fn general_array_reverse<O: OffsetSizeTrait>(
field: &FieldRef,
) -> Result<ArrayRef> {
let values = array.values();
let original_data = values.to_data();
let capacity = Capacities::Array(original_data.len());
let mut offsets = vec![O::usize_as(0)];
let mut mutable =
MutableArrayData::with_capacities(vec![&original_data], false, capacity);
let mut indices: Vec<O> = Vec::with_capacity(values.len());

for (row_index, (&start, &end)) in array.offsets().iter().tuple_windows().enumerate()
{
Expand All @@ -171,18 +168,34 @@ fn general_array_reverse<O: OffsetSizeTrait>(

let mut index = end - O::one();
while index >= start {
mutable.extend(0, index.to_usize().unwrap(), index.to_usize().unwrap() + 1);
indices.push(index);
index = index - O::one();
}
let size = end - start;
offsets.push(offsets[row_index] + size);
}

let data = mutable.freeze();
// Materialize values from underlying array with take
let indices_array: ArrayRef = if O::IS_LARGE {
Arc::new(UInt64Array::from(
indices
.iter()
.map(|i| i.as_usize() as u64)
.collect::<Vec<_>>(),
))
} else {
Arc::new(UInt32Array::from(
indices
.iter()
.map(|i| i.as_usize() as u32)
.collect::<Vec<_>>(),
))
};
Comment on lines +178 to +193
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is duplicated for ListView. I figured twice was not enough to extract to a function, but if we find a nicer way to do it, we can improve both.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we figured out a nicer way of doing this but I still can't figure it out 😅

Went a bit crazy with generics in attempting once but didn't pan out; but it works and the if branch isn't a big deal, though I wonder if it's more efficient to just have Int32/Int64 arrays instead of their unsigned variants to avoid needing a map -> collect 🤔

let values = take(&values, &indices_array, None)?;
Ok(Arc::new(GenericListArray::<O>::try_new(
Arc::clone(field),
OffsetBuffer::<O>::new(offsets.into()),
arrow::array::make_array(data),
values,
array.nulls().cloned(),
)?))
}
Expand Down Expand Up @@ -231,7 +244,7 @@ fn list_view_reverse<O: OffsetSizeTrait>(

// Materialize values from underlying array with take
let indices_array: ArrayRef = if O::IS_LARGE {
Arc::new(arrow::array::UInt64Array::from(
Arc::new(UInt64Array::from(
indices
.iter()
.map(|i| i.as_usize() as u64)
Expand All @@ -245,13 +258,12 @@ fn list_view_reverse<O: OffsetSizeTrait>(
.collect::<Vec<_>>(),
))
};
let values_reversed = take(&values, &indices_array, None)?;

let values = take(&values, &indices_array, None)?;
Ok(Arc::new(GenericListViewArray::<O>::try_new(
Arc::clone(field),
ScalarBuffer::from(new_offsets),
ScalarBuffer::from(new_sizes),
values_reversed,
values,
array.nulls().cloned(),
)?))
}
Expand All @@ -261,30 +273,29 @@ fn fixed_size_array_reverse(
field: &FieldRef,
) -> Result<ArrayRef> {
let values = array.values();
let original_data = values.to_data();
let capacity = Capacities::Array(original_data.len());
let mut mutable =
MutableArrayData::with_capacities(vec![&original_data], false, capacity);
let value_length = array.value_length() as usize;
let mut indices: Vec<u64> = Vec::with_capacity(values.len());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do wonder for FSL's if we can take advantage of knowing its a fixed size list to do it in a more efficient way? For example we know a FSL like so:

FSL with fixed length 3:
[[a, b, c], [d, e, f]]

Would always have reverse offsets like so:

[2, 1, 0, 5, 4, 3]

Without even needing to iterate the FSL array itself 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very elegant! Included in 41e1044


for row_index in 0..array.len() {
// skip the null value
if array.is_null(row_index) {
mutable.extend(0, 0, value_length);
indices.push(0);
continue;
}
let start = row_index * value_length;
let end = start + value_length;
for idx in (start..end).rev() {
mutable.extend(0, idx, idx + 1);
indices.push(idx as u64);
}
}

let data = mutable.freeze();
// Materialize values from underlying array with take
let indices_array: ArrayRef = Arc::new(UInt64Array::from(indices));
let values = take(&values, &indices_array, None)?;
Ok(Arc::new(FixedSizeListArray::try_new(
Arc::clone(field),
array.value_length(),
arrow::array::make_array(data),
values,
array.nulls().cloned(),
)?))
}
Expand Down
Loading