From 9f8d8234b2a24b3d22125e28a6da0e888a7d4a9c Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Fri, 24 Oct 2025 10:08:44 +0800 Subject: [PATCH 1/9] Fix: Return null for null entries in CometPlainVector.getBinary --- .../apache/comet/vector/CometPlainVector.java | 3 +++ .../comet/CometArrayExpressionSuite.scala | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java index f3803d53a9..a4308aca37 100644 --- a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java @@ -147,6 +147,9 @@ public UTF8String getUTF8String(int rowId) { @Override public byte[] getBinary(int rowId) { + if (isNullAt(rowId)) { + return null; + } int offset; int length; if (valueVector instanceof BaseVariableWidthVector) { diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index c346dc2e95..9786432df1 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -777,4 +777,29 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } } } + + test("array_reverse 2") { + withTempDir { dir => + val path = new Path(dir.toURI.toString, "test.parquet") + val filename = path.toString + val random = new Random(42) + withSQLConf(CometConf.COMET_ENABLED.key -> "false") { + val options = DataGenOptions( + allowNull = true, + generateNegativeZero = false, + generateArray = true, + generateStruct = false, + generateMap = false) + ParquetGenerator.makeParquetFile(random, spark, filename, 100, options) + } + withTempView("t1") { + val table = spark.read.parquet(filename) + table.createOrReplaceTempView("t1") + for (field <- table.schema.fields.filter(_.dataType.isInstanceOf[ArrayType])) { + val sql = s"SELECT ${field.name}, reverse(${field.name}) FROM t1 ORDER BY ${field.name}" + checkSparkAnswer(sql) + } + } + } + } } From 2b5efc16fc0190bf83200b3c6152e7e3d443e295 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Fri, 24 Oct 2025 12:10:48 +0800 Subject: [PATCH 2/9] rebase main --- .../org/apache/comet/CometArrayExpressionSuite.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index 9786432df1..972903f098 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.CometTestBase import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.ArrayType import org.apache.comet.CometSparkSessionExtensions.{isSpark35Plus, isSpark40Plus} import org.apache.comet.DataTypeSupport.isComplexType @@ -784,13 +785,10 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp val filename = path.toString val random = new Random(42) withSQLConf(CometConf.COMET_ENABLED.key -> "false") { - val options = DataGenOptions( - allowNull = true, - generateNegativeZero = false, - generateArray = true, - generateStruct = false, - generateMap = false) - 
ParquetGenerator.makeParquetFile(random, spark, filename, 100, options) + val schemaOptions = + SchemaGenOptions(generateArray = true, generateStruct = false, generateMap = false) + val dataOptions = DataGenOptions(allowNull = true, generateNegativeZero = false) + ParquetGenerator.makeParquetFile(random, spark, filename, 100, schemaOptions, dataOptions) } withTempView("t1") { val table = spark.read.parquet(filename) From b06fb5b5b6449aecae90bcc2dfaf7bf633c51b3c Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Sat, 25 Oct 2025 00:46:34 +0800 Subject: [PATCH 3/9] address comment --- .../test/scala/org/apache/comet/CometArrayExpressionSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index 972903f098..ae7054b102 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -780,6 +780,8 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp } test("array_reverse 2") { + // This test validates data correctness for array columns with nullable elements. + // See https://github.com/apache/datafusion-comet/issues/2612 withTempDir { dir => val path = new Path(dir.toURI.toString, "test.parquet") val filename = path.toString From af7538435c2897c7c9e9aed187d21d941f85d644 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Tue, 28 Oct 2025 13:47:02 +0800 Subject: [PATCH 4/9] Align with the APIs documentation of ColumnVector's get methods --- .../main/java/org/apache/comet/vector/CometListVector.java | 1 + .../main/java/org/apache/comet/vector/CometMapVector.java | 1 + .../main/java/org/apache/comet/vector/CometPlainVector.java | 5 ++--- .../src/main/java/org/apache/comet/vector/CometVector.java | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/common/src/main/java/org/apache/comet/vector/CometListVector.java b/common/src/main/java/org/apache/comet/vector/CometListVector.java index 752495c0d8..93e8e8bf9f 100644 --- a/common/src/main/java/org/apache/comet/vector/CometListVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometListVector.java @@ -45,6 +45,7 @@ public CometListVector( @Override public ColumnarArray getArray(int i) { + if (isNullAt(i)) return null; int start = listVector.getOffsetBuffer().getInt(i * ListVector.OFFSET_WIDTH); int end = listVector.getOffsetBuffer().getInt((i + 1) * ListVector.OFFSET_WIDTH); diff --git a/common/src/main/java/org/apache/comet/vector/CometMapVector.java b/common/src/main/java/org/apache/comet/vector/CometMapVector.java index 1d531ca903..c5984a4dcb 100644 --- a/common/src/main/java/org/apache/comet/vector/CometMapVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometMapVector.java @@ -65,6 +65,7 @@ public CometMapVector( @Override public ColumnarMap getMap(int i) { + if (isNullAt(i)) return null; int start = mapVector.getOffsetBuffer().getInt(i * MapVector.OFFSET_WIDTH); int end = mapVector.getOffsetBuffer().getInt((i + 1) * MapVector.OFFSET_WIDTH); diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java index a4308aca37..2a30be1b1c 100644 --- a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java @@ -123,6 +123,7 @@ public double getDouble(int rowId) { @Override public 
UTF8String getUTF8String(int rowId) { + if (isNullAt(rowId)) return null; if (!isBaseFixedWidthVector) { BaseVariableWidthVector varWidthVector = (BaseVariableWidthVector) valueVector; long offsetBufferAddress = varWidthVector.getOffsetBuffer().memoryAddress(); @@ -147,9 +148,7 @@ public UTF8String getUTF8String(int rowId) { @Override public byte[] getBinary(int rowId) { - if (isNullAt(rowId)) { - return null; - } + if (isNullAt(rowId)) return null; int offset; int length; if (valueVector instanceof BaseVariableWidthVector) { diff --git a/common/src/main/java/org/apache/comet/vector/CometVector.java b/common/src/main/java/org/apache/comet/vector/CometVector.java index 0c6fa8f12d..a1f75696f6 100644 --- a/common/src/main/java/org/apache/comet/vector/CometVector.java +++ b/common/src/main/java/org/apache/comet/vector/CometVector.java @@ -85,6 +85,7 @@ public boolean isFixedLength() { @Override public Decimal getDecimal(int i, int precision, int scale) { + if (isNullAt(i)) return null; if (!useDecimal128 && precision <= Decimal.MAX_INT_DIGITS() && type instanceof IntegerType) { return createDecimal(getInt(i), precision, scale); } else if (precision <= Decimal.MAX_LONG_DIGITS()) { From 910501ad2dc12efc5c6bd44697a8b1228b08813c Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Thu, 30 Oct 2025 13:05:20 +0800 Subject: [PATCH 5/9] fix array_insert --- .../src/array_funcs/array_insert.rs | 148 +++++++++--------- 1 file changed, 78 insertions(+), 70 deletions(-) diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index eb96fec12f..211e0035b8 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -198,114 +198,122 @@ fn array_insert( pos_array: &ArrayRef, legacy_mode: bool, ) -> DataFusionResult { - // The code is based on the implementation of the array_append from the Apache DataFusion - // https://github.com/apache/datafusion/blob/main/datafusion/functions-nested/src/concat.rs#L513 - // - // This code is also based on the implementation of the array_insert from the Apache Spark - // https://github.com/apache/spark/blob/branch-3.5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L4713 - let values = list_array.values(); let offsets = list_array.offsets(); let values_data = values.to_data(); let item_data = items_array.to_data(); + + // 预估容量 let new_capacity = Capacities::Array(values_data.len() + item_data.len()); let mut mutable_values = MutableArrayData::with_capacities(vec![&values_data, &item_data], true, new_capacity); - let mut new_offsets = vec![O::usize_as(0)]; - let mut new_nulls = Vec::::with_capacity(list_array.len()); + // offsets 和 list 有效位图 + let mut new_offsets = Vec::with_capacity(list_array.len() + 1); + new_offsets.push(O::usize_as(0)); + let mut list_valid = Vec::::with_capacity(list_array.len()); - let pos_data: &Int32Array = as_primitive_array(&pos_array); // Spark supports only i32 for positions + // Spark 只支持 Int32 的位置索引 + let pos_data: &Int32Array = as_primitive_array(&pos_array); - for (row_index, offset_window) in offsets.windows(2).enumerate() { - let pos = pos_data.values()[row_index]; - let start = offset_window[0].as_usize(); - let end = offset_window[1].as_usize(); - let is_item_null = items_array.is_null(row_index); + for (row_index, window) in offsets.windows(2).enumerate() { + let start = window[0].as_usize(); + let end = window[1].as_usize(); + let len = end - start; + let pos = 
pos_data.value(row_index); if list_array.is_null(row_index) { - // In Spark if value of the array is NULL than nothing happens - mutable_values.extend_nulls(1); - new_offsets.push(new_offsets[row_index] + O::one()); - new_nulls.push(false); + // 列表为 NULL:不往子数组写任何元素,offset 保持不变 + new_offsets.push(new_offsets[row_index]); + list_valid.push(false); continue; } if pos == 0 { return Err(DataFusionError::Internal( - "Position for array_insert should be greter or less than zero".to_string(), + "Position for array_insert should be greater or less than zero".to_string(), )); } - if (pos > 0) || ((-pos).as_usize() < (end - start + 1)) { - let corrected_pos = if pos > 0 { - (pos - 1).as_usize() - } else { - end - start - (-pos).as_usize() + if legacy_mode { 0 } else { 1 } - }; - let new_array_len = std::cmp::max(end - start + 1, corrected_pos); - if new_array_len > MAX_ROUNDED_ARRAY_LENGTH { - return Err(DataFusionError::Internal(format!( - "Max array length in Spark is {MAX_ROUNDED_ARRAY_LENGTH:?}, but got {new_array_len:?}" - ))); - } + let mut final_len: usize; - if (start + corrected_pos) <= end { - mutable_values.extend(0, start, start + corrected_pos); + if pos > 0 { + // 正数位置(1-based) + let pos1 = pos as usize; + if pos1 <= len + 1 { + // 插入范围内(包含追加到末尾) + let corrected = pos1 - 1; // 0-based + mutable_values.extend(0, start, start + corrected); mutable_values.extend(1, row_index, row_index + 1); - mutable_values.extend(0, start + corrected_pos, end); - new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len)); + mutable_values.extend(0, start + corrected, end); + final_len = len + 1; } else { + // 超出末尾,需要填充 null + let corrected = pos1 - 1; + let padding = corrected - len; mutable_values.extend(0, start, end); - mutable_values.extend_nulls(new_array_len - (end - start)); + mutable_values.extend_nulls(padding); mutable_values.extend(1, row_index, row_index + 1); - // In that case spark actualy makes array longer than expected; - // For example, if pos is equal to 5, len is eq to 3, than resulted len will be 5 - new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len) + O::one()); + final_len = corrected + 1; // 等于 pos1 } } else { - // This comment is takes from the Apache Spark source code as is: - // special case- if the new position is negative but larger than the current array size - // place the new item at start of array, place the current array contents at the end - // and fill the newly created array elements inbetween with a null - let base_offset = if legacy_mode { 1 } else { 0 }; - let new_array_len = (-pos + base_offset).as_usize(); - if new_array_len > MAX_ROUNDED_ARRAY_LENGTH { - return Err(DataFusionError::Internal(format!( - "Max array length in Spark is {MAX_ROUNDED_ARRAY_LENGTH:?}, but got {new_array_len:?}" - ))); - } - mutable_values.extend(1, row_index, row_index + 1); - mutable_values.extend_nulls(new_array_len - (end - start + 1)); - mutable_values.extend(0, start, end); - new_offsets.push(new_offsets[row_index] + O::usize_as(new_array_len)); - } - if is_item_null { - if (start == end) || (values.is_null(row_index)) { - new_nulls.push(false) + // 负索引(从末尾数,1-based) + let k = (-pos) as usize; + + if k <= len { + // 可插入范围内 + // 非 legacy:-1 视为追加到末尾(corrected = len - k + 1) + // legacy: -1 视为插在最后一个元素前面(corrected = len - k) + let base_offset = if legacy_mode { 0 } else { 1 }; + let corrected = len - k + base_offset; + mutable_values.extend(0, start, start + corrected); + mutable_values.extend(1, row_index, row_index + 1); + mutable_values.extend(0, start + 
corrected, end); + final_len = len + 1; } else { - new_nulls.push(true) + // 负索引超出开头的 Spark 特例: + // 放 item 到最前,然后补若干 null,再拼接原数组 + // 最终长度 = k + base_offset,其中 legacy 模式下 base_offset = 1,否则为 0 + let base_offset = if legacy_mode { 1 } else { 0 }; + let target_len = k + base_offset; + let padding = target_len.saturating_sub(len + 1); + mutable_values.extend(1, row_index, row_index + 1); // 先放 item + mutable_values.extend_nulls(padding); // 中间补 null + mutable_values.extend(0, start, end); // 再拼原数组 + final_len = target_len; } - } else { - new_nulls.push(true) } + + if final_len > MAX_ROUNDED_ARRAY_LENGTH { + return Err(DataFusionError::Internal(format!( + "Max array length in Spark is {MAX_ROUNDED_ARRAY_LENGTH}, but got {final_len}" + ))); + } + + let prev = new_offsets[row_index].as_usize(); + new_offsets.push(O::usize_as(prev + final_len)); + list_valid.push(true); } - let data = make_array(mutable_values.freeze()); - let data_type = match list_array.data_type() { - DataType::List(field) => field.data_type(), - DataType::LargeList(field) => field.data_type(), + let child = make_array(mutable_values.freeze()); + + // 复用原元素字段(名字/可空性/类型) + let elem_field = match list_array.data_type() { + DataType::List(field) => Arc::clone(field), + DataType::LargeList(field) => Arc::clone(field), _ => unreachable!(), }; - let new_array = GenericListArray::::try_new( - Arc::new(Field::new("item", data_type.clone(), true)), + + // 构造新的 List + let new_list = GenericListArray::::try_new( + elem_field, OffsetBuffer::new(new_offsets.into()), - data, - Some(NullBuffer::new(new_nulls.into())), + child, + Some(NullBuffer::new(list_valid.into())), )?; - Ok(ColumnarValue::Array(Arc::new(new_array))) + Ok(ColumnarValue::Array(Arc::new(new_list))) } impl Display for ArrayInsert { From c12b5b7c1af274f25bcc759ff8a95e42427fded3 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Thu, 30 Oct 2025 13:07:55 +0800 Subject: [PATCH 6/9] comment --- .../src/array_funcs/array_insert.rs | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index 211e0035b8..b9644363a6 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -198,23 +198,25 @@ fn array_insert( pos_array: &ArrayRef, legacy_mode: bool, ) -> DataFusionResult { + // Implementation aligned with Arrow's half-open offset ranges and Spark semantics. 
+ let values = list_array.values(); let offsets = list_array.offsets(); let values_data = values.to_data(); let item_data = items_array.to_data(); - // 预估容量 + // Estimate capacity (original values + inserted items upper bound) let new_capacity = Capacities::Array(values_data.len() + item_data.len()); let mut mutable_values = MutableArrayData::with_capacities(vec![&values_data, &item_data], true, new_capacity); - // offsets 和 list 有效位图 + // New offsets and top-level list validity bitmap let mut new_offsets = Vec::with_capacity(list_array.len() + 1); new_offsets.push(O::usize_as(0)); let mut list_valid = Vec::::with_capacity(list_array.len()); - // Spark 只支持 Int32 的位置索引 + // Spark supports only Int32 position indices let pos_data: &Int32Array = as_primitive_array(&pos_array); for (row_index, window) in offsets.windows(2).enumerate() { @@ -224,7 +226,7 @@ fn array_insert( let pos = pos_data.value(row_index); if list_array.is_null(row_index) { - // 列表为 NULL:不往子数组写任何元素,offset 保持不变 + // Top-level list row is NULL: do not write any child values and do not advance offset new_offsets.push(new_offsets[row_index]); list_valid.push(false); continue; @@ -239,32 +241,32 @@ fn array_insert( let mut final_len: usize; if pos > 0 { - // 正数位置(1-based) + // Positive index (1-based) let pos1 = pos as usize; if pos1 <= len + 1 { - // 插入范围内(包含追加到末尾) - let corrected = pos1 - 1; // 0-based + // In-range insertion (including appending to end) + let corrected = pos1 - 1; // 0-based insertion point mutable_values.extend(0, start, start + corrected); mutable_values.extend(1, row_index, row_index + 1); mutable_values.extend(0, start + corrected, end); final_len = len + 1; } else { - // 超出末尾,需要填充 null + // Beyond end: pad with nulls then insert let corrected = pos1 - 1; let padding = corrected - len; mutable_values.extend(0, start, end); mutable_values.extend_nulls(padding); mutable_values.extend(1, row_index, row_index + 1); - final_len = corrected + 1; // 等于 pos1 + final_len = corrected + 1; // equals pos1 } } else { - // 负索引(从末尾数,1-based) + // Negative index (1-based from the end) let k = (-pos) as usize; if k <= len { - // 可插入范围内 - // 非 legacy:-1 视为追加到末尾(corrected = len - k + 1) - // legacy: -1 视为插在最后一个元素前面(corrected = len - k) + // In-range negative insertion + // Non-legacy: -1 behaves like append to end (corrected = len - k + 1) + // Legacy: -1 behaves like insert before the last element (corrected = len - k) let base_offset = if legacy_mode { 0 } else { 1 }; let corrected = len - k + base_offset; mutable_values.extend(0, start, start + corrected); @@ -272,15 +274,15 @@ fn array_insert( mutable_values.extend(0, start + corrected, end); final_len = len + 1; } else { - // 负索引超出开头的 Spark 特例: - // 放 item 到最前,然后补若干 null,再拼接原数组 - // 最终长度 = k + base_offset,其中 legacy 模式下 base_offset = 1,否则为 0 + // Negative index beyond the start (Spark-specific behavior): + // Place item first, then pad with nulls, then append the original array. + // Final length = k + base_offset, where base_offset = 1 in legacy mode, otherwise 0. 
let base_offset = if legacy_mode { 1 } else { 0 }; let target_len = k + base_offset; let padding = target_len.saturating_sub(len + 1); - mutable_values.extend(1, row_index, row_index + 1); // 先放 item - mutable_values.extend_nulls(padding); // 中间补 null - mutable_values.extend(0, start, end); // 再拼原数组 + mutable_values.extend(1, row_index, row_index + 1); // insert item first + mutable_values.extend_nulls(padding); // pad nulls + mutable_values.extend(0, start, end); // append original values final_len = target_len; } } @@ -298,14 +300,14 @@ fn array_insert( let child = make_array(mutable_values.freeze()); - // 复用原元素字段(名字/可空性/类型) + // Reuse the original list element field (name/type/nullability) let elem_field = match list_array.data_type() { DataType::List(field) => Arc::clone(field), DataType::LargeList(field) => Arc::clone(field), _ => unreachable!(), }; - // 构造新的 List + // Build the resulting list array let new_list = GenericListArray::::try_new( elem_field, OffsetBuffer::new(new_offsets.into()), From 3d1b288d95c8fa3f7fa171921b8751dc6b074ed2 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Thu, 30 Oct 2025 13:14:55 +0800 Subject: [PATCH 7/9] add ut --- .../src/array_funcs/array_insert.rs | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index b9644363a6..2969e4cfe1 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -452,4 +452,37 @@ mod test { Ok(()) } + + #[test] + fn test_array_insert_bug_repro_null_item_pos1_fixed() -> Result<()> { + use arrow::array::{Array, ArrayRef, Int32Array, ListArray}; + use arrow::datatypes::Int32Type; + + // row0 = [0, null, 0] + // row1 = [1, null, 1] + let list = ListArray::from_iter_primitive::(vec![ + Some(vec![Some(0), None, Some(0)]), + Some(vec![Some(1), None, Some(1)]), + ]); + + let positions = Int32Array::from(vec![1, 1]); + let items = Int32Array::from(vec![None, None]); + + let ColumnarValue::Array(result) = array_insert( + &list, + &(Arc::new(items) as ArrayRef), + &(Arc::new(positions) as ArrayRef), + false, // legacy_mode = false + )? + else { + unreachable!() + }; + + let expected = ListArray::from_iter_primitive::(vec![ + Some(vec![None, Some(0), None, Some(0)]), + Some(vec![None, Some(1), None, Some(1)]) + ]); + assert_eq!(&result.to_data(), &expected.to_data()); + Ok(()) + } } From 557293b785b8cd552cdc78ea6382a626f5e8f601 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Thu, 30 Oct 2025 13:16:29 +0800 Subject: [PATCH 8/9] fix rs style --- native/spark-expr/src/array_funcs/array_insert.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index 2969e4cfe1..d4f8d02651 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -16,11 +16,10 @@ // under the License. 
use arrow::array::{make_array, Array, ArrayRef, GenericListArray, Int32Array, OffsetSizeTrait}; -use arrow::datatypes::{DataType, Field, Schema}; +use arrow::datatypes::{DataType, Schema}; use arrow::{ array::{as_primitive_array, Capacities, MutableArrayData}, buffer::{NullBuffer, OffsetBuffer}, - datatypes::ArrowNativeType, record_batch::RecordBatch, }; use datafusion::common::{ @@ -238,7 +237,7 @@ fn array_insert( )); } - let mut final_len: usize; + let final_len: usize; if pos > 0 { // Positive index (1-based) From aa7287bbb4b7fec0d19b5e4cd34514b638d0562f Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Thu, 30 Oct 2025 13:19:47 +0800 Subject: [PATCH 9/9] fix rs style --- native/spark-expr/src/array_funcs/array_insert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native/spark-expr/src/array_funcs/array_insert.rs b/native/spark-expr/src/array_funcs/array_insert.rs index d4f8d02651..14d91c30b4 100644 --- a/native/spark-expr/src/array_funcs/array_insert.rs +++ b/native/spark-expr/src/array_funcs/array_insert.rs @@ -479,7 +479,7 @@ mod test { let expected = ListArray::from_iter_primitive::(vec![ Some(vec![None, Some(0), None, Some(0)]), - Some(vec![None, Some(1), None, Some(1)]) + Some(vec![None, Some(1), None, Some(1)]), ]); assert_eq!(&result.to_data(), &expected.to_data()); Ok(())
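
To make the position handling in the rewritten array_insert (PATCH 5/9, with comments translated in PATCH 6/9) easier to verify, here is a sketch of one more case written in the style of the repro test added in PATCH 7/9. It is not part of the patch series: the test name is made up, and it assumes it sits in the same mod test so that the module-private array_insert helper, ColumnarValue, Arc and Result are in scope exactly as in the existing test.

#[test]
fn test_array_insert_position_semantics_sketch() -> Result<()> {
    use arrow::array::{ArrayRef, Int32Array, ListArray};
    use arrow::datatypes::Int32Type;

    // Three identical input rows, each [1, 2, 3].
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        Some(vec![Some(1), Some(2), Some(3)]),
        Some(vec![Some(1), Some(2), Some(3)]),
    ]);

    // Row 0: 1-based position 2 inserts before the second element.
    // Row 1: position 5 points past the end, so the gap is padded with nulls.
    // Row 2: position -1 appends at the end (non-legacy negative indexing).
    let positions = Int32Array::from(vec![2, 5, -1]);
    let items = Int32Array::from(vec![Some(99), Some(99), Some(99)]);

    let ColumnarValue::Array(result) = array_insert(
        &list,
        &(Arc::new(items) as ArrayRef),
        &(Arc::new(positions) as ArrayRef),
        false, // legacy_mode = false
    )?
    else {
        unreachable!()
    };

    let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(99), Some(2), Some(3)]),
        Some(vec![Some(1), Some(2), Some(3), None, Some(99)]),
        Some(vec![Some(1), Some(2), Some(3), Some(99)]),
    ]);
    assert_eq!(&result.to_data(), &expected.to_data());
    Ok(())
}

Row 1 is the case where the result row is longer than len + 1, which is why the rewrite tracks final_len per row instead of assuming len + 1.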
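
Negative positions are the subtler part of the rewrite, because the meaning of -1 depends on legacy_mode and positions pointing past the start of the array get the Spark-specific "item first, then nulls, then the original values" treatment. Under the same assumptions as the sketch above (hypothetical test name, same mod test), these are the rows the rewritten function is expected to produce for both values of legacy_mode.

#[test]
fn test_array_insert_negative_position_sketch() -> Result<()> {
    use arrow::array::{ArrayRef, Int32Array, ListArray};
    use arrow::datatypes::Int32Type;

    // Two input rows, each [1, 2, 3]; row 0 uses position -1, row 1 uses position -5.
    let make_list = || {
        ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(1), Some(2), Some(3)]),
            Some(vec![Some(1), Some(2), Some(3)]),
        ])
    };
    let make_items = || Int32Array::from(vec![Some(99), Some(99)]);
    let positions = Int32Array::from(vec![-1, -5]);

    // legacy_mode = false: -1 appends at the end; -5 is past the start, so the item
    // goes first, followed by one null, followed by the original values (length 5).
    let ColumnarValue::Array(result) = array_insert(
        &make_list(),
        &(Arc::new(make_items()) as ArrayRef),
        &(Arc::new(positions.clone()) as ArrayRef),
        false,
    )?
    else {
        unreachable!()
    };
    let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3), Some(99)]),
        Some(vec![Some(99), None, Some(1), Some(2), Some(3)]),
    ]);
    assert_eq!(&result.to_data(), &expected.to_data());

    // legacy_mode = true: -1 inserts before the last element, and the past-the-start
    // case grows to length 6 instead of 5 (base_offset = 1 in that branch above).
    let ColumnarValue::Array(result) = array_insert(
        &make_list(),
        &(Arc::new(make_items()) as ArrayRef),
        &(Arc::new(positions) as ArrayRef),
        true,
    )?
    else {
        unreachable!()
    };
    let expected = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(99), Some(3)]),
        Some(vec![Some(99), None, None, Some(1), Some(2), Some(3)]),
    ]);
    assert_eq!(&result.to_data(), &expected.to_data());
    Ok(())
}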
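
The offset and validity handling for NULL list rows (the new_offsets.push(new_offsets[row_index]) and list_valid.push(false) path in PATCH 5/9) can also be illustrated with a small standalone arrow-rs snippet. This is a hypothetical example, not code from the repository: a NULL top-level row keeps an empty child range, so the previous offset is simply repeated and the row is marked invalid in the list's NullBuffer, whereas the old code appended a null value to the child array and advanced the offset by one.

use std::sync::Arc;

use arrow::array::{Array, ArrayRef, GenericListArray, Int32Array};
use arrow::buffer::{NullBuffer, OffsetBuffer};
use arrow::datatypes::{DataType, Field};

fn main() {
    // Child values for rows 0 and 2 only: row 0 = [1, 2, 3], row 2 = [4, 5].
    let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5]));
    // Row 1 is a NULL list: its range [3, 3) is empty, i.e. the previous offset repeats.
    let offsets = OffsetBuffer::new(vec![0i32, 3, 3, 5].into());
    // Top-level validity bitmap: only row 1 is NULL.
    let validity = NullBuffer::new(vec![true, false, true].into());
    let field = Arc::new(Field::new("item", DataType::Int32, true));

    let list = GenericListArray::<i32>::try_new(field, offsets, values, Some(validity)).unwrap();
    assert_eq!(list.len(), 3);
    assert!(list.is_null(1)); // the NULL row consumes no child values
    assert_eq!(list.value_length(1), 0);
}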