Skip to content

Commit 33bcb67

Browse files
committed
Fix array-to-string cast missing null values
1 parent 9f15314 commit 33bcb67

2 files changed

Lines changed: 237 additions & 0 deletions

File tree

native-engine/datafusion-ext-commons/src/arrow/cast.rs

Lines changed: 165 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,49 @@ pub fn cast_impl(
314314

315315
Arc::new(builder.finish())
316316
}
317+
// array to string (spark compatible)
318+
(&DataType::List(_), &DataType::Utf8) => {
319+
let list_array = as_list_array(array);
320+
let values = list_array.values();
321+
let casted_values = cast_impl(values, &DataType::Utf8, match_struct_fields)?;
322+
let string_values = as_string_array(&casted_values);
323+
324+
let mut builder = StringBuilder::new();
325+
326+
for row_idx in 0..list_array.len() {
327+
if list_array.is_null(row_idx) {
328+
builder.append_null();
329+
} else {
330+
let mut row_str = String::from("[");
331+
let start = list_array.value_offsets()[row_idx] as usize;
332+
let end = list_array.value_offsets()[row_idx + 1] as usize;
333+
let num_elements = end - start;
334+
335+
if num_elements > 0 {
336+
if values.is_null(start) {
337+
row_str.push_str("null");
338+
} else {
339+
row_str.push_str(string_values.value(start));
340+
}
341+
342+
for i in 1..num_elements {
343+
row_str.push(',');
344+
if values.is_null(start + i) {
345+
row_str.push_str(" null");
346+
} else {
347+
row_str.push(' ');
348+
row_str.push_str(string_values.value(start + i));
349+
}
350+
}
351+
}
352+
353+
row_str.push(']');
354+
builder.append_value(&row_str);
355+
}
356+
}
357+
358+
Arc::new(builder.finish())
359+
}
317360
_ => {
318361
// default cast
319362
arrow::compute::kernels::cast::cast(array, cast_type)?
@@ -520,6 +563,7 @@ fn to_date(s: &str) -> Option<i32> {
520563

521564
#[cfg(test)]
522565
mod test {
566+
use arrow::buffer::OffsetBuffer;
523567
use datafusion::common::cast::{as_decimal128_array, as_float64_array, as_int32_array};
524568

525569
use super::*;
@@ -1013,4 +1057,125 @@ mod test {
10131057
&StringArray::from_iter(vec![Some("{1 -> {x, true}, 2 -> {y, null}}")])
10141058
);
10151059
}
1060+
1061+
#[test]
1062+
fn test_array_to_string() {
1063+
// Create a list array with int32 elements
1064+
let values = Int32Array::from(vec![
1065+
Some(1),
1066+
Some(2),
1067+
Some(3),
1068+
None,
1069+
Some(5),
1070+
Some(6),
1071+
None,
1072+
None,
1073+
]);
1074+
let offsets = OffsetBuffer::new(vec![0, 3, 5, 8].into());
1075+
let list_array: ArrayRef = Arc::new(ListArray::new(
1076+
Arc::new(Field::new("item", DataType::Int32, true)),
1077+
offsets,
1078+
Arc::new(values),
1079+
None,
1080+
));
1081+
1082+
let casted = cast(&list_array, &DataType::Utf8).unwrap();
1083+
assert_eq!(
1084+
as_string_array(&casted),
1085+
&StringArray::from_iter(vec![
1086+
Some("[1, 2, 3]"),
1087+
Some("[null, 5]"),
1088+
Some("[6, null, null]"),
1089+
])
1090+
);
1091+
}
1092+
1093+
#[test]
1094+
fn test_array_to_string_with_null_array() {
1095+
// Create a list array where some rows are entirely null
1096+
let values = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]);
1097+
let offsets = OffsetBuffer::new(vec![0, 2, 2, 4].into());
1098+
let nulls = arrow::buffer::NullBuffer::from(vec![true, false, true]);
1099+
let list_array: ArrayRef = Arc::new(ListArray::new(
1100+
Arc::new(Field::new("item", DataType::Int32, true)),
1101+
offsets,
1102+
Arc::new(values),
1103+
Some(nulls),
1104+
));
1105+
1106+
let casted = cast(&list_array, &DataType::Utf8).unwrap();
1107+
assert_eq!(
1108+
as_string_array(&casted),
1109+
&StringArray::from_iter(vec![Some("[1, 2]"), None, Some("[3, 4]"),])
1110+
);
1111+
}
1112+
1113+
#[test]
1114+
fn test_empty_array_to_string() {
1115+
// Create a list array with empty arrays
1116+
let values = Int32Array::from(vec![] as Vec<Option<i32>>);
1117+
let offsets = OffsetBuffer::new(vec![0, 0, 0].into());
1118+
let list_array: ArrayRef = Arc::new(ListArray::new(
1119+
Arc::new(Field::new("item", DataType::Int32, true)),
1120+
offsets,
1121+
Arc::new(values),
1122+
None,
1123+
));
1124+
1125+
let casted = cast(&list_array, &DataType::Utf8).unwrap();
1126+
assert_eq!(
1127+
as_string_array(&casted),
1128+
&StringArray::from_iter(vec![Some("[]"), Some("[]")])
1129+
);
1130+
}
1131+
1132+
#[test]
1133+
fn test_nested_array_to_string() {
1134+
// Create a nested array: array<array<int>>
1135+
let inner_values = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]);
1136+
let inner_offsets = OffsetBuffer::new(vec![0, 2, 4].into());
1137+
let inner_list = ListArray::new(
1138+
Arc::new(Field::new("item", DataType::Int32, true)),
1139+
inner_offsets,
1140+
Arc::new(inner_values),
1141+
None,
1142+
);
1143+
1144+
let outer_offsets = OffsetBuffer::new(vec![0, 1, 2].into());
1145+
let outer_list: ArrayRef = Arc::new(ListArray::new(
1146+
Arc::new(Field::new(
1147+
"item",
1148+
DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
1149+
true,
1150+
)),
1151+
outer_offsets,
1152+
Arc::new(inner_list),
1153+
None,
1154+
));
1155+
1156+
let casted = cast(&outer_list, &DataType::Utf8).unwrap();
1157+
assert_eq!(
1158+
as_string_array(&casted),
1159+
&StringArray::from_iter(vec![Some("[[1, 2]]"), Some("[[3, 4]]"),])
1160+
);
1161+
}
1162+
1163+
#[test]
1164+
fn test_array_of_strings_to_string() {
1165+
// Create a list array with string elements
1166+
let values = StringArray::from(vec![Some("a"), Some("b"), None, Some("d")]);
1167+
let offsets = OffsetBuffer::new(vec![0, 2, 4].into());
1168+
let list_array: ArrayRef = Arc::new(ListArray::new(
1169+
Arc::new(Field::new("item", DataType::Utf8, true)),
1170+
offsets,
1171+
Arc::new(values),
1172+
None,
1173+
));
1174+
1175+
let casted = cast(&list_array, &DataType::Utf8).unwrap();
1176+
assert_eq!(
1177+
as_string_array(&casted),
1178+
&StringArray::from_iter(vec![Some("[a, b]"), Some("[null, d]"),])
1179+
);
1180+
}
10161181
}

spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronQuerySuite.scala

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -581,4 +581,76 @@ class AuronQuerySuite extends AuronQueryTest with BaseAuronSQLSuite with AuronSQ
581581
}
582582
}
583583
}
584+
585+
test("cast array to string") {
586+
if (AuronTestUtils.isSparkV31OrGreater) {
587+
withTable("t_array") {
588+
sql("""
589+
|create table t_array using parquet as
590+
|select array(1, 2, 3) as arr
591+
|union all select array(4, 5)
592+
|union all select array(null, 7, null)
593+
|""".stripMargin)
594+
595+
checkSparkAnswerAndOperator("select cast(arr as string) from t_array")
596+
}
597+
}
598+
}
599+
600+
test("cast nested array to string") {
601+
if (AuronTestUtils.isSparkV31OrGreater) {
602+
withTable("t_nested_array") {
603+
sql("""
604+
|create table t_nested_array using parquet as
605+
|select array(array(1, 2), array(3, 4, 5)) as arr
606+
|union all select array(array(6), array(7, 8))
607+
|""".stripMargin)
608+
609+
checkSparkAnswerAndOperator("select cast(arr as string) from t_nested_array")
610+
}
611+
}
612+
}
613+
614+
test("cast array with null elements to string") {
615+
if (AuronTestUtils.isSparkV31OrGreater) {
616+
withTable("t_array_nulls") {
617+
sql("""
618+
|create table t_array_nulls using parquet as
619+
|select array(cast(null as int), cast(null as int)) as arr
620+
|union all select array(1, null, 3)
621+
|union all select array(null, 2, null, 4)
622+
|""".stripMargin)
623+
624+
checkSparkAnswerAndOperator("select cast(arr as string) from t_array_nulls")
625+
}
626+
}
627+
}
628+
629+
test("cast array of strings to string") {
630+
if (AuronTestUtils.isSparkV31OrGreater) {
631+
withTable("t_array_strings") {
632+
sql("""
633+
|create table t_array_strings using parquet as
634+
|select array('hello', 'world') as arr
635+
|union all select array('foo', null, 'bar')
636+
|""".stripMargin)
637+
638+
checkSparkAnswerAndOperator("select cast(arr as string) from t_array_strings")
639+
}
640+
}
641+
}
642+
643+
test("cast empty array to string") {
644+
if (AuronTestUtils.isSparkV31OrGreater) {
645+
withTable("t_empty_array") {
646+
sql("""
647+
|create table t_empty_array using parquet as
648+
|select array() as arr
649+
|union all select array(1, 2)
650+
|""".stripMargin)
651+
652+
checkSparkAnswerAndOperator("select cast(arr as string) from t_empty_array")
653+
}
654+
}
655+
}
584656
}

0 commit comments

Comments
 (0)