@@ -21,8 +21,8 @@ use std::sync::{Arc, OnceLock};
2121use crate :: strings:: { make_and_append_view, StringArrayType } ;
2222use crate :: utils:: { make_scalar_function, utf8_to_str_type} ;
2323use arrow:: array:: {
24- Array , ArrayIter , ArrayRef , AsArray , GenericStringArray , Int64Array , OffsetSizeTrait ,
25- StringViewArray ,
24+ Array , ArrayIter , ArrayRef , AsArray , GenericStringBuilder , Int64Array ,
25+ OffsetSizeTrait , StringViewArray ,
2626} ;
2727use arrow:: datatypes:: DataType ;
2828use arrow_buffer:: { NullBufferBuilder , ScalarBuffer } ;
@@ -448,10 +448,9 @@ where
448448 match args. len ( ) {
449449 1 => {
450450 let iter = ArrayIter :: new ( string_array) ;
451-
452- let result = iter
453- . zip ( start_array. iter ( ) )
454- . map ( |( string, start) | match ( string, start) {
451+ let mut result_builder = GenericStringBuilder :: < T > :: new ( ) ;
452+ for ( string, start) in iter. zip ( start_array. iter ( ) ) {
453+ match ( string, start) {
455454 ( Some ( string) , Some ( start) ) => {
456455 let ( start, end) = get_true_start_end (
457456 string,
@@ -460,47 +459,51 @@ where
460459 enable_ascii_fast_path,
461460 ) ; // start, end is byte-based
462461 let substr = & string[ start..end] ;
463- Some ( substr . to_string ( ) )
462+ result_builder . append_value ( substr ) ;
464463 }
465- _ => None ,
466- } )
467- . collect :: < GenericStringArray < T > > ( ) ;
468- Ok ( Arc :: new ( result) as ArrayRef )
464+ _ => {
465+ result_builder. append_null ( ) ;
466+ }
467+ }
468+ }
469+ Ok ( Arc :: new ( result_builder. finish ( ) ) as ArrayRef )
469470 }
470471 2 => {
471472 let iter = ArrayIter :: new ( string_array) ;
472473 let count_array = count_array_opt. unwrap ( ) ;
474+ let mut result_builder = GenericStringBuilder :: < T > :: new ( ) ;
473475
474- let result = iter
475- . zip ( start_array. iter ( ) )
476- . zip ( count_array. iter ( ) )
477- . map ( |( ( string, start) , count) | {
478- match ( string, start, count) {
479- ( Some ( string) , Some ( start) , Some ( count) ) => {
480- if count < 0 {
481- exec_err ! (
476+ for ( ( string, start) , count) in
477+ iter. zip ( start_array. iter ( ) ) . zip ( count_array. iter ( ) )
478+ {
479+ match ( string, start, count) {
480+ ( Some ( string) , Some ( start) , Some ( count) ) => {
481+ if count < 0 {
482+ return exec_err ! (
482483 "negative substring length not allowed: substr(<str>, {start}, {count})"
483- )
484- } else {
485- if start == i64:: MIN {
486- return exec_err ! ( "negative overflow when calculating skip value" ) ;
487- }
488- let ( start, end) = get_true_start_end (
489- string,
490- start,
491- Some ( count as u64 ) ,
492- enable_ascii_fast_path,
493- ) ; // start, end is byte-based
494- let substr = & string[ start..end] ;
495- Ok ( Some ( substr. to_string ( ) ) )
484+ ) ;
485+ } else {
486+ if start == i64:: MIN {
487+ return exec_err ! (
488+ "negative overflow when calculating skip value"
489+ ) ;
496490 }
491+ let ( start, end) = get_true_start_end (
492+ string,
493+ start,
494+ Some ( count as u64 ) ,
495+ enable_ascii_fast_path,
496+ ) ; // start, end is byte-based
497+ let substr = & string[ start..end] ;
498+ result_builder. append_value ( substr) ;
497499 }
498- _ => Ok ( None ) ,
499500 }
500- } )
501- . collect :: < Result < GenericStringArray < T > > > ( ) ?;
502-
503- Ok ( Arc :: new ( result) as ArrayRef )
501+ _ => {
502+ result_builder. append_null ( ) ;
503+ }
504+ }
505+ }
506+ Ok ( Arc :: new ( result_builder. finish ( ) ) as ArrayRef )
504507 }
505508 other => {
506509 exec_err ! ( "substr was called with {other} arguments. It requires 2 or 3." )
0 commit comments