@@ -125,13 +125,17 @@ pub fn transform_arrow(array: ArrayRef, transform: &Transform) -> Result<ArrayRe
125125 ) ) ,
126126 ( DataType :: Int32 , Transform :: Bucket ( m) ) => Ok ( Arc :: < PrimitiveArray < Int32Type > > :: new (
127127 unary ( as_primitive_array :: < Int32Type > ( & array) , |i| {
128- ( fasthash:: murmur3:: hash32_with_seed ( ( i as i64 ) . to_le_bytes ( ) , 0 ) as i32 )
128+ let mut buffer = std:: io:: Cursor :: new ( ( i as i64 ) . to_le_bytes ( ) ) ;
129+ ( murmur3:: murmur3_32 ( & mut buffer, 0 ) . expect ( "murmur3 hash failled for some reason" )
130+ as i32 )
129131 . rem_euclid ( * m as i32 )
130132 } ) ,
131133 ) ) ,
132134 ( DataType :: Int64 , Transform :: Bucket ( m) ) => Ok ( Arc :: < PrimitiveArray < Int32Type > > :: new (
133135 unary ( as_primitive_array :: < Int64Type > ( & array) , |i| {
134- ( fasthash:: murmur3:: hash32_with_seed ( i. to_le_bytes ( ) , 0 ) as i32 )
136+ let mut buffer = std:: io:: Cursor :: new ( ( i) . to_le_bytes ( ) ) ;
137+ ( murmur3:: murmur3_32 ( & mut buffer, 0 ) . expect ( "murmur3 hash failled for some reason" )
138+ as i32 )
135139 . rem_euclid ( * m as i32 )
136140 } ) ,
137141 ) ) ,
@@ -141,7 +145,9 @@ pub fn transform_arrow(array: ArrayRef, transform: &Transform) -> Result<ArrayRe
141145 Ok ( Arc :: < PrimitiveArray < Int32Type > > :: new ( unary (
142146 as_primitive_array :: < Int32Type > ( & temp) ,
143147 |i| {
144- ( fasthash:: murmur3:: hash32_with_seed ( i. to_le_bytes ( ) , 0 ) as i32 )
148+ let mut buffer = std:: io:: Cursor :: new ( ( i as i64 ) . to_le_bytes ( ) ) ;
149+ ( murmur3:: murmur3_32 ( & mut buffer, 0 )
150+ . expect ( "murmur3 hash failled for some reason" ) as i32 )
145151 . rem_euclid ( * m as i32 )
146152 } ,
147153 ) ) )
@@ -152,7 +158,9 @@ pub fn transform_arrow(array: ArrayRef, transform: &Transform) -> Result<ArrayRe
152158 Ok ( Arc :: < PrimitiveArray < Int32Type > > :: new ( unary (
153159 as_primitive_array :: < Int32Type > ( & temp) ,
154160 |i : i32 | {
155- ( fasthash:: murmur3:: hash32_with_seed ( ( i as i64 ) . to_le_bytes ( ) , 0 ) as i32 )
161+ let mut buffer = std:: io:: Cursor :: new ( ( i as i64 ) . to_le_bytes ( ) ) ;
162+ ( murmur3:: murmur3_32 ( & mut buffer, 0 )
163+ . expect ( "murmur3 hash failled for some reason" ) as i32 )
156164 . rem_euclid ( * m as i32 )
157165 } ,
158166 ) ) )
@@ -164,7 +172,9 @@ pub fn transform_arrow(array: ArrayRef, transform: &Transform) -> Result<ArrayRe
164172 Ok ( Arc :: new ( PrimitiveArray :: < Int32Type > :: new (
165173 ScalarBuffer :: from_iter ( local_array. iter ( ) . map ( |a| {
166174 if let Some ( value) = a {
167- fasthash:: murmur3:: hash32_with_seed ( value. as_bytes ( ) , 0 ) as i32
175+ murmur3:: murmur3_32 ( & mut value. as_bytes ( ) , 0 )
176+ . expect ( "murmur3 hash failled for some reason" )
177+ as i32
168178 } else {
169179 0
170180 }
@@ -386,40 +396,36 @@ mod tests {
386396 // Check value match https://iceberg.apache.org/spec/#appendix-b-32-bit-hash-requirements
387397
388398 // 34 -> 2017239379
389- assert_eq ! (
390- fasthash:: murmur3:: hash32_with_seed( ( 34i32 as i64 ) . to_le_bytes( ) , 0 ) ,
391- 2017239379
392- ) ;
399+ let mut buffer = std:: io:: Cursor :: new ( ( 34i32 as i64 ) . to_le_bytes ( ) ) ;
400+ assert_eq ! ( murmur3:: murmur3_32( & mut buffer, 0 ) . unwrap( ) , 2017239379 ) ;
401+
393402 // 34 -> 2017239379
394- assert_eq ! (
395- fasthash:: murmur3:: hash32_with_seed( ( 34i64 ) . to_le_bytes( ) , 0 ) ,
396- 2017239379
397- ) ;
403+ let mut buffer = std:: io:: Cursor :: new ( ( 34i64 ) . to_le_bytes ( ) ) ;
404+ assert_eq ! ( murmur3:: murmur3_32( & mut buffer, 0 ) . unwrap( ) , 2017239379 ) ;
405+
398406 // daysFromUnixEpoch(2017-11-16) -> 17_486 -> -653330422
407+ let mut buffer = std:: io:: Cursor :: new ( ( 17_486i32 as i64 ) . to_le_bytes ( ) ) ;
399408 assert_eq ! (
400- fasthash :: murmur3:: hash32_with_seed ( ( 17_486i32 as i64 ) . to_le_bytes ( ) , 0 ) as i32 ,
409+ murmur3:: murmur3_32 ( & mut buffer , 0 ) . unwrap ( ) as i32 ,
401410 -653330422
402411 ) ;
412+
403413 // microsFromMidnight(22:31:08) -> 81_068_000_000 -> -662762989
414+ let mut buffer = std:: io:: Cursor :: new ( ( 81_068_000_000i64 ) . to_le_bytes ( ) ) ;
404415 assert_eq ! (
405- fasthash :: murmur3:: hash32_with_seed ( ( 81_068_000_000i64 ) . to_le_bytes ( ) , 0 ) as i32 ,
416+ murmur3:: murmur3_32 ( & mut buffer , 0 ) . unwrap ( ) as i32 ,
406417 -662762989
407418 ) ;
408419
409420 // utf8Bytes(iceberg) -> 1210000089
410421 assert_eq ! (
411- fasthash :: murmur3:: hash32_with_seed ( "iceberg" . as_bytes( ) , 0 ) as i32 ,
422+ murmur3:: murmur3_32 ( & mut "iceberg" . as_bytes( ) , 0 ) . unwrap ( ) as i32 ,
412423 1210000089
413424 ) ;
414425 }
415426
416427 #[ test]
417428 fn test_int32_bucket_transform ( ) {
418- assert_eq ! (
419- fasthash:: murmur3:: hash32_with_seed( 17_486i64 . to_le_bytes( ) , 0 ) as i32 ,
420- -653_330_422
421- ) ;
422-
423429 let array = Arc :: new ( arrow:: array:: Int32Array :: from ( vec ! [
424430 Some ( 34 ) , // Spec value
425431 Some ( 17_486 ) , // number of days between the Unix epoch and 2017-11-16
0 commit comments