@@ -342,82 +342,80 @@ where
342342 * item = 0 ;
343343 }
344344 for ( byte_ix, data_byte_ix) in data[ ..length] . iter ( ) . enumerate ( ) {
345- {
346- let block_id_ptr = & mut block_id[ byte_ix] ;
347- let ix: usize = byte_ix. wrapping_mul ( bitmaplen) ;
348- let insert_cost_ix: usize =
349- u64:: from ( data_byte_ix. clone ( ) ) . wrapping_mul ( num_histograms as u64 ) as usize ;
350- let mut min_cost: super :: util:: floatX = 1e38 as super :: util:: floatX ;
351- let mut block_switch_cost: super :: util:: floatX = block_switch_bitcost;
352- if false {
353- // nonvectorized version: same code below
354- for ( k, insert_cost_iter) in insert_cost
355- [ insert_cost_ix..( insert_cost_ix + num_histograms) ]
356- . iter ( )
357- . enumerate ( )
358- {
359- let cost_iter = & mut cost[ ( k >> 3 ) ] [ k & 7 ] ;
360- * cost_iter += * insert_cost_iter;
361- if * cost_iter < min_cost {
362- min_cost = * cost_iter;
363- * block_id_ptr = k as u8 ;
364- }
365- }
366- } else {
367- // main (vectorized) loop
368- let insert_cost_slice = insert_cost. split_at ( insert_cost_ix) . 1 ;
369- for ( v_index, cost_iter) in cost
370- . split_at_mut ( num_histograms >> 3 )
371- . 0
372- . iter_mut ( )
373- . enumerate ( )
374- {
375- let base_index = v_index << 3 ;
376- let mut local_insert_cost = [ 0.0 as super :: util:: floatX ; 8 ] ;
377- local_insert_cost
378- . clone_from_slice ( insert_cost_slice. split_at ( base_index) . 1 . split_at ( 8 ) . 0 ) ;
379- for sub_index in 0usize ..8usize {
380- cost_iter[ sub_index] += local_insert_cost[ sub_index] ;
381- let final_cost = cost_iter[ sub_index] ;
382- if final_cost < min_cost {
383- min_cost = final_cost;
384- * block_id_ptr = ( base_index + sub_index) as u8 ;
385- }
386- }
345+ let block_id_ptr = & mut block_id[ byte_ix] ;
346+ let ix: usize = byte_ix. wrapping_mul ( bitmaplen) ;
347+ let insert_cost_ix: usize =
348+ u64:: from ( data_byte_ix. clone ( ) ) . wrapping_mul ( num_histograms as u64 ) as usize ;
349+ let mut min_cost: super :: util:: floatX = 1e38 as super :: util:: floatX ;
350+ let mut block_switch_cost: super :: util:: floatX = block_switch_bitcost;
351+ if false {
352+ // nonvectorized version: same code below
353+ for ( k, insert_cost_iter) in insert_cost
354+ [ insert_cost_ix..( insert_cost_ix + num_histograms) ]
355+ . iter ( )
356+ . enumerate ( )
357+ {
358+ let cost_iter = & mut cost[ ( k >> 3 ) ] [ k & 7 ] ;
359+ * cost_iter += * insert_cost_iter;
360+ if * cost_iter < min_cost {
361+ min_cost = * cost_iter;
362+ * block_id_ptr = k as u8 ;
387363 }
388- let vectorized_offset = ( ( num_histograms >> 3 ) << 3 ) ;
389- let mut k = vectorized_offset;
390- //remainder loop for
391- for insert_cost_iter in insert_cost
392- . split_at ( insert_cost_ix + vectorized_offset)
393- . 1
394- . split_at ( num_histograms & 7 )
395- . 0
396- . iter ( )
397- {
398- let cost_iter = & mut cost[ ( k >> 3 ) ] ;
399- cost_iter[ k & 7 ] += * insert_cost_iter;
400- if cost_iter[ k & 7 ] < min_cost {
401- min_cost = cost_iter[ k & 7 ] ;
402- * block_id_ptr = k as u8 ;
364+ }
365+ } else {
366+ // main (vectorized) loop
367+ let insert_cost_slice = insert_cost. split_at ( insert_cost_ix) . 1 ;
368+ for ( v_index, cost_iter) in cost
369+ . split_at_mut ( num_histograms >> 3 )
370+ . 0
371+ . iter_mut ( )
372+ . enumerate ( )
373+ {
374+ let base_index = v_index << 3 ;
375+ let mut local_insert_cost = [ 0.0 as super :: util:: floatX ; 8 ] ;
376+ local_insert_cost
377+ . clone_from_slice ( insert_cost_slice. split_at ( base_index) . 1 . split_at ( 8 ) . 0 ) ;
378+ for sub_index in 0usize ..8usize {
379+ cost_iter[ sub_index] += local_insert_cost[ sub_index] ;
380+ let final_cost = cost_iter[ sub_index] ;
381+ if final_cost < min_cost {
382+ min_cost = final_cost;
383+ * block_id_ptr = ( base_index + sub_index) as u8 ;
403384 }
404- k += 1 ;
405385 }
406386 }
407- if byte_ix < 2000usize {
408- block_switch_cost *= ( 0.77 as super :: util:: floatX
409- + 0.07 as super :: util:: floatX * byte_ix as ( super :: util:: floatX )
410- / 2000i32 as ( super :: util:: floatX ) ) ;
387+ let vectorized_offset = ( ( num_histograms >> 3 ) << 3 ) ;
388+ let mut k = vectorized_offset;
389+ //remainder loop for
390+ for insert_cost_iter in insert_cost
391+ . split_at ( insert_cost_ix + vectorized_offset)
392+ . 1
393+ . split_at ( num_histograms & 7 )
394+ . 0
395+ . iter ( )
396+ {
397+ let cost_iter = & mut cost[ ( k >> 3 ) ] ;
398+ cost_iter[ k & 7 ] += * insert_cost_iter;
399+ if cost_iter[ k & 7 ] < min_cost {
400+ min_cost = cost_iter[ k & 7 ] ;
401+ * block_id_ptr = k as u8 ;
402+ }
403+ k += 1 ;
411404 }
412- update_cost_and_signal (
413- num_histograms as u32 ,
414- ix,
415- min_cost,
416- block_switch_cost,
417- cost,
418- switch_signal,
419- ) ;
420405 }
406+ if byte_ix < 2000usize {
407+ block_switch_cost *= ( 0.77 as super :: util:: floatX
408+ + 0.07 as super :: util:: floatX * byte_ix as ( super :: util:: floatX )
409+ / 2000i32 as ( super :: util:: floatX ) ) ;
410+ }
411+ update_cost_and_signal (
412+ num_histograms as u32 ,
413+ ix,
414+ min_cost,
415+ block_switch_cost,
416+ cost,
417+ switch_signal,
418+ ) ;
421419 }
422420 {
423421 let mut byte_ix: usize = length. wrapping_sub ( 1 ) ;
0 commit comments