@@ -567,26 +567,46 @@ compute_theta(long double * restrict theta, dimension_t dim,
567567 }
568568}
569569
570+ /*
571+ FIXME: This function is not vectorized well because it uses "long double"
572+ and because vectorization requires -funsafe-math-optimizations
573+ */
574+ static void
575+ compute_sin_cos_theta (const long double * restrict theta , dimension_t dm1 ,
576+ double * restrict sin_theta , double * restrict cos_theta )
577+ {
578+ for (size_t k = 0 ; k < dm1 ; k ++ ) {
579+ cos_theta [k ] = STATIC_CAST (double , theta [k ]);
580+ sin_theta [k ] = sin (cos_theta [k ]);
581+ cos_theta [k ] = cos (cos_theta [k ]);
582+ }
583+ }
584+
585+ /**
586+ w_0 = \prod_{k=0}^{dim - 1} sin(theta_k)
587+ w_j = cos(theta_{dim - j - 1}) * \prod_{k=0}^{dim - j - 1} sin(theta_k)
588+
589+ */
570590static void
571591compute_hua_wang_direction (double * restrict direction , dimension_t dim ,
572- const long double * restrict theta )
592+ const double * restrict sin_theta , const double * restrict cos_theta )
573593{
574594 ASSUME (2 <= dim && dim <= 32 );
575- dimension_t k , j ;
576- direction [ 0 ] = STATIC_CAST ( double , sinl ( theta [ 0 ]));
577- for ( k = 1 ; k < dim - 1 ; k ++ )
578- direction [ 0 ] *= STATIC_CAST ( double , sinl ( theta [ k ]));
579- for ( j = 1 ; j < dim ; j ++ ) {
580- direction [ j ] = STATIC_CAST ( double , cosl ( theta [ dim - j - 1 ]));
581- for ( k = 0 ; k < dim - j - 1 ; k ++ ) {
582- direction [j ] *= STATIC_CAST ( double , sinl ( theta [ k ])) ;
583- }
584- }
585- for ( k = 0 ; k < dim ; k ++ ) {
586- // FIXME: Can direction[k ] be negative? If not, then we don't need fabs().
587- direction [k ] = (fabs (direction [k ]) <= ALMOST_ZERO_WEIGHT )
595+ dimension_t j ;
596+ for ( j = 0 ; j < dim - 1 ; j ++ )
597+ direction [ j ] = sin_theta [ 0 ];
598+ for ( j = dim - 2 ; j > 0 ; j -- )
599+ direction [ j - 1 ] = sin_theta [ dim - j - 1 ] * direction [ j ];
600+ for ( j = 1 ; j < dim - 1 ; j ++ )
601+ direction [ j ] *= cos_theta [ dim - j - 1 ];
602+ direction [dim - 1 ] = cos_theta [ 0 ] ;
603+
604+ for ( j = 0 ; j < dim ; j ++ ) {
605+ assert ( direction [ j ] >= 0 );
606+ // FIXME: Can direction[j ] be negative? If not, then we don't need fabs().
607+ direction [j ] = (fabs (direction [j ]) <= ALMOST_ZERO_WEIGHT )
588608 ? 1. / ALMOST_ZERO_WEIGHT
589- : 1. / direction [k ];
609+ : 1. / direction [j ];
590610 }
591611}
592612
@@ -615,15 +635,20 @@ hv_approx_hua_wang(const double * restrict data, int nobjs, int n,
615635 // FIXME: OpenMP: #pragma omp parallel
616636 {
617637 long double * theta = malloc ((dim - 1 ) * sizeof (* theta ));
638+ double * sin_theta = malloc ((dim - 1 ) * sizeof (* sin_theta ));
639+ double * cos_theta = malloc ((dim - 1 ) * sizeof (* cos_theta ));
618640 double * w = malloc (dim * sizeof (* w ));
619641 // FIXME: Add OpenMP: #pragma omp for reduction(+:expected)
620642 for (uint_fast32_t j = 0 ; j < nsamples ; j ++ ) {
621643 compute_polar_sample (theta , dim - 1 , j , nsamples , polar_a );
622644 compute_theta (theta , dim , int_all );
623- compute_hua_wang_direction (w , dim , theta );
645+ compute_sin_cos_theta (theta , dim - 1 , sin_theta , cos_theta );
646+ compute_hua_wang_direction (w , dim , sin_theta , cos_theta );
624647 expected += get_expected_value (points , npoints , dim , w );
625648 }
626649 free (theta );
650+ free (sin_theta );
651+ free (cos_theta );
627652 free (w );
628653 }
629654 free ((void * ) int_all );
0 commit comments