Skip to content

Commit 708f97d

Browse files
committed
c/hvapprox.c (compute_hua_wang_direction): Help the vectorizer by pre-computing sin() and cos().
1 parent 90016ab commit 708f97d

File tree

1 file changed

+41
-16
lines changed

1 file changed

+41
-16
lines changed

c/hvapprox.c

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -567,26 +567,46 @@ compute_theta(long double * restrict theta, dimension_t dim,
567567
}
568568
}
569569

570+
/*
571+
FIXME: This function is not vectorized well because it uses "long double"
572+
and because vectorization requires -funsafe-math-optimizations
573+
*/
574+
static void
575+
compute_sin_cos_theta(const long double * restrict theta, dimension_t dm1,
576+
double * restrict sin_theta, double * restrict cos_theta)
577+
{
578+
for (size_t k = 0; k < dm1; k++) {
579+
cos_theta[k] = STATIC_CAST(double, theta[k]);
580+
sin_theta[k] = sin(cos_theta[k]);
581+
cos_theta[k] = cos(cos_theta[k]);
582+
}
583+
}
584+
585+
/**
586+
w_0 = \prod_{k=0}^{dim - 1} sin(theta_k)
587+
w_j = cos(theta_{dim - j - 1}) * \prod_{k=0}^{dim - j - 1} sin(theta_k)
588+
589+
*/
570590
static void
571591
compute_hua_wang_direction(double * restrict direction, dimension_t dim,
572-
const long double * restrict theta)
592+
const double * restrict sin_theta, const double * restrict cos_theta)
573593
{
574594
ASSUME(2 <= dim && dim <= 32);
575-
dimension_t k, j;
576-
direction[0] = STATIC_CAST(double, sinl(theta[0]));
577-
for (k = 1; k < dim - 1; k++)
578-
direction[0] *= STATIC_CAST(double, sinl(theta[k]));
579-
for (j = 1; j < dim; j++) {
580-
direction[j] = STATIC_CAST(double, cosl(theta[dim - j - 1]));
581-
for (k = 0; k < dim - j - 1; k++) {
582-
direction[j] *= STATIC_CAST(double, sinl(theta[k]));
583-
}
584-
}
585-
for (k = 0; k < dim; k++) {
586-
// FIXME: Can direction[k] be negative? If not, then we don't need fabs().
587-
direction[k] = (fabs(direction[k]) <= ALMOST_ZERO_WEIGHT)
595+
dimension_t j;
596+
for (j = 0; j < dim - 1; j++)
597+
direction[j] = sin_theta[0];
598+
for (j = dim - 2; j > 0; j--)
599+
direction[j - 1] = sin_theta[dim - j - 1] * direction[j];
600+
for (j = 1; j < dim - 1; j++)
601+
direction[j] *= cos_theta[dim - j - 1];
602+
direction[dim - 1] = cos_theta[0];
603+
604+
for (j = 0; j < dim; j++) {
605+
assert(direction[j] >= 0);
606+
// FIXME: Can direction[j] be negative? If not, then we don't need fabs().
607+
direction[j] = (fabs(direction[j]) <= ALMOST_ZERO_WEIGHT)
588608
? 1. / ALMOST_ZERO_WEIGHT
589-
: 1. / direction[k];
609+
: 1. / direction[j];
590610
}
591611
}
592612

@@ -615,15 +635,20 @@ hv_approx_hua_wang(const double * restrict data, int nobjs, int n,
615635
// FIXME: OpenMP: #pragma omp parallel
616636
{
617637
long double * theta = malloc((dim - 1) * sizeof(*theta));
638+
double * sin_theta = malloc((dim - 1) * sizeof(*sin_theta));
639+
double * cos_theta = malloc((dim - 1) * sizeof(*cos_theta));
618640
double * w = malloc(dim * sizeof(*w));
619641
// FIXME: Add OpenMP: #pragma omp for reduction(+:expected)
620642
for (uint_fast32_t j = 0; j < nsamples; j++) {
621643
compute_polar_sample(theta, dim - 1, j, nsamples, polar_a);
622644
compute_theta(theta, dim, int_all);
623-
compute_hua_wang_direction(w, dim, theta);
645+
compute_sin_cos_theta(theta, dim - 1, sin_theta, cos_theta);
646+
compute_hua_wang_direction(w, dim, sin_theta, cos_theta);
624647
expected += get_expected_value(points, npoints, dim, w);
625648
}
626649
free(theta);
650+
free(sin_theta);
651+
free(cos_theta);
627652
free(w);
628653
}
629654
free((void *) int_all);

0 commit comments

Comments
 (0)