Skip to content

Commit c56916c

Browse files
authored
Merge pull request #1968 from sayantn/intrinsic-test
Format `f16` values correctly in intrinsic-test
2 parents (5dceb7a + 5536f1c), commit c56916c

File tree

10 files changed

+88 -1100 lines changed

10 files changed

+88 -1100 lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10705,9 +10705,13 @@ pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
1070510705
#[cfg_attr(test, assert_instr(vpmovwb))]
1070610706
pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
1070710707
unsafe {
10708-
let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10709-
let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10710-
transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
10708+
let a = _mm_cvtepi16_epi8(a).as_i8x16();
10709+
let src = simd_shuffle!(
10710+
src.as_i8x16(),
10711+
i8x16::ZERO,
10712+
[0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
10713+
);
10714+
simd_select_bitmask(k as u16, a, src).as_m128i()
1071110715
}
1071210716
}
1071310717

@@ -10719,11 +10723,7 @@ pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
1071910723
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1072010724
#[cfg_attr(test, assert_instr(vpmovwb))]
1072110725
pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
10722-
unsafe {
10723-
let convert = _mm_cvtepi16_epi8(a).as_i8x16();
10724-
let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
10725-
transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
10726-
}
10726+
_mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
1072710727
}
1072810728

1072910729
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.

crates/core_arch/src/x86/avx512f.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11149,10 +11149,7 @@ pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
1114911149
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1115011150
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
1115111151
pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11152-
unsafe {
11153-
let convert = _mm_cvtpd_ps(a);
11154-
transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11155-
}
11152+
unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), k).as_m128() }
1115611153
}
1115711154

1115811155
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -11259,10 +11256,7 @@ pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
1125911256
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1126011257
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
1126111258
pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11262-
unsafe {
11263-
let convert = _mm_cvtpd_epi32(a);
11264-
transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11265-
}
11259+
unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
1126611260
}
1126711261

1126811262
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -42483,9 +42477,13 @@ unsafe extern "C" {
4248342477

4248442478
#[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
4248542479
fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
42480+
#[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
42481+
fn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
4248642482
#[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
4248742483
fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
4248842484

42485+
#[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
42486+
fn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
4248942487
#[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
4249042488
fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
4249142489

crates/core_arch/src/x86/avx512fp16.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
10071007
#[target_feature(enable = "avx512fp16")]
10081008
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10091009
pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
1010-
_mm_comi_sh::<_CMP_NEQ_OS>(a, b)
1010+
_mm_comi_sh::<_CMP_NEQ_US>(a, b)
10111011
}
10121012

10131013
/// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
@@ -1073,7 +1073,7 @@ pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
10731073
#[target_feature(enable = "avx512fp16")]
10741074
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
10751075
pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
1076-
_mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
1076+
_mm_comi_sh::<_CMP_NEQ_UQ>(a, b)
10771077
}
10781078

10791079
/// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into

0 commit comments

Comments
 (0)