core/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    transmute(expandloadw_512(mem_addr, src.as_i16x32(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_expandloadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
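
// Illustrative sketch, not part of the original source: shows the expand-load
// behaviour of `_mm512_mask_expandloadu_epi16` described above. With mask
// 0b1010 only lanes 1 and 3 are active, so they receive the first two values
// read contiguously from memory while every inactive lane keeps `src`. The
// function name, mask, and backing array are assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
unsafe fn example_expandloadu_epi16() {
    let mem: [i16; 4] = [10, 20, 30, 40];
    let src = _mm512_set1_epi16(-1);
    // Only two elements (mem[0] and mem[1]) are read, one per active mask bit.
    let r = _mm512_mask_expandloadu_epi16(src, 0b1010, mem.as_ptr());
    let lanes: [i16; 32] = transmute(r);
    assert_eq!(&lanes[..4], &[-1, 10, -1, 20]);
    assert!(lanes[4..].iter().all(|&x| x == -1));
}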

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    transmute(expandloadw_256(mem_addr, src.as_i16x16(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_expandloadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    transmute(expandloadw_128(mem_addr, src.as_i16x8(), k))
}

/// Load contiguous active 16-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandw))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_expandloadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    transmute(expandloadb_512(mem_addr, src.as_i8x64(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_expandloadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    transmute(expandloadb_256(mem_addr, src.as_i8x32(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_expandloadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    transmute(expandloadb_128(mem_addr, src.as_i8x16(), k))
}

/// Load contiguous active 8-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandb))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_expandloadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
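
// Illustrative sketch, not part of the original source: the 128-bit and 256-bit
// forms behave exactly like the 512-bit ones but additionally require AVX512VL.
// With mask 0b11, the first two bytes of `mem` land in lanes 0 and 1 and the
// zero-mask form clears the rest of the vector. The function name and the
// buffer are assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
unsafe fn example_expandloadu_epi8_128() {
    let mem: [i8; 4] = [1, 2, 3, 4];
    // Two active mask bits, so only mem[0] and mem[1] are read.
    let r = _mm_maskz_expandloadu_epi8(0b11, mem.as_ptr());
    let lanes: [i8; 16] = transmute(r);
    assert_eq!(&lanes[..3], &[1, 2, 0]);
}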

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) {
    vcompressstorew(base_addr as *mut _, a.as_i16x32(), k)
}
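
// Illustrative sketch, not part of the original source: demonstrates that the
// compress-store above writes only the active lanes, packed contiguously, and
// leaves the rest of the destination untouched. The function name, mask, and
// output buffer are assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
unsafe fn example_compressstoreu_epi16() {
    let a = _mm512_set1_epi16(7);
    let mut out = [0i16; 32];
    // Three active mask bits, so exactly three values are stored.
    _mm512_mask_compressstoreu_epi16(out.as_mut_ptr(), 0b0111, a);
    assert_eq!(&out[..4], &[7, 7, 7, 0]);
}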

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) {
    vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi16)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) {
    vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) {
    vcompressstoreb(base_addr, a.as_i8x64(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) {
    vcompressstoreb256(base_addr, a.as_i8x32(), k)
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi8)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) {
    vcompressstoreb128(base_addr, a.as_i8x16(), k)
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi16&expand=1192)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi16&expand=1193)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) }
}
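
// Illustrative sketch, not part of the original source: the in-register
// compress packs the active lanes of `a` towards the low end of the result.
// With mask 0b1100, lanes 2 and 3 of `a` land in result lanes 0 and 1, and the
// zero-mask form clears everything else. Lane values and the function name are
// assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2")]
unsafe fn example_compress_epi16() {
    // Lane i of `a` holds the value i.
    let mut vals = [0i16; 32];
    for (i, v) in vals.iter_mut().enumerate() {
        *v = i as i16;
    }
    let a: __m512i = transmute(vals);
    let r = _mm512_maskz_compress_epi16(0b1100, a);
    let lanes: [i16; 32] = transmute(r);
    assert_eq!(&lanes[..4], &[2, 3, 0, 0]);
}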

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi16&expand=1190)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi16&expand=1191)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi16&expand=1188)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi16&expand=1189)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressw))]
pub fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi8&expand=1210)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi8&expand=1211)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi8&expand=1208)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi8&expand=1209)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi8&expand=1206)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi8&expand=1207)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressb))]
pub fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi16&expand=2310)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi16&expand=2311)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) }
}
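
// Illustrative sketch, not part of the original source: the in-register expand
// is the inverse of compress; it takes the contiguous low lanes of `a` and
// scatters them into the active positions of the result. With mask 0b1010,
// result lanes 1 and 3 receive lanes 0 and 1 of `a`. Lane values and the
// function name are assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512vbmi2")]
unsafe fn example_expand_epi16() {
    // Lane i of `a` holds the value i + 1.
    let mut vals = [0i16; 32];
    for (i, v) in vals.iter_mut().enumerate() {
        *v = i as i16 + 1;
    }
    let a: __m512i = transmute(vals);
    let r = _mm512_maskz_expand_epi16(0b1010, a);
    let lanes: [i16; 32] = transmute(r);
    assert_eq!(&lanes[..4], &[0, 1, 0, 2]);
}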

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi16&expand=2308)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi16&expand=2309)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi16&expand=2306)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k)) }
}

/// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi16&expand=2307)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandw))]
pub fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi8&expand=2328)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi8&expand=2329)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi8&expand=2326)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi8&expand=2327)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi8&expand=2324)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k)) }
}

/// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi8&expand=2325)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandb))]
pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi64&expand=5087)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(simd_funnel_shl(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) }
}
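
// Illustrative sketch, not part of the original source: per lane,
// `_mm512_shldv_epi64` treats a:b as a 128-bit value, shifts it left by
// c % 64, and keeps the upper 64 bits, i.e. (a << s) | (b >> (64 - s)) for
// 0 < s < 64 with `b` taken as unsigned. The concrete values below are
// assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vbmi2")]
unsafe fn example_shldv_epi64() {
    let a = _mm512_set1_epi64(1);
    let b = _mm512_set1_epi64(i64::MIN); // only the top bit set
    let c = _mm512_set1_epi64(4);
    let r = _mm512_shldv_epi64(a, b, c);
    // (1 << 4) | (top bit >> 60) = 16 | 8 = 24 in every lane.
    let lanes: [i64; 8] = transmute(r);
    assert!(lanes.iter().all(|&x| x == 24));
}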

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi64&expand=5085)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi64&expand=5086)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi64&expand=5084)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(simd_funnel_shl(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi64&expand=5082)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi64&expand=5083)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi64&expand=5081)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(simd_funnel_shl(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi64&expand=5079)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
    }
}

/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi64&expand=5080)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvq))]
pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi32&expand=5078)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(simd_funnel_shl(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi32&expand=5076)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi32&expand=5077)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi32&expand=5075)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(simd_funnel_shl(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi32&expand=5073)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi32&expand=5074)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi32&expand=5072)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(simd_funnel_shl(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi32&expand=5070)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
    }
}

/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi32&expand=5071)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvd))]
pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldv_epi16&expand=5069)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(simd_funnel_shl(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) }
}
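
// Illustrative sketch, not part of the original source: the 16-bit form works
// the same way as the 64-bit one, but the shift count is taken modulo the
// element width, so a count of 20 behaves like a count of 4. The values below
// are assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw,avx512vbmi2")]
unsafe fn example_shldv_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(0);
    let c = _mm512_set1_epi16(20); // effectively a shift of 20 % 16 = 4
    let r = _mm512_shldv_epi16(a, b, c);
    let lanes: [i16; 32] = transmute(r);
    assert!(lanes.iter().all(|&x| x == 1 << 4));
}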

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldv_epi16&expand=5067)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldv_epi16&expand=5068)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldv_epi16&expand=5066)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe { transmute(simd_funnel_shl(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldv_epi16&expand=5064)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldv_epi16&expand=5065)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldv_epi16&expand=5063)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe { transmute(simd_funnel_shl(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldv_epi16&expand=5061)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
    }
}

/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldv_epi16&expand=5062)
#[inline]
#[target_feature(enable = "avx512vbmi2,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshldvw))]
pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}

/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi64&expand=5141)
#[inline]
#[target_feature(enable = "avx512vbmi2")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshrdvq))]
pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe { transmute(simd_funnel_shr(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) }
}
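
// Illustrative sketch, not part of the original source: per lane,
// `_mm512_shrdv_epi64` treats b:a as a 128-bit value, shifts it right by
// c % 64, and keeps the lower 64 bits, i.e. (a >> s) | (b << (64 - s)) for
// 0 < s < 64 with `a` taken as unsigned. The concrete values below are
// assumptions for this example only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vbmi2")]
unsafe fn example_shrdv_epi64() {
    let a = _mm512_set1_epi64(16);
    let b = _mm512_set1_epi64(1);
    let c = _mm512_set1_epi64(4);
    let r = _mm512_shrdv_epi64(a, b, c);
    // (16 >> 4) | (1 << 60) = 1 | 2^60 in every lane.
    let lanes: [i64; 8] = transmute(r);
    assert!(lanes.iter().all(|&x| x == 1 | (1 << 60)));
}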
856
857/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
858///
859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi64&expand=5139)
860#[inline]
861#[target_feature(enable = "avx512vbmi2")]
862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
863#[cfg_attr(test, assert_instr(vpshrdvq))]
864pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
865    unsafe {
866        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
867        transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
868    }
869}
870
871/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
872///
873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi64&expand=5140)
874#[inline]
875#[target_feature(enable = "avx512vbmi2")]
876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
877#[cfg_attr(test, assert_instr(vpshrdvq))]
878pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
879    unsafe {
880        let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
881        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
882    }
883}
884
885/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
886///
887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi64&expand=5138)
888#[inline]
889#[target_feature(enable = "avx512vbmi2,avx512vl")]
890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
891#[cfg_attr(test, assert_instr(vpshrdvq))]
892pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
893    unsafe { transmute(simd_funnel_shr(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) }
894}
895
896/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
897///
898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi64&expand=5136)
899#[inline]
900#[target_feature(enable = "avx512vbmi2,avx512vl")]
901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
902#[cfg_attr(test, assert_instr(vpshrdvq))]
903pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
904    unsafe {
905        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
906        transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
907    }
908}
909
910/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
911///
912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi64&expand=5137)
913#[inline]
914#[target_feature(enable = "avx512vbmi2,avx512vl")]
915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
916#[cfg_attr(test, assert_instr(vpshrdvq))]
917pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
918    unsafe {
919        let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
920        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
921    }
922}
923
924/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
925///
926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi64&expand=5135)
927#[inline]
928#[target_feature(enable = "avx512vbmi2,avx512vl")]
929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
930#[cfg_attr(test, assert_instr(vpshrdvq))]
931pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
932    unsafe { transmute(simd_funnel_shr(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) }
933}
934
935/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi64&expand=5133)
938#[inline]
939#[target_feature(enable = "avx512vbmi2,avx512vl")]
940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
941#[cfg_attr(test, assert_instr(vpshrdvq))]
942pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
943    unsafe {
944        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
945        transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
946    }
947}
948
949/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
950///
951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi64&expand=5134)
952#[inline]
953#[target_feature(enable = "avx512vbmi2,avx512vl")]
954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
955#[cfg_attr(test, assert_instr(vpshrdvq))]
956pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
957    unsafe {
958        let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
959        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
960    }
961}
962
963/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi32&expand=5132)
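///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions picked so the per-lane result is easy to verify:
///
/// ```ignore
/// unsafe {
///     // Each lane is the low 32 bits of ((b:a) >> c),
///     // i.e. (a >> c) | (b << (32 - c)) for 0 < c < 32.
///     let a = _mm512_set1_epi32(0b1000);
///     let b = _mm512_set1_epi32(1);
///     let c = _mm512_set1_epi32(3);
///     let r = _mm512_shrdv_epi32(a, b, c);
///     // Every lane of `r` is (0b1000 >> 3) | (1 << 29) = 0x2000_0001.
/// }
/// ```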
966#[inline]
967#[target_feature(enable = "avx512vbmi2")]
968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
969#[cfg_attr(test, assert_instr(vpshrdvd))]
970pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
971    unsafe { transmute(simd_funnel_shr(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) }
972}
973
974/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
975///
976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi32&expand=5130)
977#[inline]
978#[target_feature(enable = "avx512vbmi2")]
979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
980#[cfg_attr(test, assert_instr(vpshrdvd))]
981pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
982    unsafe {
983        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
984        transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
985    }
986}
987
988/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
989///
990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi32&expand=5131)
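///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest) of
/// the zeromask behaviour; the mask and input values are arbitrary assumptions:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(0);
///     let b = _mm512_set1_epi32(1);
///     let c = _mm512_set1_epi32(1);
///     // Only lanes whose mask bit is set are computed; the rest are zeroed.
///     let r = _mm512_maskz_shrdv_epi32(0b0101_0101_0101_0101, a, b, c);
///     // Even-numbered lanes hold 1 << 31, odd-numbered lanes hold 0.
/// }
/// ```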
991#[inline]
992#[target_feature(enable = "avx512vbmi2")]
993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
994#[cfg_attr(test, assert_instr(vpshrdvd))]
995pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
996    unsafe {
997        let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
998        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
999    }
1000}
1001
1002/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1003///
1004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi32&expand=5129)
1005#[inline]
1006#[target_feature(enable = "avx512vbmi2,avx512vl")]
1007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1008#[cfg_attr(test, assert_instr(vpshrdvd))]
1009pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1010    unsafe { transmute(simd_funnel_shr(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) }
1011}
1012
1013/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1014///
1015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi32&expand=5127)
1016#[inline]
1017#[target_feature(enable = "avx512vbmi2,avx512vl")]
1018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1019#[cfg_attr(test, assert_instr(vpshrdvd))]
1020pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
1021    unsafe {
1022        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1023        transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
1024    }
1025}
1026
1027/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1028///
1029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi32&expand=5128)
1030#[inline]
1031#[target_feature(enable = "avx512vbmi2,avx512vl")]
1032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1033#[cfg_attr(test, assert_instr(vpshrdvd))]
1034pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1035    unsafe {
1036        let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
1037        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1038    }
1039}
1040
1041/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
1042///
1043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi32&expand=5126)
1044#[inline]
1045#[target_feature(enable = "avx512vbmi2,avx512vl")]
1046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1047#[cfg_attr(test, assert_instr(vpshrdvd))]
1048pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1049    unsafe { transmute(simd_funnel_shr(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) }
1050}
1051
1052/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1053///
1054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi32&expand=5124)
1055#[inline]
1056#[target_feature(enable = "avx512vbmi2,avx512vl")]
1057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1058#[cfg_attr(test, assert_instr(vpshrdvd))]
1059pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1060    unsafe {
1061        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1062        transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
1063    }
1064}
1065
1066/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1067///
1068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi32&expand=5125)
1069#[inline]
1070#[target_feature(enable = "avx512vbmi2,avx512vl")]
1071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1072#[cfg_attr(test, assert_instr(vpshrdvd))]
1073pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1074    unsafe {
1075        let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
1076        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1077    }
1078}
1079
1080/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdv_epi16&expand=5123)
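///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions chosen to keep the lane arithmetic small:
///
/// ```ignore
/// unsafe {
///     // Each lane is the low 16 bits of ((b:a) >> c),
///     // i.e. (a >> c) | (b << (16 - c)) for 0 < c < 16.
///     let a = _mm512_set1_epi16(0);
///     let b = _mm512_set1_epi16(3);
///     let c = _mm512_set1_epi16(2);
///     let r = _mm512_shrdv_epi16(a, b, c);
///     // Every lane of `r` has the bit pattern 3 << 14 = 0xC000.
/// }
/// ```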
1083#[inline]
1084#[target_feature(enable = "avx512vbmi2")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpshrdvw))]
1087pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1088    unsafe { transmute(simd_funnel_shr(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) }
1089}
1090
1091/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1092///
1093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdv_epi16&expand=5121)
1094#[inline]
1095#[target_feature(enable = "avx512vbmi2")]
1096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1097#[cfg_attr(test, assert_instr(vpshrdvw))]
1098pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
1099    unsafe {
1100        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1101        transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
1102    }
1103}
1104
1105/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1106///
1107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdv_epi16&expand=5122)
1108#[inline]
1109#[target_feature(enable = "avx512vbmi2")]
1110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1111#[cfg_attr(test, assert_instr(vpshrdvw))]
1112pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
1113    unsafe {
1114        let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
1115        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1116    }
1117}
1118
1119/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1120///
1121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdv_epi16&expand=5120)
1122#[inline]
1123#[target_feature(enable = "avx512vbmi2,avx512vl")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[cfg_attr(test, assert_instr(vpshrdvw))]
1126pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1127    unsafe { transmute(simd_funnel_shr(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) }
1128}
1129
1130/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1131///
1132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdv_epi16&expand=5118)
1133#[inline]
1134#[target_feature(enable = "avx512vbmi2,avx512vl")]
1135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1136#[cfg_attr(test, assert_instr(vpshrdvw))]
1137pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
1138    unsafe {
1139        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1140        transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
1141    }
1142}
1143
1144/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1145///
1146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdv_epi16&expand=5119)
1147#[inline]
1148#[target_feature(enable = "avx512vbmi2,avx512vl")]
1149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1150#[cfg_attr(test, assert_instr(vpshrdvw))]
1151pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
1152    unsafe {
1153        let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
1154        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1155    }
1156}
1157
1158/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
1159///
1160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdv_epi16&expand=5117)
1161#[inline]
1162#[target_feature(enable = "avx512vbmi2,avx512vl")]
1163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1164#[cfg_attr(test, assert_instr(vpshrdvw))]
1165pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1166    unsafe { transmute(simd_funnel_shr(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) }
1167}
1168
1169/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1170///
1171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdv_epi16&expand=5115)
1172#[inline]
1173#[target_feature(enable = "avx512vbmi2,avx512vl")]
1174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1175#[cfg_attr(test, assert_instr(vpshrdvw))]
1176pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
1177    unsafe {
1178        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1179        transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
1180    }
1181}
1182
1183/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1184///
1185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdv_epi16&expand=5116)
1186#[inline]
1187#[target_feature(enable = "avx512vbmi2,avx512vl")]
1188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1189#[cfg_attr(test, assert_instr(vpshrdvw))]
1190pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
1191    unsafe {
1192        let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
1193        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1194    }
1195}
1196
1197/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi64&expand=5060)
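///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the inputs are assumptions chosen so the funnel is visible in a single bit:
///
/// ```ignore
/// unsafe {
///     // Each lane is the high 64 bits of ((a:b) << IMM8),
///     // i.e. (a << IMM8) | (b >> (64 - IMM8)) for 0 < IMM8 < 64.
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(i64::MIN); // only the top bit set
///     let r = _mm512_shldi_epi64::<1>(a, b);
///     // Every lane of `r` is (1 << 1) | 1 = 3.
/// }
/// ```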
1200#[inline]
1201#[target_feature(enable = "avx512vbmi2")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1204#[rustc_legacy_const_generics(2)]
1205pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1206    static_assert_uimm_bits!(IMM8, 8);
1207    _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1208}
1209
1210/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1211///
1212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi64&expand=5058)
1213#[inline]
1214#[target_feature(enable = "avx512vbmi2")]
1215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1216#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1217#[rustc_legacy_const_generics(4)]
1218pub fn _mm512_mask_shldi_epi64<const IMM8: i32>(
1219    src: __m512i,
1220    k: __mmask8,
1221    a: __m512i,
1222    b: __m512i,
1223) -> __m512i {
1224    unsafe {
1225        static_assert_uimm_bits!(IMM8, 8);
1226        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1227        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1228    }
1229}
1230
1231/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1232///
1233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi64&expand=5059)
1234#[inline]
1235#[target_feature(enable = "avx512vbmi2")]
1236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1237#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1238#[rustc_legacy_const_generics(3)]
1239pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1240    unsafe {
1241        static_assert_uimm_bits!(IMM8, 8);
1242        let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
1243        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1244    }
1245}
1246
1247/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1248///
1249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi64&expand=5057)
1250#[inline]
1251#[target_feature(enable = "avx512vbmi2,avx512vl")]
1252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1253#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1254#[rustc_legacy_const_generics(2)]
1255pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1256    static_assert_uimm_bits!(IMM8, 8);
1257    _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1258}
1259
1260/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1261///
1262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi64&expand=5055)
1263#[inline]
1264#[target_feature(enable = "avx512vbmi2,avx512vl")]
1265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1266#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1267#[rustc_legacy_const_generics(4)]
1268pub fn _mm256_mask_shldi_epi64<const IMM8: i32>(
1269    src: __m256i,
1270    k: __mmask8,
1271    a: __m256i,
1272    b: __m256i,
1273) -> __m256i {
1274    unsafe {
1275        static_assert_uimm_bits!(IMM8, 8);
1276        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1277        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1278    }
1279}
1280
1281/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1282///
1283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi64&expand=5056)
1284#[inline]
1285#[target_feature(enable = "avx512vbmi2,avx512vl")]
1286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1287#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1288#[rustc_legacy_const_generics(3)]
1289pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1290    unsafe {
1291        static_assert_uimm_bits!(IMM8, 8);
1292        let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
1293        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1294    }
1295}
1296
1297/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1298///
1299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi64&expand=5054)
1300#[inline]
1301#[target_feature(enable = "avx512vbmi2,avx512vl")]
1302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1303#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1304#[rustc_legacy_const_generics(2)]
1305pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1306    static_assert_uimm_bits!(IMM8, 8);
1307    _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1308}
1309
1310/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1311///
1312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi64&expand=5052)
1313#[inline]
1314#[target_feature(enable = "avx512vbmi2,avx512vl")]
1315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1316#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1317#[rustc_legacy_const_generics(4)]
1318pub fn _mm_mask_shldi_epi64<const IMM8: i32>(
1319    src: __m128i,
1320    k: __mmask8,
1321    a: __m128i,
1322    b: __m128i,
1323) -> __m128i {
1324    unsafe {
1325        static_assert_uimm_bits!(IMM8, 8);
1326        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1327        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1328    }
1329}
1330
1331/// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1332///
1333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi64&expand=5053)
1334#[inline]
1335#[target_feature(enable = "avx512vbmi2,avx512vl")]
1336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1337#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
1338#[rustc_legacy_const_generics(3)]
1339pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1340    unsafe {
1341        static_assert_uimm_bits!(IMM8, 8);
1342        let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
1343        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1344    }
1345}
1346
1347/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1348///
1349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi32&expand=5051)
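///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions used purely for demonstration:
///
/// ```ignore
/// unsafe {
///     // Each lane is the high 32 bits of ((a:b) << IMM8),
///     // i.e. (a << IMM8) | (b >> (32 - IMM8)) for 0 < IMM8 < 32.
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(0);
///     let r = _mm512_shldi_epi32::<4>(a, b);
///     // Every lane of `r` is 1 << 4 = 16.
/// }
/// ```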
1350#[inline]
1351#[target_feature(enable = "avx512vbmi2")]
1352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1353#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1354#[rustc_legacy_const_generics(2)]
1355pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1356    static_assert_uimm_bits!(IMM8, 8);
1357    _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
1358}
1359
1360/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1361///
1362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi32&expand=5049)
1363#[inline]
1364#[target_feature(enable = "avx512vbmi2")]
1365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1366#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1367#[rustc_legacy_const_generics(4)]
1368pub fn _mm512_mask_shldi_epi32<const IMM8: i32>(
1369    src: __m512i,
1370    k: __mmask16,
1371    a: __m512i,
1372    b: __m512i,
1373) -> __m512i {
1374    unsafe {
1375        static_assert_uimm_bits!(IMM8, 8);
1376        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1377        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1378    }
1379}
1380
1381/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1382///
1383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi32&expand=5050)
1384#[inline]
1385#[target_feature(enable = "avx512vbmi2")]
1386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1387#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1388#[rustc_legacy_const_generics(3)]
1389pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1390    unsafe {
1391        static_assert_uimm_bits!(IMM8, 8);
1392        let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
1393        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1394    }
1395}
1396
1397/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1398///
1399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi32&expand=5048)
1400#[inline]
1401#[target_feature(enable = "avx512vbmi2,avx512vl")]
1402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1403#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1404#[rustc_legacy_const_generics(2)]
1405pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1406    static_assert_uimm_bits!(IMM8, 8);
1407    _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
1408}
1409
1410/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi32&expand=5046)
1413#[inline]
1414#[target_feature(enable = "avx512vbmi2,avx512vl")]
1415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1416#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1417#[rustc_legacy_const_generics(4)]
1418pub fn _mm256_mask_shldi_epi32<const IMM8: i32>(
1419    src: __m256i,
1420    k: __mmask8,
1421    a: __m256i,
1422    b: __m256i,
1423) -> __m256i {
1424    unsafe {
1425        static_assert_uimm_bits!(IMM8, 8);
1426        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1427        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1428    }
1429}
1430
1431/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1432///
1433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi32&expand=5047)
1434#[inline]
1435#[target_feature(enable = "avx512vbmi2,avx512vl")]
1436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1437#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1438#[rustc_legacy_const_generics(3)]
1439pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1440    unsafe {
1441        static_assert_uimm_bits!(IMM8, 8);
1442        let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
1443        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1444    }
1445}
1446
1447/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1448///
1449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi32&expand=5045)
1450#[inline]
1451#[target_feature(enable = "avx512vbmi2,avx512vl")]
1452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1453#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1454#[rustc_legacy_const_generics(2)]
1455pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1456    static_assert_uimm_bits!(IMM8, 8);
1457    _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
1458}
1459
1460/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1461///
1462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi32&expand=5043)
1463#[inline]
1464#[target_feature(enable = "avx512vbmi2,avx512vl")]
1465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1466#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1467#[rustc_legacy_const_generics(4)]
1468pub fn _mm_mask_shldi_epi32<const IMM8: i32>(
1469    src: __m128i,
1470    k: __mmask8,
1471    a: __m128i,
1472    b: __m128i,
1473) -> __m128i {
1474    unsafe {
1475        static_assert_uimm_bits!(IMM8, 8);
1476        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1477        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1478    }
1479}
1480
1481/// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1482///
1483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi32&expand=5044)
1484#[inline]
1485#[target_feature(enable = "avx512vbmi2,avx512vl")]
1486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1487#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
1488#[rustc_legacy_const_generics(3)]
1489pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1490    unsafe {
1491        static_assert_uimm_bits!(IMM8, 8);
1492        let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
1493        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1494    }
1495}
1496
1497/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1498///
1499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shldi_epi16&expand=5042)
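///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions used purely for demonstration:
///
/// ```ignore
/// unsafe {
///     // Each lane is the high 16 bits of ((a:b) << IMM8),
///     // i.e. (a << IMM8) | (b >> (16 - IMM8)) for 0 < IMM8 < 16.
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(0);
///     let r = _mm512_shldi_epi16::<8>(a, b);
///     // Every lane of `r` is 1 << 8 = 0x0100.
/// }
/// ```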
1500#[inline]
1501#[target_feature(enable = "avx512vbmi2")]
1502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1503#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1504#[rustc_legacy_const_generics(2)]
1505pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1506    static_assert_uimm_bits!(IMM8, 8);
1507    _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
1508}
1509
1510/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1511///
1512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shldi_epi16&expand=5040)
1513#[inline]
1514#[target_feature(enable = "avx512vbmi2")]
1515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1516#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1517#[rustc_legacy_const_generics(4)]
1518pub fn _mm512_mask_shldi_epi16<const IMM8: i32>(
1519    src: __m512i,
1520    k: __mmask32,
1521    a: __m512i,
1522    b: __m512i,
1523) -> __m512i {
1524    unsafe {
1525        static_assert_uimm_bits!(IMM8, 8);
1526        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1527        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1528    }
1529}
1530
1531/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shldi_epi16&expand=5041)
1534#[inline]
1535#[target_feature(enable = "avx512vbmi2")]
1536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1537#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1538#[rustc_legacy_const_generics(3)]
1539pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1540    unsafe {
1541        static_assert_uimm_bits!(IMM8, 8);
1542        let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
1543        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1544    }
1545}
1546
1547/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1548///
1549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shldi_epi16&expand=5039)
1550#[inline]
1551#[target_feature(enable = "avx512vbmi2,avx512vl")]
1552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1553#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1554#[rustc_legacy_const_generics(2)]
1555pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1556    static_assert_uimm_bits!(IMM8, 8);
1557    _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
1558}
1559
1560/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1561///
1562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shldi_epi16&expand=5037)
1563#[inline]
1564#[target_feature(enable = "avx512vbmi2,avx512vl")]
1565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1566#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1567#[rustc_legacy_const_generics(4)]
1568pub fn _mm256_mask_shldi_epi16<const IMM8: i32>(
1569    src: __m256i,
1570    k: __mmask16,
1571    a: __m256i,
1572    b: __m256i,
1573) -> __m256i {
1574    unsafe {
1575        static_assert_uimm_bits!(IMM8, 8);
1576        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1577        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1578    }
1579}
1580
1581/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1582///
1583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shldi_epi16&expand=5038)
1584#[inline]
1585#[target_feature(enable = "avx512vbmi2,avx512vl")]
1586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1587#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1588#[rustc_legacy_const_generics(3)]
1589pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1590    unsafe {
1591        static_assert_uimm_bits!(IMM8, 8);
1592        let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
1593        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
1594    }
1595}
1596
1597/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1598///
1599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shldi_epi16&expand=5036)
1600#[inline]
1601#[target_feature(enable = "avx512vbmi2,avx512vl")]
1602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1603#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1604#[rustc_legacy_const_generics(2)]
1605pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1606    static_assert_uimm_bits!(IMM8, 8);
1607    _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
1608}
1609
1610/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1611///
1612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shldi_epi16&expand=5034)
1613#[inline]
1614#[target_feature(enable = "avx512vbmi2,avx512vl")]
1615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1616#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1617#[rustc_legacy_const_generics(4)]
1618pub fn _mm_mask_shldi_epi16<const IMM8: i32>(
1619    src: __m128i,
1620    k: __mmask8,
1621    a: __m128i,
1622    b: __m128i,
1623) -> __m128i {
1624    unsafe {
1625        static_assert_uimm_bits!(IMM8, 8);
1626        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1627        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1628    }
1629}
1630
1631/// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1632///
1633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shldi_epi16&expand=5035)
1634#[inline]
1635#[target_feature(enable = "avx512vbmi2,avx512vl")]
1636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1637#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
1638#[rustc_legacy_const_generics(3)]
1639pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1640    unsafe {
1641        static_assert_uimm_bits!(IMM8, 8);
1642        let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
1643        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
1644    }
1645}
1646
1647/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1648///
1649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi64&expand=5114)
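///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions picked so the two contributions stay visible:
///
/// ```ignore
/// unsafe {
///     // Each lane is the low 64 bits of ((b:a) >> IMM8),
///     // i.e. (a >> IMM8) | (b << (64 - IMM8)) for 0 < IMM8 < 64.
///     let a = _mm512_set1_epi64(0x10);
///     let b = _mm512_set1_epi64(1);
///     let r = _mm512_shrdi_epi64::<4>(a, b);
///     // Every lane of `r` is (0x10 >> 4) | (1 << 60) = 0x1000_0000_0000_0001.
/// }
/// ```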
1650#[inline]
1651#[target_feature(enable = "avx512vbmi2")]
1652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1653#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1654#[rustc_legacy_const_generics(2)]
1655pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1656    static_assert_uimm_bits!(IMM8, 8);
1657    _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
1658}
1659
1660/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1661///
1662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi64&expand=5112)
1663#[inline]
1664#[target_feature(enable = "avx512vbmi2")]
1665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1666#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1667#[rustc_legacy_const_generics(4)]
1668pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
1669    src: __m512i,
1670    k: __mmask8,
1671    a: __m512i,
1672    b: __m512i,
1673) -> __m512i {
1674    unsafe {
1675        static_assert_uimm_bits!(IMM8, 8);
1676        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1677        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1678    }
1679}
1680
1681/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1682///
1683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi64&expand=5113)
1684#[inline]
1685#[target_feature(enable = "avx512vbmi2")]
1686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1687#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
1688#[rustc_legacy_const_generics(3)]
1689pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1690    unsafe {
1691        static_assert_uimm_bits!(IMM8, 8);
1692        let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
1693        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
1694    }
1695}
1696
1697/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1698///
1699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi64&expand=5111)
1700#[inline]
1701#[target_feature(enable = "avx512vbmi2,avx512vl")]
1702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1703#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1704#[rustc_legacy_const_generics(2)]
1705pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1706    static_assert_uimm_bits!(IMM8, 8);
1707    _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
1708}
1709
1710/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1711///
1712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi64&expand=5109)
1713#[inline]
1714#[target_feature(enable = "avx512vbmi2,avx512vl")]
1715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1716#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1717#[rustc_legacy_const_generics(4)]
1718pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
1719    src: __m256i,
1720    k: __mmask8,
1721    a: __m256i,
1722    b: __m256i,
1723) -> __m256i {
1724    unsafe {
1725        static_assert_uimm_bits!(IMM8, 8);
1726        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1727        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1728    }
1729}
1730
1731/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi64&expand=5110)
1734#[inline]
1735#[target_feature(enable = "avx512vbmi2,avx512vl")]
1736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1737#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1738#[rustc_legacy_const_generics(3)]
1739pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1740    unsafe {
1741        static_assert_uimm_bits!(IMM8, 8);
1742        let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
1743        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
1744    }
1745}
1746
1747/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1748///
1749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi64&expand=5108)
1750#[inline]
1751#[target_feature(enable = "avx512vbmi2,avx512vl")]
1752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1753#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1754#[rustc_legacy_const_generics(2)]
1755pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1756    static_assert_uimm_bits!(IMM8, 8);
1757    _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
1758}
1759
1760/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1761///
1762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi64&expand=5106)
1763#[inline]
1764#[target_feature(enable = "avx512vbmi2,avx512vl")]
1765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1766#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1767#[rustc_legacy_const_generics(4)]
1768pub fn _mm_mask_shrdi_epi64<const IMM8: i32>(
1769    src: __m128i,
1770    k: __mmask8,
1771    a: __m128i,
1772    b: __m128i,
1773) -> __m128i {
1774    unsafe {
1775        static_assert_uimm_bits!(IMM8, 8);
1776        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
1777        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1778    }
1779}
1780
1781/// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1782///
1783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi64&expand=5107)
1784#[inline]
1785#[target_feature(enable = "avx512vbmi2,avx512vl")]
1786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1787#[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
1788#[rustc_legacy_const_generics(3)]
1789pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1790    unsafe {
1791        static_assert_uimm_bits!(IMM8, 8);
1792        let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
1793        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
1794    }
1795}
1796
1797/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1798///
1799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi32&expand=5105)
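///
/// A minimal illustrative sketch (fenced as `ignore`, not a compiled doctest);
/// the constants are assumptions used purely for demonstration:
///
/// ```ignore
/// unsafe {
///     // Each lane is the low 32 bits of ((b:a) >> IMM8),
///     // i.e. (a >> IMM8) | (b << (32 - IMM8)) for 0 < IMM8 < 32.
///     let a = _mm512_set1_epi32(0);
///     let b = _mm512_set1_epi32(1);
///     let r = _mm512_shrdi_epi32::<1>(a, b);
///     // Every lane of `r` has the bit pattern 1 << 31 (0x8000_0000).
/// }
/// ```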
1800#[inline]
1801#[target_feature(enable = "avx512vbmi2")]
1802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1803#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1804#[rustc_legacy_const_generics(2)]
1805pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1806    static_assert_uimm_bits!(IMM8, 8);
1807    _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
1808}
1809
1810/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1811///
1812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi32&expand=5103)
1813#[inline]
1814#[target_feature(enable = "avx512vbmi2")]
1815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1816#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1817#[rustc_legacy_const_generics(4)]
1818pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
1819    src: __m512i,
1820    k: __mmask16,
1821    a: __m512i,
1822    b: __m512i,
1823) -> __m512i {
1824    unsafe {
1825        static_assert_uimm_bits!(IMM8, 8);
1826        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
1827        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1828    }
1829}
1830
1831/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1832///
1833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi32&expand=5104)
1834#[inline]
1835#[target_feature(enable = "avx512vbmi2")]
1836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1837#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1838#[rustc_legacy_const_generics(3)]
1839pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1840    unsafe {
1841        static_assert_uimm_bits!(IMM8, 8);
1842        let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
1843        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
1844    }
1845}
1846
1847/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1848///
1849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi32&expand=5102)
1850#[inline]
1851#[target_feature(enable = "avx512vbmi2,avx512vl")]
1852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1853#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1854#[rustc_legacy_const_generics(2)]
1855pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1856    static_assert_uimm_bits!(IMM8, 8);
1857    _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
1858}
1859
1860/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1861///
1862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi32&expand=5100)
1863#[inline]
1864#[target_feature(enable = "avx512vbmi2,avx512vl")]
1865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1866#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1867#[rustc_legacy_const_generics(4)]
1868pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
1869    src: __m256i,
1870    k: __mmask8,
1871    a: __m256i,
1872    b: __m256i,
1873) -> __m256i {
1874    unsafe {
1875        static_assert_uimm_bits!(IMM8, 8);
1876        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
1877        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1878    }
1879}
1880
1881/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1882///
1883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi32&expand=5101)
1884#[inline]
1885#[target_feature(enable = "avx512vbmi2,avx512vl")]
1886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1887#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1888#[rustc_legacy_const_generics(3)]
1889pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1890    unsafe {
1891        static_assert_uimm_bits!(IMM8, 8);
1892        let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
1893        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
1894    }
1895}
1896
1897/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1898///
1899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi32&expand=5099)
1900#[inline]
1901#[target_feature(enable = "avx512vbmi2,avx512vl")]
1902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1903#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1904#[rustc_legacy_const_generics(2)]
1905pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
1906    static_assert_uimm_bits!(IMM8, 8);
1907    _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
1908}
1909
1910/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1911///
1912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi32&expand=5097)
1913#[inline]
1914#[target_feature(enable = "avx512vbmi2,avx512vl")]
1915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1916#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1917#[rustc_legacy_const_generics(4)]
1918pub fn _mm_mask_shrdi_epi32<const IMM8: i32>(
1919    src: __m128i,
1920    k: __mmask8,
1921    a: __m128i,
1922    b: __m128i,
1923) -> __m128i {
1924    unsafe {
1925        static_assert_uimm_bits!(IMM8, 8);
1926        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
1927        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1928    }
1929}
1930
1931/// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1932///
1933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi32&expand=5098)
1934#[inline]
1935#[target_feature(enable = "avx512vbmi2,avx512vl")]
1936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1937#[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshrdd
1938#[rustc_legacy_const_generics(3)]
1939pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1940    unsafe {
1941        static_assert_uimm_bits!(IMM8, 8);
1942        let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
1943        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
1944    }
1945}
1946
1947/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1948///
1949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shrdi_epi16&expand=5096)
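///
/// An illustrative sketch of the 16-bit funnel shift (not part of the original
/// documentation; assumes `avx512vbmi2` is enabled in the calling context):
///
/// ```
/// # #[cfg(target_arch = "x86")]
/// # use core::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use core::arch::x86_64::*;
/// # #[target_feature(enable = "avx512vbmi2")]
/// # fn sketch() {
/// let a = _mm512_set1_epi16(2); // low 16 bits of each 32-bit intermediate
/// let b = _mm512_set1_epi16(8); // high 16 bits of each 32-bit intermediate
/// // Each of the 32 lanes computes ((8 << 16) | 2) >> 1 and keeps the low 16 bits, i.e. 1.
/// let r = _mm512_shrdi_epi16::<1>(a, b);
/// # let _ = r;
/// # }
/// ```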
1950#[inline]
1951#[target_feature(enable = "avx512vbmi2")]
1952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1953#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1954#[rustc_legacy_const_generics(2)]
1955pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
1956    static_assert_uimm_bits!(IMM8, 8);
1957    _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
1958}
1959
1960/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1961///
1962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shrdi_epi16&expand=5094)
1963#[inline]
1964#[target_feature(enable = "avx512vbmi2")]
1965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1966#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1967#[rustc_legacy_const_generics(4)]
1968pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
1969    src: __m512i,
1970    k: __mmask32,
1971    a: __m512i,
1972    b: __m512i,
1973) -> __m512i {
1974    unsafe {
1975        static_assert_uimm_bits!(IMM8, 8);
1976        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
1977        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1978    }
1979}
1980
1981/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1982///
1983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shrdi_epi16&expand=5095)
1984#[inline]
1985#[target_feature(enable = "avx512vbmi2")]
1986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1987#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
1988#[rustc_legacy_const_generics(3)]
1989pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1990    unsafe {
1991        static_assert_uimm_bits!(IMM8, 8);
1992        let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
1993        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
1994    }
1995}
1996
1997/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1998///
1999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shrdi_epi16&expand=5093)
2000#[inline]
2001#[target_feature(enable = "avx512vbmi2,avx512vl")]
2002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2003#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2004#[rustc_legacy_const_generics(2)]
2005pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2006    static_assert_uimm_bits!(IMM8, 8);
2007    _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
2008}
2009
2010/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2011///
2012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shrdi_epi16&expand=5091)
2013#[inline]
2014#[target_feature(enable = "avx512vbmi2,avx512vl")]
2015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2016#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2017#[rustc_legacy_const_generics(4)]
2018pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
2019    src: __m256i,
2020    k: __mmask16,
2021    a: __m256i,
2022    b: __m256i,
2023) -> __m256i {
2024    unsafe {
2025        static_assert_uimm_bits!(IMM8, 8);
2026        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2027        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
2028    }
2029}
2030
2031/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2032///
2033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shrdi_epi16&expand=5092)
2034#[inline]
2035#[target_feature(enable = "avx512vbmi2,avx512vl")]
2036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2037#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2038#[rustc_legacy_const_generics(3)]
2039pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2040    unsafe {
2041        static_assert_uimm_bits!(IMM8, 8);
2042        let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
2043        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
2044    }
2045}
2046
2047/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
2048///
2049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shrdi_epi16&expand=5090)
2050#[inline]
2051#[target_feature(enable = "avx512vbmi2,avx512vl")]
2052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2053#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2054#[rustc_legacy_const_generics(2)]
2055pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
2056    static_assert_uimm_bits!(IMM8, 8);
2057    _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
2058}
2059
2060/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2061///
2062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shrdi_epi16&expand=5088)
2063#[inline]
2064#[target_feature(enable = "avx512vbmi2,avx512vl")]
2065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2066#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2067#[rustc_legacy_const_generics(4)]
2068pub fn _mm_mask_shrdi_epi16<const IMM8: i32>(
2069    src: __m128i,
2070    k: __mmask8,
2071    a: __m128i,
2072    b: __m128i,
2073) -> __m128i {
2074    unsafe {
2075        static_assert_uimm_bits!(IMM8, 8);
2076        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2077        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
2078    }
2079}
2080
2081/// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2082///
2083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shrdi_epi16&expand=5089)
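///
/// An illustrative sketch of the zeromask form on 16-bit lanes (not part of the
/// original documentation):
///
/// ```
/// # #[cfg(target_arch = "x86")]
/// # use core::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use core::arch::x86_64::*;
/// # #[target_feature(enable = "avx512vbmi2,avx512vl")]
/// # fn sketch() {
/// let a = _mm_set1_epi16(2);
/// let b = _mm_set1_epi16(8);
/// // Mask 0b0000_1111: the four low lanes hold ((8 << 16) | 2) >> 1 = 1, the rest are 0.
/// let r = _mm_maskz_shrdi_epi16::<1>(0b0000_1111, a, b);
/// # let _ = r;
/// # }
/// ```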
2084#[inline]
2085#[target_feature(enable = "avx512vbmi2,avx512vl")]
2086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2087#[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
2088#[rustc_legacy_const_generics(3)]
2089pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2090    unsafe {
2091        static_assert_uimm_bits!(IMM8, 8);
2092        let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
2093        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
2094    }
2095}
2096
2097#[allow(improper_ctypes)]
2098unsafe extern "C" {
2099    #[link_name = "llvm.x86.avx512.mask.compress.store.w.512"]
2100    fn vcompressstorew(mem: *mut i8, data: i16x32, mask: u32);
2101    #[link_name = "llvm.x86.avx512.mask.compress.store.w.256"]
2102    fn vcompressstorew256(mem: *mut i8, data: i16x16, mask: u16);
2103    #[link_name = "llvm.x86.avx512.mask.compress.store.w.128"]
2104    fn vcompressstorew128(mem: *mut i8, data: i16x8, mask: u8);
2105
2106    #[link_name = "llvm.x86.avx512.mask.compress.store.b.512"]
2107    fn vcompressstoreb(mem: *mut i8, data: i8x64, mask: u64);
2108    #[link_name = "llvm.x86.avx512.mask.compress.store.b.256"]
2109    fn vcompressstoreb256(mem: *mut i8, data: i8x32, mask: u32);
2110    #[link_name = "llvm.x86.avx512.mask.compress.store.b.128"]
2111    fn vcompressstoreb128(mem: *mut i8, data: i8x16, mask: u16);
2112
2113    #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
2114    fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2115    #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
2116    fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2117    #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
2118    fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2119
2120    #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
2121    fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2122    #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
2123    fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2124    #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
2125    fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2126
2127    #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
2128    fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
2129    #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
2130    fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
2131    #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
2132    fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
2133
2134    #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
2135    fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
2136    #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
2137    fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
2138    #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
2139    fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
2140
2141    #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"]
2142    fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
2143    #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"]
2144    fn expandloadw_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
2145    #[link_name = "llvm.x86.avx512.mask.expand.load.b.256"]
2146    fn expandloadb_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
2147    #[link_name = "llvm.x86.avx512.mask.expand.load.w.256"]
2148    fn expandloadw_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
2149    #[link_name = "llvm.x86.avx512.mask.expand.load.b.512"]
2150    fn expandloadb_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
2151    #[link_name = "llvm.x86.avx512.mask.expand.load.w.512"]
2152    fn expandloadw_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
2153}
2154
2155#[cfg(test)]
2156mod tests {
2157
2158    use stdarch_test::simd_test;
2159
2160    use crate::core_arch::x86::*;
2161    use crate::hint::black_box;
2162
2163    #[simd_test(enable = "avx512vbmi2")]
2164    unsafe fn test_mm512_mask_compress_epi16() {
2165        let src = _mm512_set1_epi16(200);
2166        #[rustfmt::skip]
2167        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2168                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2169        let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2170        #[rustfmt::skip]
2171        let e = _mm512_set_epi16(
2172            200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2173            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2174        );
2175        assert_eq_m512i(r, e);
2176    }
2177
2178    #[simd_test(enable = "avx512vbmi2")]
2179    unsafe fn test_mm512_maskz_compress_epi16() {
2180        #[rustfmt::skip]
2181        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2182                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2183        let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2184        #[rustfmt::skip]
2185        let e = _mm512_set_epi16(
2186            0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2187            1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2188        );
2189        assert_eq_m512i(r, e);
2190    }
2191
2192    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2193    unsafe fn test_mm256_mask_compress_epi16() {
2194        let src = _mm256_set1_epi16(200);
2195        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2196        let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2197        let e = _mm256_set_epi16(
2198            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2199        );
2200        assert_eq_m256i(r, e);
2201    }
2202
2203    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2204    unsafe fn test_mm256_maskz_compress_epi16() {
2205        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2206        let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2207        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2208        assert_eq_m256i(r, e);
2209    }
2210
2211    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2212    unsafe fn test_mm_mask_compress_epi16() {
2213        let src = _mm_set1_epi16(200);
2214        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2215        let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2216        let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2217        assert_eq_m128i(r, e);
2218    }
2219
2220    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2221    unsafe fn test_mm_maskz_compress_epi16() {
2222        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2223        let r = _mm_maskz_compress_epi16(0b01010101, a);
2224        let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2225        assert_eq_m128i(r, e);
2226    }
2227
2228    #[simd_test(enable = "avx512vbmi2")]
2229    unsafe fn test_mm512_mask_compress_epi8() {
2230        let src = _mm512_set1_epi8(100);
2231        #[rustfmt::skip]
2232        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2233                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2234                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2235                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2236        let r = _mm512_mask_compress_epi8(
2237            src,
2238            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2239            a,
2240        );
2241        #[rustfmt::skip]
2242        let e = _mm512_set_epi8(
2243            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2244            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2245            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2246            33,  35,  37,  39,  41,  43,  45,  47,  49,  51,  53,  55,  57,  59,  61,  63,
2247        );
2248        assert_eq_m512i(r, e);
2249    }
2250
2251    #[simd_test(enable = "avx512vbmi2")]
2252    unsafe fn test_mm512_maskz_compress_epi8() {
2253        #[rustfmt::skip]
2254        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2255                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2256                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2257                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2258        let r = _mm512_maskz_compress_epi8(
2259            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2260            a,
2261        );
2262        #[rustfmt::skip]
2263        let e = _mm512_set_epi8(
2264            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2265            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2266            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2267            33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2268        );
2269        assert_eq_m512i(r, e);
2270    }
2271
2272    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2273    unsafe fn test_mm256_mask_compress_epi8() {
2274        let src = _mm256_set1_epi8(100);
2275        #[rustfmt::skip]
2276        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2277                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2278        let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2279        #[rustfmt::skip]
2280        let e = _mm256_set_epi8(
2281            100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2282            1,   3,   5,   7,   9,   11,  13,  15,  17,  19,  21,  23,  25,  27,  29,  31,
2283        );
2284        assert_eq_m256i(r, e);
2285    }
2286
2287    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2288    unsafe fn test_mm256_maskz_compress_epi8() {
2289        #[rustfmt::skip]
2290        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2291                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2292        let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2293        #[rustfmt::skip]
2294        let e = _mm256_set_epi8(
2295            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
2296            1,  3,  5,  7,  9,  11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2297        );
2298        assert_eq_m256i(r, e);
2299    }
2300
2301    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2302    unsafe fn test_mm_mask_compress_epi8() {
2303        let src = _mm_set1_epi8(100);
2304        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2305        let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2306        let e = _mm_set_epi8(
2307            100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2308        );
2309        assert_eq_m128i(r, e);
2310    }
2311
2312    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2313    unsafe fn test_mm_maskz_compress_epi8() {
2314        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2315        let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2316        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2317        assert_eq_m128i(r, e);
2318    }
2319
2320    #[simd_test(enable = "avx512vbmi2")]
2321    unsafe fn test_mm512_mask_expand_epi16() {
2322        let src = _mm512_set1_epi16(200);
2323        #[rustfmt::skip]
2324        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2325                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2326        let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2327        #[rustfmt::skip]
2328        let e = _mm512_set_epi16(
2329            200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2330            200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2331        );
2332        assert_eq_m512i(r, e);
2333    }
2334
2335    #[simd_test(enable = "avx512vbmi2")]
2336    unsafe fn test_mm512_maskz_expand_epi16() {
2337        #[rustfmt::skip]
2338        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2339                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2340        let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2341        #[rustfmt::skip]
2342        let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2343                                 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2344        assert_eq_m512i(r, e);
2345    }
2346
2347    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2348    unsafe fn test_mm256_mask_expand_epi16() {
2349        let src = _mm256_set1_epi16(200);
2350        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2351        let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2352        let e = _mm256_set_epi16(
2353            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2354        );
2355        assert_eq_m256i(r, e);
2356    }
2357
2358    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2359    unsafe fn test_mm256_maskz_expand_epi16() {
2360        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2361        let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2362        let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2363        assert_eq_m256i(r, e);
2364    }
2365
2366    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2367    unsafe fn test_mm_mask_expand_epi16() {
2368        let src = _mm_set1_epi16(200);
2369        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2370        let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2371        let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2372        assert_eq_m128i(r, e);
2373    }
2374
2375    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2376    unsafe fn test_mm_maskz_expand_epi16() {
2377        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2378        let r = _mm_maskz_expand_epi16(0b01010101, a);
2379        let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2380        assert_eq_m128i(r, e);
2381    }
2382
2383    #[simd_test(enable = "avx512vbmi2")]
2384    unsafe fn test_mm512_mask_expand_epi8() {
2385        let src = _mm512_set1_epi8(100);
2386        #[rustfmt::skip]
2387        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2388                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2389                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2390                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2391        let r = _mm512_mask_expand_epi8(
2392            src,
2393            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2394            a,
2395        );
2396        #[rustfmt::skip]
2397        let e = _mm512_set_epi8(
2398            100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2399            100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2400            100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2401            100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2402        );
2403        assert_eq_m512i(r, e);
2404    }
2405
2406    #[simd_test(enable = "avx512vbmi2")]
2407    unsafe fn test_mm512_maskz_expand_epi8() {
2408        #[rustfmt::skip]
2409        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2410                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2411                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2412                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2413        let r = _mm512_maskz_expand_epi8(
2414            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2415            a,
2416        );
2417        #[rustfmt::skip]
2418        let e = _mm512_set_epi8(
2419            0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2420            0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2421            0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2422            0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2423        );
2424        assert_eq_m512i(r, e);
2425    }
2426
2427    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2428    unsafe fn test_mm256_mask_expand_epi8() {
2429        let src = _mm256_set1_epi8(100);
2430        #[rustfmt::skip]
2431        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2432                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2433        let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2434        #[rustfmt::skip]
2435        let e = _mm256_set_epi8(
2436            100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2437            100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2438        );
2439        assert_eq_m256i(r, e);
2440    }
2441
2442    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2443    unsafe fn test_mm256_maskz_expand_epi8() {
2444        #[rustfmt::skip]
2445        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
2446                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2447        let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2448        #[rustfmt::skip]
2449        let e = _mm256_set_epi8(
2450            0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2451            0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2452        );
2453        assert_eq_m256i(r, e);
2454    }
2455
2456    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2457    unsafe fn test_mm_mask_expand_epi8() {
2458        let src = _mm_set1_epi8(100);
2459        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2460        let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2461        let e = _mm_set_epi8(
2462            100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2463        );
2464        assert_eq_m128i(r, e);
2465    }
2466
2467    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2468    unsafe fn test_mm_maskz_expand_epi8() {
2469        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2470        let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2471        let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2472        assert_eq_m128i(r, e);
2473    }
2474
2475    #[simd_test(enable = "avx512vbmi2")]
2476    unsafe fn test_mm512_shldv_epi64() {
2477        let a = _mm512_set1_epi64(1);
2478        let b = _mm512_set1_epi64(1 << 63);
2479        let c = _mm512_set1_epi64(2);
2480        let r = _mm512_shldv_epi64(a, b, c);
2481        let e = _mm512_set1_epi64(6);
2482        assert_eq_m512i(r, e);
2483    }
2484
2485    #[simd_test(enable = "avx512vbmi2")]
2486    unsafe fn test_mm512_mask_shldv_epi64() {
2487        let a = _mm512_set1_epi64(1);
2488        let b = _mm512_set1_epi64(1 << 63);
2489        let c = _mm512_set1_epi64(2);
2490        let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2491        assert_eq_m512i(r, a);
2492        let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2493        let e = _mm512_set1_epi64(6);
2494        assert_eq_m512i(r, e);
2495    }
2496
2497    #[simd_test(enable = "avx512vbmi2")]
2498    unsafe fn test_mm512_maskz_shldv_epi64() {
2499        let a = _mm512_set1_epi64(1);
2500        let b = _mm512_set1_epi64(1 << 63);
2501        let c = _mm512_set1_epi64(2);
2502        let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2503        assert_eq_m512i(r, _mm512_setzero_si512());
2504        let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2505        let e = _mm512_set1_epi64(6);
2506        assert_eq_m512i(r, e);
2507    }
2508
2509    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2510    unsafe fn test_mm256_shldv_epi64() {
2511        let a = _mm256_set1_epi64x(1);
2512        let b = _mm256_set1_epi64x(1 << 63);
2513        let c = _mm256_set1_epi64x(2);
2514        let r = _mm256_shldv_epi64(a, b, c);
2515        let e = _mm256_set1_epi64x(6);
2516        assert_eq_m256i(r, e);
2517    }
2518
2519    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2520    unsafe fn test_mm256_mask_shldv_epi64() {
2521        let a = _mm256_set1_epi64x(1);
2522        let b = _mm256_set1_epi64x(1 << 63);
2523        let c = _mm256_set1_epi64x(2);
2524        let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2525        assert_eq_m256i(r, a);
2526        let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2527        let e = _mm256_set1_epi64x(6);
2528        assert_eq_m256i(r, e);
2529    }
2530
2531    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2532    unsafe fn test_mm256_maskz_shldv_epi64() {
2533        let a = _mm256_set1_epi64x(1);
2534        let b = _mm256_set1_epi64x(1 << 63);
2535        let c = _mm256_set1_epi64x(2);
2536        let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2537        assert_eq_m256i(r, _mm256_setzero_si256());
2538        let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2539        let e = _mm256_set1_epi64x(6);
2540        assert_eq_m256i(r, e);
2541    }
2542
2543    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2544    unsafe fn test_mm_shldv_epi64() {
2545        let a = _mm_set1_epi64x(1);
2546        let b = _mm_set1_epi64x(1 << 63);
2547        let c = _mm_set1_epi64x(2);
2548        let r = _mm_shldv_epi64(a, b, c);
2549        let e = _mm_set1_epi64x(6);
2550        assert_eq_m128i(r, e);
2551    }
2552
2553    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2554    unsafe fn test_mm_mask_shldv_epi64() {
2555        let a = _mm_set1_epi64x(1);
2556        let b = _mm_set1_epi64x(1 << 63);
2557        let c = _mm_set1_epi64x(2);
2558        let r = _mm_mask_shldv_epi64(a, 0, b, c);
2559        assert_eq_m128i(r, a);
2560        let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2561        let e = _mm_set1_epi64x(6);
2562        assert_eq_m128i(r, e);
2563    }
2564
2565    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2566    unsafe fn test_mm_maskz_shldv_epi64() {
2567        let a = _mm_set1_epi64x(1);
2568        let b = _mm_set1_epi64x(1 << 63);
2569        let c = _mm_set1_epi64x(2);
2570        let r = _mm_maskz_shldv_epi64(0, a, b, c);
2571        assert_eq_m128i(r, _mm_setzero_si128());
2572        let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2573        let e = _mm_set1_epi64x(6);
2574        assert_eq_m128i(r, e);
2575    }
2576
2577    #[simd_test(enable = "avx512vbmi2")]
2578    unsafe fn test_mm512_shldv_epi32() {
2579        let a = _mm512_set1_epi32(1);
2580        let b = _mm512_set1_epi32(1 << 31);
2581        let c = _mm512_set1_epi32(2);
2582        let r = _mm512_shldv_epi32(a, b, c);
2583        let e = _mm512_set1_epi32(6);
2584        assert_eq_m512i(r, e);
2585    }
2586
2587    #[simd_test(enable = "avx512vbmi2")]
2588    unsafe fn test_mm512_mask_shldv_epi32() {
2589        let a = _mm512_set1_epi32(1);
2590        let b = _mm512_set1_epi32(1 << 31);
2591        let c = _mm512_set1_epi32(2);
2592        let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2593        assert_eq_m512i(r, a);
2594        let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2595        let e = _mm512_set1_epi32(6);
2596        assert_eq_m512i(r, e);
2597    }
2598
2599    #[simd_test(enable = "avx512vbmi2")]
2600    unsafe fn test_mm512_maskz_shldv_epi32() {
2601        let a = _mm512_set1_epi32(1);
2602        let b = _mm512_set1_epi32(1 << 31);
2603        let c = _mm512_set1_epi32(2);
2604        let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2605        assert_eq_m512i(r, _mm512_setzero_si512());
2606        let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2607        let e = _mm512_set1_epi32(6);
2608        assert_eq_m512i(r, e);
2609    }
2610
2611    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2612    unsafe fn test_mm256_shldv_epi32() {
2613        let a = _mm256_set1_epi32(1);
2614        let b = _mm256_set1_epi32(1 << 31);
2615        let c = _mm256_set1_epi32(2);
2616        let r = _mm256_shldv_epi32(a, b, c);
2617        let e = _mm256_set1_epi32(6);
2618        assert_eq_m256i(r, e);
2619    }
2620
2621    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2622    unsafe fn test_mm256_mask_shldv_epi32() {
2623        let a = _mm256_set1_epi32(1);
2624        let b = _mm256_set1_epi32(1 << 31);
2625        let c = _mm256_set1_epi32(2);
2626        let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2627        assert_eq_m256i(r, a);
2628        let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2629        let e = _mm256_set1_epi32(6);
2630        assert_eq_m256i(r, e);
2631    }
2632
2633    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2634    unsafe fn test_mm256_maskz_shldv_epi32() {
2635        let a = _mm256_set1_epi32(1);
2636        let b = _mm256_set1_epi32(1 << 31);
2637        let c = _mm256_set1_epi32(2);
2638        let r = _mm256_maskz_shldv_epi32(0, a, b, c);
2639        assert_eq_m256i(r, _mm256_setzero_si256());
2640        let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
2641        let e = _mm256_set1_epi32(6);
2642        assert_eq_m256i(r, e);
2643    }
2644
2645    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2646    unsafe fn test_mm_shldv_epi32() {
2647        let a = _mm_set1_epi32(1);
2648        let b = _mm_set1_epi32(1 << 31);
2649        let c = _mm_set1_epi32(2);
2650        let r = _mm_shldv_epi32(a, b, c);
2651        let e = _mm_set1_epi32(6);
2652        assert_eq_m128i(r, e);
2653    }
2654
2655    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2656    unsafe fn test_mm_mask_shldv_epi32() {
2657        let a = _mm_set1_epi32(1);
2658        let b = _mm_set1_epi32(1 << 31);
2659        let c = _mm_set1_epi32(2);
2660        let r = _mm_mask_shldv_epi32(a, 0, b, c);
2661        assert_eq_m128i(r, a);
2662        let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
2663        let e = _mm_set1_epi32(6);
2664        assert_eq_m128i(r, e);
2665    }
2666
2667    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2668    unsafe fn test_mm_maskz_shldv_epi32() {
2669        let a = _mm_set1_epi32(1);
2670        let b = _mm_set1_epi32(1 << 31);
2671        let c = _mm_set1_epi32(2);
2672        let r = _mm_maskz_shldv_epi32(0, a, b, c);
2673        assert_eq_m128i(r, _mm_setzero_si128());
2674        let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
2675        let e = _mm_set1_epi32(6);
2676        assert_eq_m128i(r, e);
2677    }
2678
2679    #[simd_test(enable = "avx512vbmi2")]
2680    unsafe fn test_mm512_shldv_epi16() {
2681        let a = _mm512_set1_epi16(1);
2682        let b = _mm512_set1_epi16(1 << 15);
2683        let c = _mm512_set1_epi16(2);
2684        let r = _mm512_shldv_epi16(a, b, c);
2685        let e = _mm512_set1_epi16(6);
2686        assert_eq_m512i(r, e);
2687    }
2688
2689    #[simd_test(enable = "avx512vbmi2")]
2690    unsafe fn test_mm512_mask_shldv_epi16() {
2691        let a = _mm512_set1_epi16(1);
2692        let b = _mm512_set1_epi16(1 << 15);
2693        let c = _mm512_set1_epi16(2);
2694        let r = _mm512_mask_shldv_epi16(a, 0, b, c);
2695        assert_eq_m512i(r, a);
2696        let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2697        let e = _mm512_set1_epi16(6);
2698        assert_eq_m512i(r, e);
2699    }
2700
2701    #[simd_test(enable = "avx512vbmi2")]
2702    unsafe fn test_mm512_maskz_shldv_epi16() {
2703        let a = _mm512_set1_epi16(1);
2704        let b = _mm512_set1_epi16(1 << 15);
2705        let c = _mm512_set1_epi16(2);
2706        let r = _mm512_maskz_shldv_epi16(0, a, b, c);
2707        assert_eq_m512i(r, _mm512_setzero_si512());
2708        let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2709        let e = _mm512_set1_epi16(6);
2710        assert_eq_m512i(r, e);
2711    }
2712
2713    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2714    unsafe fn test_mm256_shldv_epi16() {
2715        let a = _mm256_set1_epi16(1);
2716        let b = _mm256_set1_epi16(1 << 15);
2717        let c = _mm256_set1_epi16(2);
2718        let r = _mm256_shldv_epi16(a, b, c);
2719        let e = _mm256_set1_epi16(6);
2720        assert_eq_m256i(r, e);
2721    }
2722
2723    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2724    unsafe fn test_mm256_mask_shldv_epi16() {
2725        let a = _mm256_set1_epi16(1);
2726        let b = _mm256_set1_epi16(1 << 15);
2727        let c = _mm256_set1_epi16(2);
2728        let r = _mm256_mask_shldv_epi16(a, 0, b, c);
2729        assert_eq_m256i(r, a);
2730        let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
2731        let e = _mm256_set1_epi16(6);
2732        assert_eq_m256i(r, e);
2733    }
2734
2735    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2736    unsafe fn test_mm256_maskz_shldv_epi16() {
2737        let a = _mm256_set1_epi16(1);
2738        let b = _mm256_set1_epi16(1 << 15);
2739        let c = _mm256_set1_epi16(2);
2740        let r = _mm256_maskz_shldv_epi16(0, a, b, c);
2741        assert_eq_m256i(r, _mm256_setzero_si256());
2742        let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
2743        let e = _mm256_set1_epi16(6);
2744        assert_eq_m256i(r, e);
2745    }
2746
2747    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2748    unsafe fn test_mm_shldv_epi16() {
2749        let a = _mm_set1_epi16(1);
2750        let b = _mm_set1_epi16(1 << 15);
2751        let c = _mm_set1_epi16(2);
2752        let r = _mm_shldv_epi16(a, b, c);
2753        let e = _mm_set1_epi16(6);
2754        assert_eq_m128i(r, e);
2755    }
2756
2757    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2758    unsafe fn test_mm_mask_shldv_epi16() {
2759        let a = _mm_set1_epi16(1);
2760        let b = _mm_set1_epi16(1 << 15);
2761        let c = _mm_set1_epi16(2);
2762        let r = _mm_mask_shldv_epi16(a, 0, b, c);
2763        assert_eq_m128i(r, a);
2764        let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
2765        let e = _mm_set1_epi16(6);
2766        assert_eq_m128i(r, e);
2767    }
2768
2769    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2770    unsafe fn test_mm_maskz_shldv_epi16() {
2771        let a = _mm_set1_epi16(1);
2772        let b = _mm_set1_epi16(1 << 15);
2773        let c = _mm_set1_epi16(2);
2774        let r = _mm_maskz_shldv_epi16(0, a, b, c);
2775        assert_eq_m128i(r, _mm_setzero_si128());
2776        let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
2777        let e = _mm_set1_epi16(6);
2778        assert_eq_m128i(r, e);
2779    }
2780
2781    #[simd_test(enable = "avx512vbmi2")]
2782    unsafe fn test_mm512_shrdv_epi64() {
2783        let a = _mm512_set1_epi64(2);
2784        let b = _mm512_set1_epi64(8);
2785        let c = _mm512_set1_epi64(1);
2786        let r = _mm512_shrdv_epi64(a, b, c);
2787        let e = _mm512_set1_epi64(1);
2788        assert_eq_m512i(r, e);
2789    }
2790
2791    #[simd_test(enable = "avx512vbmi2")]
2792    unsafe fn test_mm512_mask_shrdv_epi64() {
2793        let a = _mm512_set1_epi64(2);
2794        let b = _mm512_set1_epi64(8);
2795        let c = _mm512_set1_epi64(1);
2796        let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
2797        assert_eq_m512i(r, a);
2798        let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
2799        let e = _mm512_set1_epi64(1);
2800        assert_eq_m512i(r, e);
2801    }
2802
2803    #[simd_test(enable = "avx512vbmi2")]
2804    unsafe fn test_mm512_maskz_shrdv_epi64() {
2805        let a = _mm512_set1_epi64(2);
2806        let b = _mm512_set1_epi64(8);
2807        let c = _mm512_set1_epi64(1);
2808        let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
2809        assert_eq_m512i(r, _mm512_setzero_si512());
2810        let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
2811        let e = _mm512_set1_epi64(1);
2812        assert_eq_m512i(r, e);
2813    }
2814
2815    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2816    unsafe fn test_mm256_shrdv_epi64() {
2817        let a = _mm256_set1_epi64x(2);
2818        let b = _mm256_set1_epi64x(8);
2819        let c = _mm256_set1_epi64x(1);
2820        let r = _mm256_shrdv_epi64(a, b, c);
2821        let e = _mm256_set1_epi64x(1);
2822        assert_eq_m256i(r, e);
2823    }
2824
2825    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2826    unsafe fn test_mm256_mask_shrdv_epi64() {
2827        let a = _mm256_set1_epi64x(2);
2828        let b = _mm256_set1_epi64x(8);
2829        let c = _mm256_set1_epi64x(1);
2830        let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
2831        assert_eq_m256i(r, a);
2832        let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
2833        let e = _mm256_set1_epi64x(1);
2834        assert_eq_m256i(r, e);
2835    }
2836
2837    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2838    unsafe fn test_mm256_maskz_shrdv_epi64() {
2839        let a = _mm256_set1_epi64x(2);
2840        let b = _mm256_set1_epi64x(8);
2841        let c = _mm256_set1_epi64x(1);
2842        let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
2843        assert_eq_m256i(r, _mm256_setzero_si256());
2844        let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
2845        let e = _mm256_set1_epi64x(1);
2846        assert_eq_m256i(r, e);
2847    }
2848
2849    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2850    unsafe fn test_mm_shrdv_epi64() {
2851        let a = _mm_set1_epi64x(2);
2852        let b = _mm_set1_epi64x(8);
2853        let c = _mm_set1_epi64x(1);
2854        let r = _mm_shrdv_epi64(a, b, c);
2855        let e = _mm_set1_epi64x(1);
2856        assert_eq_m128i(r, e);
2857    }
2858
2859    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2860    unsafe fn test_mm_mask_shrdv_epi64() {
2861        let a = _mm_set1_epi64x(2);
2862        let b = _mm_set1_epi64x(8);
2863        let c = _mm_set1_epi64x(1);
2864        let r = _mm_mask_shrdv_epi64(a, 0, b, c);
2865        assert_eq_m128i(r, a);
2866        let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
2867        let e = _mm_set1_epi64x(1);
2868        assert_eq_m128i(r, e);
2869    }
2870
2871    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2872    unsafe fn test_mm_maskz_shrdv_epi64() {
2873        let a = _mm_set1_epi64x(2);
2874        let b = _mm_set1_epi64x(8);
2875        let c = _mm_set1_epi64x(1);
2876        let r = _mm_maskz_shrdv_epi64(0, a, b, c);
2877        assert_eq_m128i(r, _mm_setzero_si128());
2878        let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
2879        let e = _mm_set1_epi64x(1);
2880        assert_eq_m128i(r, e);
2881    }
2882
2883    #[simd_test(enable = "avx512vbmi2")]
2884    unsafe fn test_mm512_shrdv_epi32() {
2885        let a = _mm512_set1_epi32(2);
2886        let b = _mm512_set1_epi32(8);
2887        let c = _mm512_set1_epi32(1);
2888        let r = _mm512_shrdv_epi32(a, b, c);
2889        let e = _mm512_set1_epi32(1);
2890        assert_eq_m512i(r, e);
2891    }
2892
2893    #[simd_test(enable = "avx512vbmi2")]
2894    unsafe fn test_mm512_mask_shrdv_epi32() {
2895        let a = _mm512_set1_epi32(2);
2896        let b = _mm512_set1_epi32(8);
2897        let c = _mm512_set1_epi32(1);
2898        let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
2899        assert_eq_m512i(r, a);
2900        let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
2901        let e = _mm512_set1_epi32(1);
2902        assert_eq_m512i(r, e);
2903    }
2904
2905    #[simd_test(enable = "avx512vbmi2")]
2906    unsafe fn test_mm512_maskz_shrdv_epi32() {
2907        let a = _mm512_set1_epi32(2);
2908        let b = _mm512_set1_epi32(8);
2909        let c = _mm512_set1_epi32(1);
2910        let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
2911        assert_eq_m512i(r, _mm512_setzero_si512());
2912        let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
2913        let e = _mm512_set1_epi32(1);
2914        assert_eq_m512i(r, e);
2915    }
2916
2917    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2918    unsafe fn test_mm256_shrdv_epi32() {
2919        let a = _mm256_set1_epi32(2);
2920        let b = _mm256_set1_epi32(8);
2921        let c = _mm256_set1_epi32(1);
2922        let r = _mm256_shrdv_epi32(a, b, c);
2923        let e = _mm256_set1_epi32(1);
2924        assert_eq_m256i(r, e);
2925    }
2926
2927    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2928    unsafe fn test_mm256_mask_shrdv_epi32() {
2929        let a = _mm256_set1_epi32(2);
2930        let b = _mm256_set1_epi32(8);
2931        let c = _mm256_set1_epi32(1);
2932        let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
2933        assert_eq_m256i(r, a);
2934        let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
2935        let e = _mm256_set1_epi32(1);
2936        assert_eq_m256i(r, e);
2937    }
2938
2939    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2940    unsafe fn test_mm256_maskz_shrdv_epi32() {
2941        let a = _mm256_set1_epi32(2);
2942        let b = _mm256_set1_epi32(8);
2943        let c = _mm256_set1_epi32(1);
2944        let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
2945        assert_eq_m256i(r, _mm256_setzero_si256());
2946        let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
2947        let e = _mm256_set1_epi32(1);
2948        assert_eq_m256i(r, e);
2949    }
2950
2951    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2952    unsafe fn test_mm_shrdv_epi32() {
2953        let a = _mm_set1_epi32(2);
2954        let b = _mm_set1_epi32(8);
2955        let c = _mm_set1_epi32(1);
2956        let r = _mm_shrdv_epi32(a, b, c);
2957        let e = _mm_set1_epi32(1);
2958        assert_eq_m128i(r, e);
2959    }
2960
2961    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2962    unsafe fn test_mm_mask_shrdv_epi32() {
2963        let a = _mm_set1_epi32(2);
2964        let b = _mm_set1_epi32(8);
2965        let c = _mm_set1_epi32(1);
2966        let r = _mm_mask_shrdv_epi32(a, 0, b, c);
2967        assert_eq_m128i(r, a);
2968        let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
2969        let e = _mm_set1_epi32(1);
2970        assert_eq_m128i(r, e);
2971    }
2972
2973    #[simd_test(enable = "avx512vbmi2,avx512vl")]
2974    unsafe fn test_mm_maskz_shrdv_epi32() {
2975        let a = _mm_set1_epi32(2);
2976        let b = _mm_set1_epi32(8);
2977        let c = _mm_set1_epi32(1);
2978        let r = _mm_maskz_shrdv_epi32(0, a, b, c);
2979        assert_eq_m128i(r, _mm_setzero_si128());
2980        let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
2981        let e = _mm_set1_epi32(1);
2982        assert_eq_m128i(r, e);
2983    }
2984
2985    #[simd_test(enable = "avx512vbmi2")]
2986    unsafe fn test_mm512_shrdv_epi16() {
2987        let a = _mm512_set1_epi16(2);
2988        let b = _mm512_set1_epi16(8);
2989        let c = _mm512_set1_epi16(1);
2990        let r = _mm512_shrdv_epi16(a, b, c);
2991        let e = _mm512_set1_epi16(1);
2992        assert_eq_m512i(r, e);
2993    }
2994
2995    #[simd_test(enable = "avx512vbmi2")]
2996    unsafe fn test_mm512_mask_shrdv_epi16() {
2997        let a = _mm512_set1_epi16(2);
2998        let b = _mm512_set1_epi16(8);
2999        let c = _mm512_set1_epi16(1);
3000        let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
3001        assert_eq_m512i(r, a);
3002        let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
3003        let e = _mm512_set1_epi16(1);
3004        assert_eq_m512i(r, e);
3005    }
3006
3007    #[simd_test(enable = "avx512vbmi2")]
3008    unsafe fn test_mm512_maskz_shrdv_epi16() {
3009        let a = _mm512_set1_epi16(2);
3010        let b = _mm512_set1_epi16(8);
3011        let c = _mm512_set1_epi16(1);
3012        let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
3013        assert_eq_m512i(r, _mm512_setzero_si512());
3014        let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
3015        let e = _mm512_set1_epi16(1);
3016        assert_eq_m512i(r, e);
3017    }
3018
3019    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3020    unsafe fn test_mm256_shrdv_epi16() {
3021        let a = _mm256_set1_epi16(2);
3022        let b = _mm256_set1_epi16(8);
3023        let c = _mm256_set1_epi16(1);
3024        let r = _mm256_shrdv_epi16(a, b, c);
3025        let e = _mm256_set1_epi16(1);
3026        assert_eq_m256i(r, e);
3027    }
3028
3029    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3030    unsafe fn test_mm256_mask_shrdv_epi16() {
3031        let a = _mm256_set1_epi16(2);
3032        let b = _mm256_set1_epi16(8);
3033        let c = _mm256_set1_epi16(1);
3034        let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
3035        assert_eq_m256i(r, a);
3036        let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
3037        let e = _mm256_set1_epi16(1);
3038        assert_eq_m256i(r, e);
3039    }
3040
3041    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3042    unsafe fn test_mm256_maskz_shrdv_epi16() {
3043        let a = _mm256_set1_epi16(2);
3044        let b = _mm256_set1_epi16(8);
3045        let c = _mm256_set1_epi16(1);
3046        let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
3047        assert_eq_m256i(r, _mm256_setzero_si256());
3048        let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
3049        let e = _mm256_set1_epi16(1);
3050        assert_eq_m256i(r, e);
3051    }
3052
3053    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3054    unsafe fn test_mm_shrdv_epi16() {
3055        let a = _mm_set1_epi16(2);
3056        let b = _mm_set1_epi16(8);
3057        let c = _mm_set1_epi16(1);
3058        let r = _mm_shrdv_epi16(a, b, c);
3059        let e = _mm_set1_epi16(1);
3060        assert_eq_m128i(r, e);
3061    }
3062
3063    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3064    unsafe fn test_mm_mask_shrdv_epi16() {
3065        let a = _mm_set1_epi16(2);
3066        let b = _mm_set1_epi16(8);
3067        let c = _mm_set1_epi16(1);
3068        let r = _mm_mask_shrdv_epi16(a, 0, b, c);
3069        assert_eq_m128i(r, a);
3070        let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
3071        let e = _mm_set1_epi16(1);
3072        assert_eq_m128i(r, e);
3073    }
3074
3075    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3076    unsafe fn test_mm_maskz_shrdv_epi16() {
3077        let a = _mm_set1_epi16(2);
3078        let b = _mm_set1_epi16(8);
3079        let c = _mm_set1_epi16(1);
3080        let r = _mm_maskz_shrdv_epi16(0, a, b, c);
3081        assert_eq_m128i(r, _mm_setzero_si128());
3082        let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
3083        let e = _mm_set1_epi16(1);
3084        assert_eq_m128i(r, e);
3085    }
3086
3087    #[simd_test(enable = "avx512vbmi2")]
3088    unsafe fn test_mm512_shldi_epi64() {
3089        let a = _mm512_set1_epi64(1);
3090        let b = _mm512_set1_epi64(1 << 63);
3091        let r = _mm512_shldi_epi64::<2>(a, b);
3092        let e = _mm512_set1_epi64(6);
3093        assert_eq_m512i(r, e);
3094    }
3095
3096    #[simd_test(enable = "avx512vbmi2")]
3097    unsafe fn test_mm512_mask_shldi_epi64() {
3098        let a = _mm512_set1_epi64(1);
3099        let b = _mm512_set1_epi64(1 << 63);
3100        let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
3101        assert_eq_m512i(r, a);
3102        let r = _mm512_mask_shldi_epi64::<2>(a, 0b11111111, a, b);
3103        let e = _mm512_set1_epi64(6);
3104        assert_eq_m512i(r, e);
3105    }
3106
3107    #[simd_test(enable = "avx512vbmi2")]
3108    unsafe fn test_mm512_maskz_shldi_epi64() {
3109        let a = _mm512_set1_epi64(1);
3110        let b = _mm512_set1_epi64(1 << 63);
3111        let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
3112        assert_eq_m512i(r, _mm512_setzero_si512());
3113        let r = _mm512_maskz_shldi_epi64::<2>(0b11111111, a, b);
3114        let e = _mm512_set1_epi64(6);
3115        assert_eq_m512i(r, e);
3116    }
3117
3118    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3119    unsafe fn test_mm256_shldi_epi64() {
3120        let a = _mm256_set1_epi64x(1);
3121        let b = _mm256_set1_epi64x(1 << 63);
3122        let r = _mm256_shldi_epi64::<2>(a, b);
3123        let e = _mm256_set1_epi64x(6);
3124        assert_eq_m256i(r, e);
3125    }
3126
3127    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3128    unsafe fn test_mm256_mask_shldi_epi64() {
3129        let a = _mm256_set1_epi64x(1);
3130        let b = _mm256_set1_epi64x(1 << 63);
3131        let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
3132        assert_eq_m256i(r, a);
3133        let r = _mm256_mask_shldi_epi64::<2>(a, 0b00001111, a, b);
3134        let e = _mm256_set1_epi64x(6);
3135        assert_eq_m256i(r, e);
3136    }
3137
3138    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3139    unsafe fn test_mm256_maskz_shldi_epi64() {
3140        let a = _mm256_set1_epi64x(1);
3141        let b = _mm256_set1_epi64x(1 << 63);
3142        let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
3143        assert_eq_m256i(r, _mm256_setzero_si256());
3144        let r = _mm256_maskz_shldi_epi64::<2>(0b00001111, a, b);
3145        let e = _mm256_set1_epi64x(6);
3146        assert_eq_m256i(r, e);
3147    }
3148
3149    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3150    unsafe fn test_mm_shldi_epi64() {
3151        let a = _mm_set1_epi64x(1);
3152        let b = _mm_set1_epi64x(1 << 63);
3153        let r = _mm_shldi_epi64::<2>(a, b);
3154        let e = _mm_set1_epi64x(6);
3155        assert_eq_m128i(r, e);
3156    }
3157
3158    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3159    unsafe fn test_mm_mask_shldi_epi64() {
3160        let a = _mm_set1_epi64x(1);
3161        let b = _mm_set1_epi64x(1 << 63);
3162        let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
3163        assert_eq_m128i(r, a);
3164        let r = _mm_mask_shldi_epi64::<2>(a, 0b00000011, a, b);
3165        let e = _mm_set1_epi64x(6);
3166        assert_eq_m128i(r, e);
3167    }
3168
3169    #[simd_test(enable = "avx512vbmi2,avx512vl")]
3170    unsafe fn test_mm_maskz_shldi_epi64() {
3171        let a = _mm_set1_epi64x(1);
3172        let b = _mm_set1_epi64x(1 << 63);
3173        let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
3174        assert_eq_m128i(r, _mm_setzero_si128());
3175        let r = _mm_maskz_shldi_epi64::<2>(0b00000011, a, b);
3176        let e = _mm_set1_epi64x(6);
3177        assert_eq_m128i(r, e);
3178    }
3179
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_shldi_epi32::<2>(a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi32::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(1 << 31);
        let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi32::<2>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_shldi_epi32::<2>(a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi32::<2>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(1 << 31);
        let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi32::<2>(0b11111111, a, b);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_shldi_epi32::<2>(a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi32::<2>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(1 << 31);
        let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi32::<2>(0b00001111, a, b);
        let e = _mm_set1_epi32(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_shldi_epi16::<2>(a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shldi_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shldi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shldi_epi16::<2>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(6);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_shldi_epi16::<2>(a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shldi_epi16::<2>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shldi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shldi_epi16::<2>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_shldi_epi16::<2>(a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shldi_epi16::<2>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shldi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shldi_epi16::<2>(0b11111111, a, b);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

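    // Note on the shrdi (concatenated shift right) expectations: each test pairs a = 2
    // with b = 8 and uses IMM8 = 1, so the lower half of (b:a) >> 1 is (2 >> 1) == 1
    // (only b's zero low bit is shifted in at the top), and active lanes expect 1.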
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_shrdi_epi64::<1>(a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi64::<1>(a, 0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi64() {
        let a = _mm512_set1_epi64(2);
        let b = _mm512_set1_epi64(8);
        let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi64::<1>(0b11111111, a, b);
        let e = _mm512_set1_epi64(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_shrdi_epi64::<1>(a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi64::<1>(a, 0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(8);
        let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi64::<1>(0b00001111, a, b);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_shrdi_epi64::<1>(a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi64::<1>(a, 0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(8);
        let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi64::<1>(0b00000011, a, b);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_shrdi_epi32::<1>(a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi32() {
        let a = _mm512_set1_epi32(2);
        let b = _mm512_set1_epi32(8);
        let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi32::<1>(0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_shrdi_epi32::<1>(a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(8);
        let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_shrdi_epi32::<1>(a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(8);
        let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi32::<1>(0b00001111, a, b);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_shrdi_epi16::<1>(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shrdi_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_shrdi_epi16() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(8);
        let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shrdi_epi16::<1>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_shrdi_epi16::<1>(a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shrdi_epi16::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_shrdi_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(8);
        let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shrdi_epi16::<1>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_shrdi_epi16::<1>(a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shrdi_epi16::<1>(a, 0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_shrdi_epi16() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(8);
        let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shrdi_epi16::<1>(0b11111111, a, b);
        let e = _mm_set1_epi16(1);
        assert_eq_m128i(r, e);
    }

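    // Note on the expand-load expectations: consecutive elements of `a` are placed, in
    // ascending order, only into the lanes whose mask bit is set; inactive lanes keep
    // the `src` value (42) in the mask variants or are zeroed in the maskz variants.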
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm512_set_epi16(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
        let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_expandloadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
            42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
            42, 42, 42, 42, 8, 42, 7, 42, 6, 42, 5, 42, 42, 4, 42, 3, 42, 2, 42, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm512_set_epi8(
            32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
            0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
            7, 0, 6, 0, 5, 0, 0, 4, 0, 3, 0, 2, 0, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
            42, 42, 42, 42, 42, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010_11110000_00001111;
        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm256_set_epi8(
            16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
            0, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
        let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m128i(r, e);
    }

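    // Note on the compress-store expectations: the lanes whose mask bit is set are
    // written contiguously to the start of `r` in ascending lane order; memory past the
    // stored elements is not written, so the tail of the pre-zeroed buffer stays zero.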
    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi16() {
        let a = _mm512_set_epi16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i16; 32];
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 32]);
        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi16() {
        let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 16];
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 16]);
        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi16() {
        let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i16; 8];
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i16; 8]);
        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
        assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512vbmi2")]
    unsafe fn test_mm512_mask_compressstoreu_epi8() {
        let a = _mm512_set_epi8(
            64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
            42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
            20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 64];
        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 64]);
        _mm512_mask_compressstoreu_epi8(
            r.as_mut_ptr(),
            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
            a,
        );
        assert_eq!(
            &r,
            &[
                1, 2, 3, 4, 13, 14, 15, 16, 17, 19, 21, 23, 26, 28, 30, 32, 41, 42, 43, 44, 45, 46,
                47, 48, 50, 52, 55, 56, 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi8() {
        let a = _mm256_set_epi8(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
            10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
        );
        let mut r = [0_i8; 32];
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 32]);
        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
        assert_eq!(
            &r,
            &[
                9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 23, 24, 29, 30, 31, 32, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0
            ]
        );
    }

    #[simd_test(enable = "avx512vbmi2,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi8() {
        let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let mut r = [0_i8; 16];
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i8; 16]);
        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }
}