1 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK |
2 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=CHECK |
3 | |
4 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -emit-llvm -o - -Wall -Werror -fmax-type-align=16 | FileCheck %s --check-prefix=CHECK |
5 | // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -target-feature +avx -target-feature +avx2 -target-feature +avx512f -fno-signed-char -emit-llvm -o - -Wall -Werror -fmax-type-align=16 | FileCheck %s --check-prefix=CHECK |
6 | |
7 | #include <immintrin.h> |
8 | |
// (PR33830) These tests ensure the correct alignment of non-temporal loads/stores on Darwin targets where -fmax-type-align is set to 16.
10 | |
11 | // |
12 | // 128-bit vectors |
13 | // |
14 | |
// Non-temporal store of a 128-bit double vector via _mm_stream_pd. The
// emitted IR store is expected to be 16-byte aligned and tagged !nontemporal.
void test_mm_stream_pd(double *A, __m128d B) {
  // CHECK-LABEL: test_mm_stream_pd
  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
  _mm_stream_pd(A, B);
}
20 | |
// Non-temporal store of a 128-bit float vector via _mm_stream_ps. The
// emitted IR store is expected to be 16-byte aligned and tagged !nontemporal.
// The directives below use the same FileCheck prefix that every RUN line in
// this file enables, so they are verified in all configurations.
void test_mm_stream_ps(float *A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}
26 | |
// Non-temporal store of a 128-bit integer vector via _mm_stream_si128. The
// emitted IR store is expected to be 16-byte aligned and tagged !nontemporal.
void test_mm_stream_si128(__m128i *A, __m128i B) {
  // CHECK-LABEL: test_mm_stream_si128
  // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
  _mm_stream_si128(A, B);
}
32 | |
// Non-temporal load of a 128-bit integer vector via _mm_stream_load_si128.
// The emitted IR load is expected to be 16-byte aligned and tagged
// !nontemporal.
__m128i test_mm_stream_load_si128(__m128i const *A) {
  // CHECK-LABEL: test_mm_stream_load_si128
  // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16, !nontemporal
  __m128i loaded = _mm_stream_load_si128(A);
  return loaded;
}
38 | |
39 | // |
40 | // 256-bit vectors |
41 | // |
42 | |
// Non-temporal store of a 256-bit double vector via _mm256_stream_pd. The
// emitted IR store is expected to be 32-byte aligned and tagged !nontemporal.
void test_mm256_stream_pd(double *A, __m256d B) {
  // CHECK-LABEL: test_mm256_stream_pd
  // CHECK: store <4 x double> %{{.*}}, <4 x double>* %{{.*}}, align 32, !nontemporal
  _mm256_stream_pd(A, B);
}
48 | |
// Non-temporal store of a 256-bit float vector via _mm256_stream_ps. The
// emitted IR store is expected to be 32-byte aligned and tagged !nontemporal.
void test_mm256_stream_ps(float *A, __m256 B) {
  // CHECK-LABEL: test_mm256_stream_ps
  // CHECK: store <8 x float> %{{.*}}, <8 x float>* %{{.*}}, align 32, !nontemporal
  _mm256_stream_ps(A, B);
}
54 | |
// Non-temporal store of a 256-bit integer vector via _mm256_stream_si256. The
// emitted IR store is expected to be 32-byte aligned and tagged !nontemporal.
void test_mm256_stream_si256(__m256i *A, __m256i B) {
  // CHECK-LABEL: test_mm256_stream_si256
  // CHECK: store <4 x i64> %{{.*}}, <4 x i64>* %{{.*}}, align 32, !nontemporal
  _mm256_stream_si256(A, B);
}
60 | |
// Non-temporal load of a 256-bit integer vector via _mm256_stream_load_si256.
// The emitted IR load is expected to be 32-byte aligned and tagged
// !nontemporal.
__m256i test_mm256_stream_load_si256(__m256i const *A) {
  // CHECK-LABEL: test_mm256_stream_load_si256
  // CHECK: load <4 x i64>, <4 x i64>* %{{.*}}, align 32, !nontemporal
  __m256i loaded = _mm256_stream_load_si256(A);
  return loaded;
}
66 | |
67 | // |
68 | // 512-bit vectors |
69 | // |
70 | |
// Non-temporal store of a 512-bit double vector via _mm512_stream_pd. The
// emitted IR store is expected to be 64-byte aligned and tagged !nontemporal.
void test_mm512_stream_pd(double *A, __m512d B) {
  // CHECK-LABEL: test_mm512_stream_pd
  // CHECK: store <8 x double> %{{.*}}, <8 x double>* %{{.*}}, align 64, !nontemporal
  _mm512_stream_pd(A, B);
}
76 | |
// Non-temporal store of a 512-bit float vector via _mm512_stream_ps. The
// emitted IR store is expected to be 64-byte aligned and tagged !nontemporal.
void test_mm512_stream_ps(float *A, __m512 B) {
  // CHECK-LABEL: test_mm512_stream_ps
  // CHECK: store <16 x float> %{{.*}}, <16 x float>* %{{.*}}, align 64, !nontemporal
  _mm512_stream_ps(A, B);
}
82 | |
// Non-temporal store of a 512-bit integer vector via _mm512_stream_si512. The
// emitted IR store is expected to be 64-byte aligned and tagged !nontemporal.
void test_mm512_stream_si512(__m512i *A, __m512i B) {
  // CHECK-LABEL: test_mm512_stream_si512
  // CHECK: store <8 x i64> %{{.*}}, <8 x i64>* %{{.*}}, align 64, !nontemporal
  _mm512_stream_si512(A, B);
}
88 | |
// Non-temporal load of a 512-bit integer vector via _mm512_stream_load_si512.
// The emitted IR load is expected to be 64-byte aligned and tagged
// !nontemporal.
__m512i test_mm512_stream_load_si512(void *A) {
  // CHECK-LABEL: test_mm512_stream_load_si512
  // CHECK: load <8 x i64>, <8 x i64>* %{{.*}}, align 64, !nontemporal
  __m512i loaded = _mm512_stream_load_si512(A);
  return loaded;
}
94 | |