1 | // RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-abi apcs-gnu -target-feature +neon \ |
2 | // RUN: -S -emit-llvm -o - %s \ |
3 | // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM |
4 | |
5 | // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ |
6 | // RUN: -target-feature +v8.1a -S -emit-llvm -o - %s \ |
7 | // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64 |
8 | |
9 | // REQUIRES: arm-registered-target,aarch64-registered-target |
10 | |
11 | #include <arm_neon.h> |
12 | |
13 | // CHECK-LABEL: test_vqrdmlah_s16 |
14 | int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { |
15 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
16 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
17 | |
18 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
19 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
20 | return vqrdmlah_s16(a, b, c); |
21 | } |
22 | |
23 | // CHECK-LABEL: test_vqrdmlah_s32 |
24 | int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { |
25 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
26 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
27 | |
28 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
29 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
30 | return vqrdmlah_s32(a, b, c); |
31 | } |
32 | |
33 | // CHECK-LABEL: test_vqrdmlahq_s16 |
34 | int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { |
35 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
36 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
37 | |
38 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
39 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
40 | return vqrdmlahq_s16(a, b, c); |
41 | } |
42 | |
43 | // CHECK-LABEL: test_vqrdmlahq_s32 |
44 | int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { |
45 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
46 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
47 | |
48 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
49 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
50 | return vqrdmlahq_s32(a, b, c); |
51 | } |
52 | |
53 | // CHECK-LABEL: test_vqrdmlah_lane_s16 |
54 | int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { |
55 | // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
56 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
57 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
58 | |
59 | // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
60 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
61 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
62 | return vqrdmlah_lane_s16(a, b, c, 3); |
63 | } |
64 | |
65 | // CHECK-LABEL: test_vqrdmlah_lane_s32 |
66 | int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { |
67 | // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> |
68 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
69 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
70 | |
71 | // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> |
72 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
73 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
74 | return vqrdmlah_lane_s32(a, b, c, 1); |
75 | } |
76 | |
77 | // CHECK-LABEL: test_vqrdmlahq_lane_s16 |
78 | int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { |
79 | // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
80 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
81 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
82 | |
83 | // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
84 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
85 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
86 | return vqrdmlahq_lane_s16(a, b, c, 3); |
87 | } |
88 | |
89 | // CHECK-LABEL: test_vqrdmlahq_lane_s32 |
90 | int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { |
91 | // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
92 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
93 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
94 | |
95 | // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
96 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
97 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
98 | return vqrdmlahq_lane_s32(a, b, c, 1); |
99 | } |
100 | |
101 | // CHECK-LABEL: test_vqrdmlsh_s16 |
102 | int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { |
103 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
104 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
105 | |
106 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
107 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
108 | return vqrdmlsh_s16(a, b, c); |
109 | } |
110 | |
111 | // CHECK-LABEL: test_vqrdmlsh_s32 |
112 | int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { |
113 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
114 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
115 | |
116 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
117 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
118 | return vqrdmlsh_s32(a, b, c); |
119 | } |
120 | |
121 | // CHECK-LABEL: test_vqrdmlshq_s16 |
122 | int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { |
123 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
124 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
125 | |
126 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
127 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
128 | return vqrdmlshq_s16(a, b, c); |
129 | } |
130 | |
131 | // CHECK-LABEL: test_vqrdmlshq_s32 |
132 | int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { |
133 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
134 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
135 | |
136 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
137 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
138 | return vqrdmlshq_s32(a, b, c); |
139 | } |
140 | |
141 | // CHECK-LABEL: test_vqrdmlsh_lane_s16 |
142 | int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { |
143 | // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
144 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
145 | // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
146 | |
147 | // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
148 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
149 | // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) |
150 | return vqrdmlsh_lane_s16(a, b, c, 3); |
151 | } |
152 | |
153 | // CHECK-LABEL: test_vqrdmlsh_lane_s32 |
154 | int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { |
155 | // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> |
156 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
157 | // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
158 | |
159 | // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 1> |
160 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
161 | // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) |
162 | return vqrdmlsh_lane_s32(a, b, c, 1); |
163 | } |
164 | |
165 | // CHECK-LABEL: test_vqrdmlshq_lane_s16 |
166 | int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { |
167 | // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
168 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
169 | // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
170 | |
171 | // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
172 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
173 | // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) |
174 | return vqrdmlshq_lane_s16(a, b, c, 3); |
175 | } |
176 | |
177 | // CHECK-LABEL: test_vqrdmlshq_lane_s32 |
178 | int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { |
179 | // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
180 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
181 | // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
182 | |
183 | // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
184 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
185 | // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) |
186 | return vqrdmlshq_lane_s32(a, b, c, 1); |
187 | } |
188 | |