1 | // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s |
2 | |
3 | // Test AArch64 NEON by-element (lane-indexed) arithmetic intrinsics and the vector types they use
4 | |
5 | #include <arm_neon.h> |
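
// Each function below exercises one by-element (lane-indexed) intrinsic, and
// the CHECK lines above it verify the IR produced at -O0 (after mem2reg): the
// selected lane is splatted with a shufflevector (or extracted with
// extractelement for the scalar forms) and then fed to a plain mul/add/sub,
// an @llvm.fma call, or an aarch64.neon.[su]mull intrinsic.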
6 | |
7 | // CHECK-LABEL: @test_vmla_lane_s16( |
8 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
9 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
10 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
11 | // CHECK: ret <4 x i16> [[ADD]] |
12 | int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { |
13 | return vmla_lane_s16(a, b, v, 3); |
14 | } |
15 | |
16 | // CHECK-LABEL: @test_vmlaq_lane_s16( |
17 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
18 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
19 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
20 | // CHECK: ret <8 x i16> [[ADD]] |
21 | int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { |
22 | return vmlaq_lane_s16(a, b, v, 3); |
23 | } |
24 | |
25 | // CHECK-LABEL: @test_vmla_lane_s32( |
26 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
27 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
28 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
29 | // CHECK: ret <2 x i32> [[ADD]] |
30 | int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { |
31 | return vmla_lane_s32(a, b, v, 1); |
32 | } |
33 | |
34 | // CHECK-LABEL: @test_vmlaq_lane_s32( |
35 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
36 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
37 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
38 | // CHECK: ret <4 x i32> [[ADD]] |
39 | int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { |
40 | return vmlaq_lane_s32(a, b, v, 1); |
41 | } |
42 | |
43 | // CHECK-LABEL: @test_vmla_laneq_s16( |
44 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
45 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
46 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
47 | // CHECK: ret <4 x i16> [[ADD]] |
48 | int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { |
49 | return vmla_laneq_s16(a, b, v, 7); |
50 | } |
51 | |
52 | // CHECK-LABEL: @test_vmlaq_laneq_s16( |
53 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
54 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
55 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
56 | // CHECK: ret <8 x i16> [[ADD]] |
57 | int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { |
58 | return vmlaq_laneq_s16(a, b, v, 7); |
59 | } |
60 | |
61 | // CHECK-LABEL: @test_vmla_laneq_s32( |
62 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
63 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
64 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
65 | // CHECK: ret <2 x i32> [[ADD]] |
66 | int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { |
67 | return vmla_laneq_s32(a, b, v, 3); |
68 | } |
69 | |
70 | // CHECK-LABEL: @test_vmlaq_laneq_s32( |
71 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
72 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
73 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
74 | // CHECK: ret <4 x i32> [[ADD]] |
75 | int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { |
76 | return vmlaq_laneq_s32(a, b, v, 3); |
77 | } |
78 | |
79 | // CHECK-LABEL: @test_vmls_lane_s16( |
80 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
81 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
82 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
83 | // CHECK: ret <4 x i16> [[SUB]] |
84 | int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) { |
85 | return vmls_lane_s16(a, b, v, 3); |
86 | } |
87 | |
88 | // CHECK-LABEL: @test_vmlsq_lane_s16( |
89 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
90 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
91 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
92 | // CHECK: ret <8 x i16> [[SUB]] |
93 | int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) { |
94 | return vmlsq_lane_s16(a, b, v, 3); |
95 | } |
96 | |
97 | // CHECK-LABEL: @test_vmls_lane_s32( |
98 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
99 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
100 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
101 | // CHECK: ret <2 x i32> [[SUB]] |
102 | int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) { |
103 | return vmls_lane_s32(a, b, v, 1); |
104 | } |
105 | |
106 | // CHECK-LABEL: @test_vmlsq_lane_s32( |
107 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
108 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
109 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
110 | // CHECK: ret <4 x i32> [[SUB]] |
111 | int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { |
112 | return vmlsq_lane_s32(a, b, v, 1); |
113 | } |
114 | |
115 | // CHECK-LABEL: @test_vmls_laneq_s16( |
116 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
117 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
118 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
119 | // CHECK: ret <4 x i16> [[SUB]] |
120 | int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { |
121 | return vmls_laneq_s16(a, b, v, 7); |
122 | } |
123 | |
124 | // CHECK-LABEL: @test_vmlsq_laneq_s16( |
125 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
126 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
127 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
128 | // CHECK: ret <8 x i16> [[SUB]] |
129 | int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { |
130 | return vmlsq_laneq_s16(a, b, v, 7); |
131 | } |
132 | |
133 | // CHECK-LABEL: @test_vmls_laneq_s32( |
134 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
135 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
136 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
137 | // CHECK: ret <2 x i32> [[SUB]] |
138 | int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { |
139 | return vmls_laneq_s32(a, b, v, 3); |
140 | } |
141 | |
142 | // CHECK-LABEL: @test_vmlsq_laneq_s32( |
143 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
144 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
145 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
146 | // CHECK: ret <4 x i32> [[SUB]] |
147 | int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { |
148 | return vmlsq_laneq_s32(a, b, v, 3); |
149 | } |
150 | |
151 | // CHECK-LABEL: @test_vmul_lane_s16( |
152 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
153 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
154 | // CHECK: ret <4 x i16> [[MUL]] |
155 | int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) { |
156 | return vmul_lane_s16(a, v, 3); |
157 | } |
158 | |
159 | // CHECK-LABEL: @test_vmulq_lane_s16( |
160 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
161 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
162 | // CHECK: ret <8 x i16> [[MUL]] |
163 | int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) { |
164 | return vmulq_lane_s16(a, v, 3); |
165 | } |
166 | |
167 | // CHECK-LABEL: @test_vmul_lane_s32( |
168 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
169 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
170 | // CHECK: ret <2 x i32> [[MUL]] |
171 | int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { |
172 | return vmul_lane_s32(a, v, 1); |
173 | } |
174 | |
175 | // CHECK-LABEL: @test_vmulq_lane_s32( |
176 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
177 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
178 | // CHECK: ret <4 x i32> [[MUL]] |
179 | int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { |
180 | return vmulq_lane_s32(a, v, 1); |
181 | } |
182 | |
183 | // CHECK-LABEL: @test_vmul_lane_u16( |
184 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
185 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
186 | // CHECK: ret <4 x i16> [[MUL]] |
187 | uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) { |
188 | return vmul_lane_u16(a, v, 3); |
189 | } |
190 | |
191 | // CHECK-LABEL: @test_vmulq_lane_u16( |
192 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
193 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
194 | // CHECK: ret <8 x i16> [[MUL]] |
195 | uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) { |
196 | return vmulq_lane_u16(a, v, 3); |
197 | } |
198 | |
199 | // CHECK-LABEL: @test_vmul_lane_u32( |
200 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
201 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
202 | // CHECK: ret <2 x i32> [[MUL]] |
203 | uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) { |
204 | return vmul_lane_u32(a, v, 1); |
205 | } |
206 | |
207 | // CHECK-LABEL: @test_vmulq_lane_u32( |
208 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
209 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
210 | // CHECK: ret <4 x i32> [[MUL]] |
211 | uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) { |
212 | return vmulq_lane_u32(a, v, 1); |
213 | } |
214 | |
215 | // CHECK-LABEL: @test_vmul_laneq_s16( |
216 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
217 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
218 | // CHECK: ret <4 x i16> [[MUL]] |
219 | int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) { |
220 | return vmul_laneq_s16(a, v, 7); |
221 | } |
222 | |
223 | // CHECK-LABEL: @test_vmulq_laneq_s16( |
224 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
225 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
226 | // CHECK: ret <8 x i16> [[MUL]] |
227 | int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) { |
228 | return vmulq_laneq_s16(a, v, 7); |
229 | } |
230 | |
231 | // CHECK-LABEL: @test_vmul_laneq_s32( |
232 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
233 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
234 | // CHECK: ret <2 x i32> [[MUL]] |
235 | int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { |
236 | return vmul_laneq_s32(a, v, 3); |
237 | } |
238 | |
239 | // CHECK-LABEL: @test_vmulq_laneq_s32( |
240 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
241 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
242 | // CHECK: ret <4 x i32> [[MUL]] |
243 | int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { |
244 | return vmulq_laneq_s32(a, v, 3); |
245 | } |
246 | |
247 | // CHECK-LABEL: @test_vmul_laneq_u16( |
248 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
249 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
250 | // CHECK: ret <4 x i16> [[MUL]] |
251 | uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) { |
252 | return vmul_laneq_u16(a, v, 7); |
253 | } |
254 | |
255 | // CHECK-LABEL: @test_vmulq_laneq_u16( |
256 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
257 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
258 | // CHECK: ret <8 x i16> [[MUL]] |
259 | uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) { |
260 | return vmulq_laneq_u16(a, v, 7); |
261 | } |
262 | |
263 | // CHECK-LABEL: @test_vmul_laneq_u32( |
264 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
265 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
266 | // CHECK: ret <2 x i32> [[MUL]] |
267 | uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) { |
268 | return vmul_laneq_u32(a, v, 3); |
269 | } |
270 | |
271 | // CHECK-LABEL: @test_vmulq_laneq_u32( |
272 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
273 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
274 | // CHECK: ret <4 x i32> [[MUL]] |
275 | uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) { |
276 | return vmulq_laneq_u32(a, v, 3); |
277 | } |
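
// The vector vfma*/vfms* lane tests below lower to the @llvm.fma.v2f32,
// @llvm.fma.v4f32, or @llvm.fma.v2f64 intrinsic with the selected lane
// splatted by a shufflevector; the vfms* forms first negate %b by
// subtracting it from -0.0.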
278 | |
279 | // CHECK-LABEL: @test_vfma_lane_f32( |
280 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
281 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> |
282 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
283 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
284 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1> |
285 | // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
286 | // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
287 | // CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) |
288 | // CHECK: ret <2 x float> [[FMLA2]] |
289 | float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { |
290 | return vfma_lane_f32(a, b, v, 1); |
291 | } |
292 | |
293 | // CHECK-LABEL: @test_vfmaq_lane_f32( |
294 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
295 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> |
296 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
297 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
298 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
299 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
300 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
301 | // CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) |
302 | // CHECK: ret <4 x float> [[FMLA2]] |
303 | float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { |
304 | return vfmaq_lane_f32(a, b, v, 1); |
305 | } |
306 | |
307 | // CHECK-LABEL: @test_vfma_laneq_f32( |
308 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
309 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> |
310 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
311 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
312 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
313 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
314 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3> |
315 | // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) |
316 | // CHECK: ret <2 x float> [[TMP6]] |
317 | float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { |
318 | return vfma_laneq_f32(a, b, v, 3); |
319 | } |
320 | |
321 | // CHECK-LABEL: @test_vfmaq_laneq_f32( |
322 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
323 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> |
324 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
325 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
326 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
327 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
328 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
329 | // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) |
330 | // CHECK: ret <4 x float> [[TMP6]] |
331 | float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { |
332 | return vfmaq_laneq_f32(a, b, v, 3); |
333 | } |
334 | |
335 | // CHECK-LABEL: @test_vfms_lane_f32( |
336 | // CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
337 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
338 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> |
339 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
340 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
341 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1> |
342 | // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
343 | // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
344 | // CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) |
345 | // CHECK: ret <2 x float> [[FMLA2]] |
346 | float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { |
347 | return vfms_lane_f32(a, b, v, 1); |
348 | } |
349 | |
350 | // CHECK-LABEL: @test_vfmsq_lane_f32( |
351 | // CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
352 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
353 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> |
354 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
355 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
356 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
357 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
358 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
359 | // CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) |
360 | // CHECK: ret <4 x float> [[FMLA2]] |
361 | float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { |
362 | return vfmsq_lane_f32(a, b, v, 1); |
363 | } |
364 | |
365 | // CHECK-LABEL: @test_vfms_laneq_f32( |
366 | // CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
367 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
368 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> |
369 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
370 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
371 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
372 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
373 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3> |
374 | // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) |
375 | // CHECK: ret <2 x float> [[TMP6]] |
376 | float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { |
377 | return vfms_laneq_f32(a, b, v, 3); |
378 | } |
379 | |
380 | // CHECK-LABEL: @test_vfmsq_laneq_f32( |
381 | // CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
382 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
383 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> |
384 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
385 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
386 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
387 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
388 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
389 | // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) |
390 | // CHECK: ret <4 x float> [[TMP6]] |
391 | float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { |
392 | return vfmsq_laneq_f32(a, b, v, 3); |
393 | } |
394 | |
395 | // CHECK-LABEL: @test_vfmaq_lane_f64( |
396 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
397 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> |
398 | // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> |
399 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> |
400 | // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer |
401 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
402 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
403 | // CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) |
404 | // CHECK: ret <2 x double> [[FMLA2]] |
405 | float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { |
406 | return vfmaq_lane_f64(a, b, v, 0); |
407 | } |
408 | |
409 | // CHECK-LABEL: @test_vfmaq_laneq_f64( |
410 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
411 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> |
412 | // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> |
413 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
414 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
415 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> |
416 | // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> |
417 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) |
418 | // CHECK: ret <2 x double> [[TMP6]] |
419 | float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { |
420 | return vfmaq_laneq_f64(a, b, v, 1); |
421 | } |
422 | |
423 | // CHECK-LABEL: @test_vfmsq_lane_f64( |
424 | // CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b |
425 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
426 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> |
427 | // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> |
428 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> |
429 | // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer |
430 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
431 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
432 | // CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) |
433 | // CHECK: ret <2 x double> [[FMLA2]] |
434 | float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { |
435 | return vfmsq_lane_f64(a, b, v, 0); |
436 | } |
437 | |
438 | // CHECK-LABEL: @test_vfmsq_laneq_f64( |
439 | // CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b |
440 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
441 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> |
442 | // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> |
443 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
444 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
445 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> |
446 | // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> |
447 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) |
448 | // CHECK: ret <2 x double> [[TMP6]] |
449 | float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { |
450 | return vfmsq_laneq_f64(a, b, v, 1); |
451 | } |
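
// The scalar vfmas/vfmss/vfmsd lane tests below extract the selected lane
// with extractelement and lower to the scalar @llvm.fma.f32/@llvm.fma.f64
// intrinsic; the vfms forms negate %b with an fsub from -0.0 first.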
452 | |
453 | // CHECK-LABEL: @test_vfmas_laneq_f32( |
454 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> |
455 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
456 | // CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 |
457 | // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) |
458 | // CHECK: ret float [[TMP2]] |
459 | float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { |
460 | return vfmas_laneq_f32(a, b, v, 3); |
461 | } |
462 | |
463 | // CHECK-LABEL: @test_vfmsd_lane_f64( |
464 | // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b |
465 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8> |
466 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> |
467 | // CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 |
468 | // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) |
469 | // CHECK: ret double [[TMP2]] |
470 | float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { |
471 | return vfmsd_lane_f64(a, b, v, 0); |
472 | } |
473 | |
474 | // CHECK-LABEL: @test_vfmss_laneq_f32( |
475 | // CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b |
476 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> |
477 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
478 | // CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 |
479 | // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) |
480 | // CHECK: ret float [[TMP2]] |
481 | float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { |
482 | return vfmss_laneq_f32(a, b, v, 3); |
483 | } |
484 | |
485 | // CHECK-LABEL: @test_vfmsd_laneq_f64( |
486 | // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b |
487 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8> |
488 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
489 | // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 |
490 | // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) |
491 | // CHECK: ret double [[TMP2]] |
492 | float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { |
493 | return vfmsd_laneq_f64(a, b, v, 1); |
494 | } |
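
// The widening vmlal*/vmlsl*/vmull* lane tests below splat the selected lane
// and lower to @llvm.aarch64.neon.smull (signed) or @llvm.aarch64.neon.umull
// (unsigned), followed by an add or sub for the accumulating forms; the
// _high variants first take the upper half of the wide multiplicand with a
// shufflevector.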
495 | |
496 | // CHECK-LABEL: @test_vmlal_lane_s16( |
497 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
498 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
499 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
500 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
501 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
502 | // CHECK: ret <4 x i32> [[ADD]] |
503 | int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { |
504 | return vmlal_lane_s16(a, b, v, 3); |
505 | } |
506 | |
507 | // CHECK-LABEL: @test_vmlal_lane_s32( |
508 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
509 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
510 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
511 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
512 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
513 | // CHECK: ret <2 x i64> [[ADD]] |
514 | int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { |
515 | return vmlal_lane_s32(a, b, v, 1); |
516 | } |
517 | |
518 | // CHECK-LABEL: @test_vmlal_laneq_s16( |
519 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
520 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
521 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
522 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
523 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
524 | // CHECK: ret <4 x i32> [[ADD]] |
525 | int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { |
526 | return vmlal_laneq_s16(a, b, v, 7); |
527 | } |
528 | |
529 | // CHECK-LABEL: @test_vmlal_laneq_s32( |
530 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
531 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
532 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
533 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
534 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
535 | // CHECK: ret <2 x i64> [[ADD]] |
536 | int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { |
537 | return vmlal_laneq_s32(a, b, v, 3); |
538 | } |
539 | |
540 | // CHECK-LABEL: @test_vmlal_high_lane_s16( |
541 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
542 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
543 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
544 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
545 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
546 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
547 | // CHECK: ret <4 x i32> [[ADD]] |
548 | int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { |
549 | return vmlal_high_lane_s16(a, b, v, 3); |
550 | } |
551 | |
552 | // CHECK-LABEL: @test_vmlal_high_lane_s32( |
553 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
554 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
555 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
556 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
557 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
558 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
559 | // CHECK: ret <2 x i64> [[ADD]] |
560 | int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { |
561 | return vmlal_high_lane_s32(a, b, v, 1); |
562 | } |
563 | |
564 | // CHECK-LABEL: @test_vmlal_high_laneq_s16( |
565 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
566 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
567 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
568 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
569 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
570 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
571 | // CHECK: ret <4 x i32> [[ADD]] |
572 | int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { |
573 | return vmlal_high_laneq_s16(a, b, v, 7); |
574 | } |
575 | |
576 | // CHECK-LABEL: @test_vmlal_high_laneq_s32( |
577 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
578 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
579 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
580 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
581 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
582 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
583 | // CHECK: ret <2 x i64> [[ADD]] |
584 | int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { |
585 | return vmlal_high_laneq_s32(a, b, v, 3); |
586 | } |
587 | |
588 | // CHECK-LABEL: @test_vmlsl_lane_s16( |
589 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
590 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
591 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
592 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
593 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
594 | // CHECK: ret <4 x i32> [[SUB]] |
595 | int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { |
596 | return vmlsl_lane_s16(a, b, v, 3); |
597 | } |
598 | |
599 | // CHECK-LABEL: @test_vmlsl_lane_s32( |
600 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
601 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
602 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
603 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
604 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
605 | // CHECK: ret <2 x i64> [[SUB]] |
606 | int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { |
607 | return vmlsl_lane_s32(a, b, v, 1); |
608 | } |
609 | |
610 | // CHECK-LABEL: @test_vmlsl_laneq_s16( |
611 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
612 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
613 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
614 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
615 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
616 | // CHECK: ret <4 x i32> [[SUB]] |
617 | int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { |
618 | return vmlsl_laneq_s16(a, b, v, 7); |
619 | } |
620 | |
621 | // CHECK-LABEL: @test_vmlsl_laneq_s32( |
622 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
623 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
624 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
625 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
626 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
627 | // CHECK: ret <2 x i64> [[SUB]] |
628 | int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { |
629 | return vmlsl_laneq_s32(a, b, v, 3); |
630 | } |
631 | |
632 | // CHECK-LABEL: @test_vmlsl_high_lane_s16( |
633 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
634 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
635 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
636 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
637 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
638 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
639 | // CHECK: ret <4 x i32> [[SUB]] |
640 | int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { |
641 | return vmlsl_high_lane_s16(a, b, v, 3); |
642 | } |
643 | |
644 | // CHECK-LABEL: @test_vmlsl_high_lane_s32( |
645 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
646 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
647 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
648 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
649 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
650 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
651 | // CHECK: ret <2 x i64> [[SUB]] |
652 | int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { |
653 | return vmlsl_high_lane_s32(a, b, v, 1); |
654 | } |
655 | |
656 | // CHECK-LABEL: @test_vmlsl_high_laneq_s16( |
657 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
658 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
659 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
660 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
661 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
662 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
663 | // CHECK: ret <4 x i32> [[SUB]] |
664 | int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { |
665 | return vmlsl_high_laneq_s16(a, b, v, 7); |
666 | } |
667 | |
668 | // CHECK-LABEL: @test_vmlsl_high_laneq_s32( |
669 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
670 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
671 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
672 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
673 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
674 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
675 | // CHECK: ret <2 x i64> [[SUB]] |
676 | int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { |
677 | return vmlsl_high_laneq_s32(a, b, v, 3); |
678 | } |
679 | |
680 | // CHECK-LABEL: @test_vmlal_lane_u16( |
681 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
682 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
683 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
684 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
685 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
686 | // CHECK: ret <4 x i32> [[ADD]] |
687 | uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
688 | return vmlal_lane_u16(a, b, v, 3); |
689 | } |
690 | |
691 | // CHECK-LABEL: @test_vmlal_lane_u32( |
692 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
693 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
694 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
695 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
696 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
697 | // CHECK: ret <2 x i64> [[ADD]] |
698 | uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
699 | return vmlal_lane_u32(a, b, v, 1); |
700 | } |
701 | |
702 | // CHECK-LABEL: @test_vmlal_laneq_u16( |
703 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
704 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
705 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
706 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
707 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
708 | // CHECK: ret <4 x i32> [[ADD]] |
709 | uint32x4_t test_vmlal_laneq_u16(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
710 | return vmlal_laneq_u16(a, b, v, 7); |
711 | } |
712 | |
713 | // CHECK-LABEL: @test_vmlal_laneq_u32( |
714 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
715 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
716 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
717 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
718 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
719 | // CHECK: ret <2 x i64> [[ADD]] |
720 | uint64x2_t test_vmlal_laneq_u32(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
721 | return vmlal_laneq_u32(a, b, v, 3); |
722 | } |
723 | |
724 | // CHECK-LABEL: @test_vmlal_high_lane_u16( |
725 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
726 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
727 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
728 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
729 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
730 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
731 | // CHECK: ret <4 x i32> [[ADD]] |
732 | uint32x4_t test_vmlal_high_lane_u16(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
733 | return vmlal_high_lane_u16(a, b, v, 3); |
734 | } |
735 | |
736 | // CHECK-LABEL: @test_vmlal_high_lane_u32( |
737 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
738 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
739 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
740 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
741 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
742 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
743 | // CHECK: ret <2 x i64> [[ADD]] |
744 | uint64x2_t test_vmlal_high_lane_u32(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
745 | return vmlal_high_lane_u32(a, b, v, 1); |
746 | } |
747 | |
748 | // CHECK-LABEL: @test_vmlal_high_laneq_u16( |
749 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
750 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
751 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
752 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
753 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
754 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
755 | // CHECK: ret <4 x i32> [[ADD]] |
756 | uint32x4_t test_vmlal_high_laneq_u16(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
757 | return vmlal_high_laneq_u16(a, b, v, 7); |
758 | } |
759 | |
760 | // CHECK-LABEL: @test_vmlal_high_laneq_u32( |
761 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
762 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
763 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
764 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
765 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
766 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
767 | // CHECK: ret <2 x i64> [[ADD]] |
768 | uint64x2_t test_vmlal_high_laneq_u32(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
769 | return vmlal_high_laneq_u32(a, b, v, 3); |
770 | } |
771 | |
772 | // CHECK-LABEL: @test_vmlsl_lane_u16( |
773 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
774 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
775 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
776 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
777 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
778 | // CHECK: ret <4 x i32> [[SUB]] |
779 | uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
780 | return vmlsl_lane_u16(a, b, v, 3); |
781 | } |
782 | |
783 | // CHECK-LABEL: @test_vmlsl_lane_u32( |
784 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
785 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
786 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
787 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
788 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
789 | // CHECK: ret <2 x i64> [[SUB]] |
790 | uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
791 | return vmlsl_lane_u32(a, b, v, 1); |
792 | } |
793 | |
794 | // CHECK-LABEL: @test_vmlsl_laneq_u16( |
795 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
796 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
797 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
798 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
799 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
800 | // CHECK: ret <4 x i32> [[SUB]] |
801 | uint32x4_t test_vmlsl_laneq_u16(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
802 | return vmlsl_laneq_u16(a, b, v, 7); |
803 | } |
804 | |
805 | // CHECK-LABEL: @test_vmlsl_laneq_u32( |
806 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
807 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
808 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
809 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
810 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
811 | // CHECK: ret <2 x i64> [[SUB]] |
812 | uint64x2_t test_vmlsl_laneq_u32(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
813 | return vmlsl_laneq_u32(a, b, v, 3); |
814 | } |
815 | |
816 | // CHECK-LABEL: @test_vmlsl_high_lane_u16( |
817 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
818 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
819 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
820 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
821 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
822 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
823 | // CHECK: ret <4 x i32> [[SUB]] |
824 | uint32x4_t test_vmlsl_high_lane_u16(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
825 | return vmlsl_high_lane_u16(a, b, v, 3); |
826 | } |
827 | |
828 | // CHECK-LABEL: @test_vmlsl_high_lane_u32( |
829 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
830 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
831 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
832 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
833 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
834 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
835 | // CHECK: ret <2 x i64> [[SUB]] |
836 | uint64x2_t test_vmlsl_high_lane_u32(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
837 | return vmlsl_high_lane_u32(a, b, v, 1); |
838 | } |
839 | |
840 | // CHECK-LABEL: @test_vmlsl_high_laneq_u16( |
841 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
842 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
843 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
844 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
845 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
846 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
847 | // CHECK: ret <4 x i32> [[SUB]] |
848 | uint32x4_t test_vmlsl_high_laneq_u16(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
849 | return vmlsl_high_laneq_u16(a, b, v, 7); |
850 | } |
851 | |
852 | // CHECK-LABEL: @test_vmlsl_high_laneq_u32( |
853 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
854 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
855 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
856 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
857 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
858 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
859 | // CHECK: ret <2 x i64> [[SUB]] |
860 | uint64x2_t test_vmlsl_high_laneq_u32(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
861 | return vmlsl_high_laneq_u32(a, b, v, 3); |
862 | } |
863 | |
864 | // CHECK-LABEL: @test_vmull_lane_s16( |
865 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
866 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
867 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
868 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
869 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
870 | int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { |
871 | return vmull_lane_s16(a, v, 3); |
872 | } |
873 | |
874 | // CHECK-LABEL: @test_vmull_lane_s32( |
875 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
876 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
877 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
878 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
879 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
880 | int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { |
881 | return vmull_lane_s32(a, v, 1); |
882 | } |
883 | |
884 | // CHECK-LABEL: @test_vmull_lane_u16( |
885 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
886 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
887 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
888 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
889 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
890 | uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { |
891 | return vmull_lane_u16(a, v, 3); |
892 | } |
893 | |
894 | // CHECK-LABEL: @test_vmull_lane_u32( |
895 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
896 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
897 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
898 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
899 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
900 | uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { |
901 | return vmull_lane_u32(a, v, 1); |
902 | } |
903 | |
904 | // CHECK-LABEL: @test_vmull_high_lane_s16( |
905 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
906 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
907 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
908 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
909 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
910 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
911 | int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { |
912 | return vmull_high_lane_s16(a, v, 3); |
913 | } |
914 | |
915 | // CHECK-LABEL: @test_vmull_high_lane_s32( |
916 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
917 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
918 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
919 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
920 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
921 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
922 | int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { |
923 | return vmull_high_lane_s32(a, v, 1); |
924 | } |
925 | |
926 | // CHECK-LABEL: @test_vmull_high_lane_u16( |
927 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
928 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
929 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
930 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
931 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
932 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
933 | uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { |
934 | return vmull_high_lane_u16(a, v, 3); |
935 | } |
936 | |
937 | // CHECK-LABEL: @test_vmull_high_lane_u32( |
938 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
939 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
940 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
941 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
942 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
943 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
944 | uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { |
945 | return vmull_high_lane_u32(a, v, 1); |
946 | } |
947 | |
948 | // CHECK-LABEL: @test_vmull_laneq_s16( |
949 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
950 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
951 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
952 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
953 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
954 | int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { |
955 | return vmull_laneq_s16(a, v, 7); |
956 | } |
957 | |
958 | // CHECK-LABEL: @test_vmull_laneq_s32( |
959 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
960 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
961 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
962 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
963 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
964 | int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { |
965 | return vmull_laneq_s32(a, v, 3); |
966 | } |
967 | |
968 | // CHECK-LABEL: @test_vmull_laneq_u16( |
969 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
970 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
971 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
972 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
973 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
974 | uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { |
975 | return vmull_laneq_u16(a, v, 7); |
976 | } |
977 | |
978 | // CHECK-LABEL: @test_vmull_laneq_u32( |
979 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
980 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
981 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
982 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
983 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
984 | uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { |
985 | return vmull_laneq_u32(a, v, 3); |
986 | } |
987 | |
988 | // CHECK-LABEL: @test_vmull_high_laneq_s16( |
989 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
990 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
991 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
992 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
993 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
994 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
995 | int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { |
996 | return vmull_high_laneq_s16(a, v, 7); |
997 | } |
998 | |
999 | // CHECK-LABEL: @test_vmull_high_laneq_s32( |
1000 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
1001 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
1002 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1003 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1004 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1005 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
1006 | int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { |
1007 | return vmull_high_laneq_s32(a, v, 3); |
1008 | } |
1009 | |
1010 | // CHECK-LABEL: @test_vmull_high_laneq_u16( |
1011 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1012 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
1013 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1014 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1015 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1016 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
1017 | uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { |
1018 | return vmull_high_laneq_u16(a, v, 7); |
1019 | } |
1020 | |
1021 | // CHECK-LABEL: @test_vmull_high_laneq_u32( |
1022 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
1023 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
1024 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1025 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1026 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1027 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
1028 | uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { |
1029 | return vmull_high_laneq_u32(a, v, 3); |
1030 | } |
1031 | |
1032 | // CHECK-LABEL: @test_vqdmlal_lane_s16( |
1033 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1034 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1035 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1036 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1037 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1038 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
1039 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
1040 | int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { |
1041 | return vqdmlal_lane_s16(a, b, v, 3); |
1042 | } |
1043 | |
1044 | // CHECK-LABEL: @test_vqdmlal_lane_s32( |
1045 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1046 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
1047 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1048 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1049 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1050 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
1051 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
1052 | int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { |
1053 | return vqdmlal_lane_s32(a, b, v, 1); |
1054 | } |
1055 | |
1056 | // CHECK-LABEL: @test_vqdmlal_high_lane_s16( |
1057 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1058 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1059 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1060 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1061 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1062 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1063 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
1064 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
1065 | int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { |
1066 | return vqdmlal_high_lane_s16(a, b, v, 3); |
1067 | } |
1068 | |
1069 | // CHECK-LABEL: @test_vqdmlal_high_lane_s32( |
1070 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
1071 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1072 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
1073 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1074 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1075 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1076 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
1077 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
1078 | int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { |
1079 | return vqdmlal_high_lane_s32(a, b, v, 1); |
1080 | } |
1081 | |
1082 | // CHECK-LABEL: @test_vqdmlsl_lane_s16( |
1083 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1084 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1085 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1086 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1087 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1088 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
1089 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
1090 | int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { |
1091 | return vqdmlsl_lane_s16(a, b, v, 3); |
1092 | } |
1093 | |
1094 | // CHECK-LABEL: @test_vqdmlsl_lane_s32( |
1095 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1096 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
1097 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1098 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1099 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1100 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
1101 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
1102 | int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { |
1103 | return vqdmlsl_lane_s32(a, b, v, 1); |
1104 | } |
1105 | |
1106 | // CHECK-LABEL: @test_vqdmlsl_high_lane_s16( |
1107 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1108 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1109 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1110 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1111 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1112 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1113 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
1114 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
1115 | int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { |
1116 | return vqdmlsl_high_lane_s16(a, b, v, 3); |
1117 | } |
1118 | |
1119 | // CHECK-LABEL: @test_vqdmlsl_high_lane_s32( |
1120 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
1121 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1122 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
1123 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1124 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1125 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1126 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
1127 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
1128 | int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { |
1129 | return vqdmlsl_high_lane_s32(a, b, v, 1); |
1130 | } |
1131 | |
1132 | // CHECK-LABEL: @test_vqdmull_lane_s16( |
1133 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1134 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
1135 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1136 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
1137 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
1138 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
1139 | int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { |
1140 | return vqdmull_lane_s16(a, v, 3); |
1141 | } |
1142 | |
1143 | // CHECK-LABEL: @test_vqdmull_lane_s32( |
1144 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1145 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
1146 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1147 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
1148 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
1149 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
1150 | int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { |
1151 | return vqdmull_lane_s32(a, v, 1); |
1152 | } |
1153 | |
1154 | // CHECK-LABEL: @test_vqdmull_laneq_s16( |
1155 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1156 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
1157 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1158 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
1159 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
1160 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
1161 | int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { |
1162 | return vqdmull_laneq_s16(a, v, 3); |
1163 | } |
1164 | |
1165 | // CHECK-LABEL: @test_vqdmull_laneq_s32( |
1166 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
1167 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
1168 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1169 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
1170 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
1171 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
1172 | int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { |
1173 | return vqdmull_laneq_s32(a, v, 3); |
1174 | } |
1175 | |
1176 | // CHECK-LABEL: @test_vqdmull_high_lane_s16( |
1177 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1178 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1179 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1180 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1181 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1182 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
1183 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
1184 | int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { |
1185 | return vqdmull_high_lane_s16(a, v, 3); |
1186 | } |
1187 | |
1188 | // CHECK-LABEL: @test_vqdmull_high_lane_s32( |
1189 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
1190 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1191 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1192 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1193 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1194 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
1195 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
1196 | int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { |
1197 | return vqdmull_high_lane_s32(a, v, 1); |
1198 | } |
1199 | |
1200 | // CHECK-LABEL: @test_vqdmull_high_laneq_s16( |
1201 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1202 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
1203 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1204 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1205 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1206 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
1207 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
1208 | int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { |
1209 | return vqdmull_high_laneq_s16(a, v, 7); |
1210 | } |
1211 | |
1212 | // CHECK-LABEL: @test_vqdmull_high_laneq_s32( |
1213 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
1214 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
1215 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1216 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1217 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1218 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
1219 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
1220 | int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { |
1221 | return vqdmull_high_laneq_s32(a, v, 3); |
1222 | } |
1223 | |
1224 | // CHECK-LABEL: @test_vqdmulh_lane_s16( |
1225 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1226 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
1227 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1228 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
1229 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> |
1230 | // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] |
1231 | int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { |
1232 | return vqdmulh_lane_s16(a, v, 3); |
1233 | } |
1234 | |
1235 | // CHECK-LABEL: @test_vqdmulhq_lane_s16( |
1236 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
1237 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
1238 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
1239 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
1240 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> |
1241 | // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] |
1242 | int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { |
1243 | return vqdmulhq_lane_s16(a, v, 3); |
1244 | } |
1245 | |
1246 | // CHECK-LABEL: @test_vqdmulh_lane_s32( |
1247 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1248 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
1249 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1250 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
1251 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> |
1252 | // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] |
1253 | int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { |
1254 | return vqdmulh_lane_s32(a, v, 1); |
1255 | } |
1256 | |
1257 | // CHECK-LABEL: @test_vqdmulhq_lane_s32( |
1258 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
1259 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1260 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
1261 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
1262 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> |
1263 | // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] |
1264 | int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { |
1265 | return vqdmulhq_lane_s32(a, v, 1); |
1266 | } |
1267 | |
1268 | // CHECK-LABEL: @test_vqrdmulh_lane_s16( |
1269 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1270 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
1271 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1272 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
1273 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> |
1274 | // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] |
1275 | int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { |
1276 | return vqrdmulh_lane_s16(a, v, 3); |
1277 | } |
1278 | |
1279 | // CHECK-LABEL: @test_vqrdmulhq_lane_s16( |
1280 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
1281 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
1282 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
1283 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
1284 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> |
1285 | // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] |
1286 | int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { |
1287 | return vqrdmulhq_lane_s16(a, v, 3); |
1288 | } |
1289 | |
1290 | // CHECK-LABEL: @test_vqrdmulh_lane_s32( |
1291 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
1292 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
1293 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1294 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
1295 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> |
1296 | // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] |
1297 | int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { |
1298 | return vqrdmulh_lane_s32(a, v, 1); |
1299 | } |
1300 | |
1301 | // CHECK-LABEL: @test_vqrdmulhq_lane_s32( |
1302 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
1303 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
1304 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
1305 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
1306 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> |
1307 | // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] |
1308 | int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) { |
1309 | return vqrdmulhq_lane_s32(a, v, 1); |
1310 | } |
1311 | |
1312 | // CHECK-LABEL: @test_vmul_lane_f32( |
1313 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> |
1314 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] |
1315 | // CHECK: ret <2 x float> [[MUL]] |
1316 | float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) { |
1317 | return vmul_lane_f32(a, v, 1); |
1318 | } |
1319 | |
1320 | // CHECK-LABEL: @test_vmul_lane_f64( |
1321 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
1322 | // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v to <8 x i8> |
1323 | // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double |
1324 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> |
1325 | // CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 |
1326 | // CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] |
1327 | // CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> |
// CHECK: ret <1 x double> [[TMP5]]
float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
1331 | return vmul_lane_f64(a, v, 0); |
1332 | } |
1333 | |
1334 | // CHECK-LABEL: @test_vmulq_lane_f32( |
1335 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
1336 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] |
// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) {
1340 | return vmulq_lane_f32(a, v, 1); |
1341 | } |
1342 | |
1343 | // CHECK-LABEL: @test_vmulq_lane_f64( |
1344 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer |
1345 | // CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] |
1346 | // CHECK: ret <2 x double> [[MUL]] |
1347 | float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) { |
1348 | return vmulq_lane_f64(a, v, 0); |
1349 | } |
1350 | |
1351 | // CHECK-LABEL: @test_vmul_laneq_f32( |
1352 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> |
1353 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] |
1354 | // CHECK: ret <2 x float> [[MUL]] |
1355 | float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) { |
1356 | return vmul_laneq_f32(a, v, 3); |
1357 | } |
1358 | |
1359 | // CHECK-LABEL: @test_vmul_laneq_f64( |
1360 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
1361 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8> |
1362 | // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double |
1363 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
1364 | // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 |
1365 | // CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] |
1366 | // CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> |
1367 | // CHECK: ret <1 x double> [[TMP5]] |
1368 | float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) { |
1369 | return vmul_laneq_f64(a, v, 1); |
1370 | } |
1371 | |
1372 | // CHECK-LABEL: @test_vmulq_laneq_f32( |
1373 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1374 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] |
// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
1378 | return vmulq_laneq_f32(a, v, 3); |
1379 | } |
1380 | |
1381 | // CHECK-LABEL: @test_vmulq_laneq_f64( |
1382 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1> |
1383 | // CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] |
1384 | // CHECK: ret <2 x double> [[MUL]] |
1385 | float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { |
1386 | return vmulq_laneq_f64(a, v, 1); |
1387 | } |
1388 | |
1389 | // CHECK-LABEL: @test_vmulx_lane_f32( |
1390 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> |
1391 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1392 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> |
1393 | // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) |
1394 | // CHECK: ret <2 x float> [[VMULX2_I]] |
1395 | float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { |
1396 | return vmulx_lane_f32(a, v, 1); |
1397 | } |
1398 | |
1399 | // CHECK-LABEL: @test_vmulxq_lane_f32( |
1400 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
1401 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1402 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> |
1403 | // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) |
1404 | // CHECK: ret <4 x float> [[VMULX2_I]] |
1405 | float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { |
1406 | return vmulxq_lane_f32(a, v, 1); |
1407 | } |
1408 | |
1409 | // CHECK-LABEL: @test_vmulxq_lane_f64( |
1410 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer |
1411 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
1412 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> |
1413 | // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) |
1414 | // CHECK: ret <2 x double> [[VMULX2_I]] |
1415 | float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { |
1416 | return vmulxq_lane_f64(a, v, 0); |
1417 | } |
1418 | |
1419 | // CHECK-LABEL: @test_vmulx_laneq_f32( |
1420 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> |
1421 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1422 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> |
1423 | // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) |
1424 | // CHECK: ret <2 x float> [[VMULX2_I]] |
1425 | float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { |
1426 | return vmulx_laneq_f32(a, v, 3); |
1427 | } |
1428 | |
1429 | // CHECK-LABEL: @test_vmulxq_laneq_f32( |
1430 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
1431 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1432 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> |
1433 | // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) |
1434 | // CHECK: ret <4 x float> [[VMULX2_I]] |
1435 | float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { |
1436 | return vmulxq_laneq_f32(a, v, 3); |
1437 | } |
1438 | |
1439 | // CHECK-LABEL: @test_vmulxq_laneq_f64( |
1440 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> <i32 1, i32 1> |
1441 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
1442 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> |
1443 | // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) |
1444 | // CHECK: ret <2 x double> [[VMULX2_I]] |
1445 | float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { |
1446 | return vmulxq_laneq_f64(a, v, 1); |
1447 | } |
1448 | |
1449 | // CHECK-LABEL: @test_vmla_lane_s16_0( |
1450 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1451 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
1452 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
1453 | // CHECK: ret <4 x i16> [[ADD]] |
1454 | int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { |
1455 | return vmla_lane_s16(a, b, v, 0); |
1456 | } |
1457 | |
1458 | // CHECK-LABEL: @test_vmlaq_lane_s16_0( |
1459 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
1460 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
1461 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
1462 | // CHECK: ret <8 x i16> [[ADD]] |
1463 | int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { |
1464 | return vmlaq_lane_s16(a, b, v, 0); |
1465 | } |
1466 | |
1467 | // CHECK-LABEL: @test_vmla_lane_s32_0( |
1468 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1469 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
1470 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
1471 | // CHECK: ret <2 x i32> [[ADD]] |
1472 | int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { |
1473 | return vmla_lane_s32(a, b, v, 0); |
1474 | } |
1475 | |
1476 | // CHECK-LABEL: @test_vmlaq_lane_s32_0( |
1477 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
1478 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
1479 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
1480 | // CHECK: ret <4 x i32> [[ADD]] |
1481 | int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { |
1482 | return vmlaq_lane_s32(a, b, v, 0); |
1483 | } |
1484 | |
1485 | // CHECK-LABEL: @test_vmla_laneq_s16_0( |
1486 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1487 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
1488 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
1489 | // CHECK: ret <4 x i16> [[ADD]] |
1490 | int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { |
1491 | return vmla_laneq_s16(a, b, v, 0); |
1492 | } |
1493 | |
1494 | // CHECK-LABEL: @test_vmlaq_laneq_s16_0( |
1495 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
1496 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
1497 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
1498 | // CHECK: ret <8 x i16> [[ADD]] |
1499 | int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { |
1500 | return vmlaq_laneq_s16(a, b, v, 0); |
1501 | } |
1502 | |
1503 | // CHECK-LABEL: @test_vmla_laneq_s32_0( |
1504 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1505 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
1506 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
1507 | // CHECK: ret <2 x i32> [[ADD]] |
1508 | int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { |
1509 | return vmla_laneq_s32(a, b, v, 0); |
1510 | } |
1511 | |
1512 | // CHECK-LABEL: @test_vmlaq_laneq_s32_0( |
1513 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
1514 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
1515 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
1516 | // CHECK: ret <4 x i32> [[ADD]] |
1517 | int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { |
1518 | return vmlaq_laneq_s32(a, b, v, 0); |
1519 | } |
1520 | |
1521 | // CHECK-LABEL: @test_vmls_lane_s16_0( |
1522 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1523 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
1524 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
1525 | // CHECK: ret <4 x i16> [[SUB]] |
1526 | int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { |
1527 | return vmls_lane_s16(a, b, v, 0); |
1528 | } |
1529 | |
1530 | // CHECK-LABEL: @test_vmlsq_lane_s16_0( |
1531 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
1532 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
1533 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
1534 | // CHECK: ret <8 x i16> [[SUB]] |
1535 | int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { |
1536 | return vmlsq_lane_s16(a, b, v, 0); |
1537 | } |
1538 | |
1539 | // CHECK-LABEL: @test_vmls_lane_s32_0( |
1540 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1541 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
1542 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
1543 | // CHECK: ret <2 x i32> [[SUB]] |
1544 | int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { |
1545 | return vmls_lane_s32(a, b, v, 0); |
1546 | } |
1547 | |
1548 | // CHECK-LABEL: @test_vmlsq_lane_s32_0( |
1549 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
1550 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
1551 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
1552 | // CHECK: ret <4 x i32> [[SUB]] |
1553 | int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { |
1554 | return vmlsq_lane_s32(a, b, v, 0); |
1555 | } |
1556 | |
1557 | // CHECK-LABEL: @test_vmls_laneq_s16_0( |
1558 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1559 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
1560 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
1561 | // CHECK: ret <4 x i16> [[SUB]] |
1562 | int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { |
1563 | return vmls_laneq_s16(a, b, v, 0); |
1564 | } |
1565 | |
1566 | // CHECK-LABEL: @test_vmlsq_laneq_s16_0( |
1567 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
1568 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
1569 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
1570 | // CHECK: ret <8 x i16> [[SUB]] |
1571 | int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { |
1572 | return vmlsq_laneq_s16(a, b, v, 0); |
1573 | } |
1574 | |
1575 | // CHECK-LABEL: @test_vmls_laneq_s32_0( |
1576 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1577 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
1578 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
1579 | // CHECK: ret <2 x i32> [[SUB]] |
1580 | int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { |
1581 | return vmls_laneq_s32(a, b, v, 0); |
1582 | } |
1583 | |
1584 | // CHECK-LABEL: @test_vmlsq_laneq_s32_0( |
1585 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
1586 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
1587 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
1588 | // CHECK: ret <4 x i32> [[SUB]] |
1589 | int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { |
1590 | return vmlsq_laneq_s32(a, b, v, 0); |
1591 | } |
1592 | |
1593 | // CHECK-LABEL: @test_vmul_lane_s16_0( |
1594 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1595 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
1596 | // CHECK: ret <4 x i16> [[MUL]] |
1597 | int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) { |
1598 | return vmul_lane_s16(a, v, 0); |
1599 | } |
1600 | |
1601 | // CHECK-LABEL: @test_vmulq_lane_s16_0( |
1602 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
1603 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
1604 | // CHECK: ret <8 x i16> [[MUL]] |
1605 | int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) { |
1606 | return vmulq_lane_s16(a, v, 0); |
1607 | } |
1608 | |
1609 | // CHECK-LABEL: @test_vmul_lane_s32_0( |
1610 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1611 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
1612 | // CHECK: ret <2 x i32> [[MUL]] |
1613 | int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) { |
1614 | return vmul_lane_s32(a, v, 0); |
1615 | } |
1616 | |
1617 | // CHECK-LABEL: @test_vmulq_lane_s32_0( |
1618 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
1619 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
1620 | // CHECK: ret <4 x i32> [[MUL]] |
1621 | int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) { |
1622 | return vmulq_lane_s32(a, v, 0); |
1623 | } |
1624 | |
1625 | // CHECK-LABEL: @test_vmul_lane_u16_0( |
1626 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1627 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
1628 | // CHECK: ret <4 x i16> [[MUL]] |
1629 | uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) { |
1630 | return vmul_lane_u16(a, v, 0); |
1631 | } |
1632 | |
1633 | // CHECK-LABEL: @test_vmulq_lane_u16_0( |
1634 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
1635 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
1636 | // CHECK: ret <8 x i16> [[MUL]] |
1637 | uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) { |
1638 | return vmulq_lane_u16(a, v, 0); |
1639 | } |
1640 | |
1641 | // CHECK-LABEL: @test_vmul_lane_u32_0( |
1642 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1643 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
1644 | // CHECK: ret <2 x i32> [[MUL]] |
1645 | uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) { |
1646 | return vmul_lane_u32(a, v, 0); |
1647 | } |
1648 | |
1649 | // CHECK-LABEL: @test_vmulq_lane_u32_0( |
1650 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
1651 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
1652 | // CHECK: ret <4 x i32> [[MUL]] |
1653 | uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) { |
1654 | return vmulq_lane_u32(a, v, 0); |
1655 | } |
1656 | |
1657 | // CHECK-LABEL: @test_vmul_laneq_s16_0( |
1658 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1659 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
1660 | // CHECK: ret <4 x i16> [[MUL]] |
1661 | int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) { |
1662 | return vmul_laneq_s16(a, v, 0); |
1663 | } |
1664 | |
1665 | // CHECK-LABEL: @test_vmulq_laneq_s16_0( |
1666 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
1667 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
1668 | // CHECK: ret <8 x i16> [[MUL]] |
1669 | int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) { |
1670 | return vmulq_laneq_s16(a, v, 0); |
1671 | } |
1672 | |
1673 | // CHECK-LABEL: @test_vmul_laneq_s32_0( |
1674 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1675 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
1676 | // CHECK: ret <2 x i32> [[MUL]] |
1677 | int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) { |
1678 | return vmul_laneq_s32(a, v, 0); |
1679 | } |
1680 | |
1681 | // CHECK-LABEL: @test_vmulq_laneq_s32_0( |
1682 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
1683 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
1684 | // CHECK: ret <4 x i32> [[MUL]] |
1685 | int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) { |
1686 | return vmulq_laneq_s32(a, v, 0); |
1687 | } |
1688 | |
1689 | // CHECK-LABEL: @test_vmul_laneq_u16_0( |
1690 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1691 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] |
1692 | // CHECK: ret <4 x i16> [[MUL]] |
1693 | uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) { |
1694 | return vmul_laneq_u16(a, v, 0); |
1695 | } |
1696 | |
1697 | // CHECK-LABEL: @test_vmulq_laneq_u16_0( |
1698 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
1699 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] |
1700 | // CHECK: ret <8 x i16> [[MUL]] |
1701 | uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) { |
1702 | return vmulq_laneq_u16(a, v, 0); |
1703 | } |
1704 | |
1705 | // CHECK-LABEL: @test_vmul_laneq_u32_0( |
1706 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1707 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] |
1708 | // CHECK: ret <2 x i32> [[MUL]] |
1709 | uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) { |
1710 | return vmul_laneq_u32(a, v, 0); |
1711 | } |
1712 | |
1713 | // CHECK-LABEL: @test_vmulq_laneq_u32_0( |
1714 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
1715 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] |
1716 | // CHECK: ret <4 x i32> [[MUL]] |
1717 | uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) { |
1718 | return vmulq_laneq_u32(a, v, 0); |
1719 | } |
1720 | |
1721 | // CHECK-LABEL: @test_vfma_lane_f32_0( |
1722 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1723 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> |
1724 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
1725 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
1726 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer |
1727 | // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
1728 | // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
1729 | // CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) |
1730 | // CHECK: ret <2 x float> [[FMLA2]] |
1731 | float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { |
1732 | return vfma_lane_f32(a, b, v, 0); |
1733 | } |
1734 | |
1735 | // CHECK-LABEL: @test_vfmaq_lane_f32_0( |
1736 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1737 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> |
1738 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
1739 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
1740 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer |
1741 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
1742 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
1743 | // CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) |
1744 | // CHECK: ret <4 x float> [[FMLA2]] |
1745 | float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { |
1746 | return vfmaq_lane_f32(a, b, v, 0); |
1747 | } |
1748 | |
1749 | // CHECK-LABEL: @test_vfma_laneq_f32_0( |
1750 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1751 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> |
1752 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
1753 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
1754 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
1755 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
1756 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer |
1757 | // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) |
1758 | // CHECK: ret <2 x float> [[TMP6]] |
1759 | float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { |
1760 | return vfma_laneq_f32(a, b, v, 0); |
1761 | } |
1762 | |
1763 | // CHECK-LABEL: @test_vfmaq_laneq_f32_0( |
1764 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1765 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> |
1766 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
1767 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
1768 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
1769 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
1770 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer |
1771 | // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) |
1772 | // CHECK: ret <4 x float> [[TMP6]] |
1773 | float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { |
1774 | return vfmaq_laneq_f32(a, b, v, 0); |
1775 | } |
1776 | |
1777 | // CHECK-LABEL: @test_vfms_lane_f32_0( |
1778 | // CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
1779 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1780 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> |
1781 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
1782 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
1783 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer |
1784 | // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
1785 | // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
1786 | // CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) |
1787 | // CHECK: ret <2 x float> [[FMLA2]] |
1788 | float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { |
1789 | return vfms_lane_f32(a, b, v, 0); |
1790 | } |
1791 | |
1792 | // CHECK-LABEL: @test_vfmsq_lane_f32_0( |
1793 | // CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
1794 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1795 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> |
1796 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> |
1797 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> |
1798 | // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer |
1799 | // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
1800 | // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
1801 | // CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) |
1802 | // CHECK: ret <4 x float> [[FMLA2]] |
1803 | float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { |
1804 | return vfmsq_lane_f32(a, b, v, 0); |
1805 | } |
1806 | |
1807 | // CHECK-LABEL: @test_vfms_laneq_f32_0( |
1808 | // CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
1809 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
1810 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> |
1811 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
1812 | // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> |
1813 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> |
1814 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
1815 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer |
1816 | // CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) |
1817 | // CHECK: ret <2 x float> [[TMP6]] |
1818 | float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { |
1819 | return vfms_laneq_f32(a, b, v, 0); |
1820 | } |
1821 | |
1822 | // CHECK-LABEL: @test_vfmsq_laneq_f32_0( |
1823 | // CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
1824 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
1825 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> |
1826 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> |
1827 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
1828 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> |
1829 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
1830 | // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer |
1831 | // CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) |
1832 | // CHECK: ret <4 x float> [[TMP6]] |
1833 | float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { |
1834 | return vfmsq_laneq_f32(a, b, v, 0); |
1835 | } |
1836 | |
1837 | // CHECK-LABEL: @test_vfmaq_laneq_f64_0( |
1838 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
1839 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> |
1840 | // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> |
1841 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
1842 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
1843 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> |
1844 | // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer |
1845 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) |
1846 | // CHECK: ret <2 x double> [[TMP6]] |
1847 | float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { |
1848 | return vfmaq_laneq_f64(a, b, v, 0); |
1849 | } |
1850 | |
1851 | // CHECK-LABEL: @test_vfmsq_laneq_f64_0( |
1852 | // CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b |
1853 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
1854 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> |
1855 | // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> |
1856 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
1857 | // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
1858 | // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> |
1859 | // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer |
1860 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) |
1861 | // CHECK: ret <2 x double> [[TMP6]] |
1862 | float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { |
1863 | return vfmsq_laneq_f64(a, b, v, 0); |
1864 | } |
1865 | |
1866 | // CHECK-LABEL: @test_vmlal_lane_s16_0( |
1867 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1868 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1869 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1870 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1871 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
1872 | // CHECK: ret <4 x i32> [[ADD]] |
1873 | int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { |
1874 | return vmlal_lane_s16(a, b, v, 0); |
1875 | } |
1876 | |
1877 | // CHECK-LABEL: @test_vmlal_lane_s32_0( |
1878 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1879 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1880 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1881 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1882 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
1883 | // CHECK: ret <2 x i64> [[ADD]] |
1884 | int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { |
1885 | return vmlal_lane_s32(a, b, v, 0); |
1886 | } |
1887 | |
1888 | // CHECK-LABEL: @test_vmlal_laneq_s16_0( |
1889 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1890 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1891 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1892 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1893 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
1894 | // CHECK: ret <4 x i32> [[ADD]] |
1895 | int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { |
1896 | return vmlal_laneq_s16(a, b, v, 0); |
1897 | } |
1898 | |
1899 | // CHECK-LABEL: @test_vmlal_laneq_s32_0( |
1900 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1901 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1902 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1903 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1904 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
1905 | // CHECK: ret <2 x i64> [[ADD]] |
1906 | int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { |
1907 | return vmlal_laneq_s32(a, b, v, 0); |
1908 | } |
1909 | |
1910 | // CHECK-LABEL: @test_vmlal_high_lane_s16_0( |
1911 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1912 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1913 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1914 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1915 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1916 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
1917 | // CHECK: ret <4 x i32> [[ADD]] |
1918 | int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { |
1919 | return vmlal_high_lane_s16(a, b, v, 0); |
1920 | } |
1921 | |
1922 | // CHECK-LABEL: @test_vmlal_high_lane_s32_0( |
1923 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
1924 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1925 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1926 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1927 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1928 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
1929 | // CHECK: ret <2 x i64> [[ADD]] |
1930 | int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { |
1931 | return vmlal_high_lane_s32(a, b, v, 0); |
1932 | } |
1933 | |
1934 | // CHECK-LABEL: @test_vmlal_high_laneq_s16_0( |
1935 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1936 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1937 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
1938 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1939 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
1940 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
1941 | // CHECK: ret <4 x i32> [[ADD]] |
1942 | int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { |
1943 | return vmlal_high_laneq_s16(a, b, v, 0); |
1944 | } |
1945 | |
1946 | // CHECK-LABEL: @test_vmlal_high_laneq_s32_0( |
1947 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
1948 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1949 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
1950 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1951 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
1952 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
1953 | // CHECK: ret <2 x i64> [[ADD]] |
1954 | int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { |
1955 | return vmlal_high_laneq_s32(a, b, v, 0); |
1956 | } |
1957 | |
1958 | // CHECK-LABEL: @test_vmlsl_lane_s16_0( |
1959 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
1960 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1961 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1962 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1963 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
1964 | // CHECK: ret <4 x i32> [[SUB]] |
1965 | int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { |
1966 | return vmlsl_lane_s16(a, b, v, 0); |
1967 | } |
1968 | |
1969 | // CHECK-LABEL: @test_vmlsl_lane_s32_0( |
1970 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
1971 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1972 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1973 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1974 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
1975 | // CHECK: ret <2 x i64> [[SUB]] |
1976 | int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { |
1977 | return vmlsl_lane_s32(a, b, v, 0); |
1978 | } |
1979 | |
1980 | // CHECK-LABEL: @test_vmlsl_laneq_s16_0( |
1981 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
1982 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
1983 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
1984 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
1985 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
1986 | // CHECK: ret <4 x i32> [[SUB]] |
1987 | int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { |
1988 | return vmlsl_laneq_s16(a, b, v, 0); |
1989 | } |
1990 | |
1991 | // CHECK-LABEL: @test_vmlsl_laneq_s32_0( |
1992 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
1993 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
1994 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
1995 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
1996 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
1997 | // CHECK: ret <2 x i64> [[SUB]] |
1998 | int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { |
1999 | return vmlsl_laneq_s32(a, b, v, 0); |
2000 | } |
2001 | |
2002 | // CHECK-LABEL: @test_vmlsl_high_lane_s16_0( |
2003 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2004 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2005 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2006 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2007 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2008 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2009 | // CHECK: ret <4 x i32> [[SUB]] |
2010 | int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { |
2011 | return vmlsl_high_lane_s16(a, b, v, 0); |
2012 | } |
2013 | |
2014 | // CHECK-LABEL: @test_vmlsl_high_lane_s32_0( |
2015 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2016 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2017 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2018 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2019 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2020 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2021 | // CHECK: ret <2 x i64> [[SUB]] |
2022 | int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { |
2023 | return vmlsl_high_lane_s32(a, b, v, 0); |
2024 | } |
2025 | |
2026 | // CHECK-LABEL: @test_vmlsl_high_laneq_s16_0( |
2027 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2028 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2029 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2030 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2031 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2032 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2033 | // CHECK: ret <4 x i32> [[SUB]] |
2034 | int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { |
2035 | return vmlsl_high_laneq_s16(a, b, v, 0); |
2036 | } |
2037 | |
2038 | // CHECK-LABEL: @test_vmlsl_high_laneq_s32_0( |
2039 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2040 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2041 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2042 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2043 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2044 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2045 | // CHECK: ret <2 x i64> [[SUB]] |
2046 | int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { |
2047 | return vmlsl_high_laneq_s32(a, b, v, 0); |
2048 | } |
2049 | |
2050 | // CHECK-LABEL: @test_vmlal_lane_u16_0( |
2051 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2052 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2053 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2054 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2055 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
2056 | // CHECK: ret <4 x i32> [[ADD]] |
2057 | uint32x4_t test_vmlal_lane_u16_0(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
2058 | return vmlal_lane_u16(a, b, v, 0); |
2059 | } |
2060 | |
2061 | // CHECK-LABEL: @test_vmlal_lane_u32_0( |
2062 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2063 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2064 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2065 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2066 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
2067 | // CHECK: ret <2 x i64> [[ADD]] |
2068 | uint64x2_t test_vmlal_lane_u32_0(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
2069 | return vmlal_lane_u32(a, b, v, 0); |
2070 | } |
2071 | |
2072 | // CHECK-LABEL: @test_vmlal_laneq_u16_0( |
2073 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2074 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2075 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2076 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2077 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
2078 | // CHECK: ret <4 x i32> [[ADD]] |
2079 | uint32x4_t test_vmlal_laneq_u16_0(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
2080 | return vmlal_laneq_u16(a, b, v, 0); |
2081 | } |
2082 | |
2083 | // CHECK-LABEL: @test_vmlal_laneq_u32_0( |
2084 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2085 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2086 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2087 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2088 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
2089 | // CHECK: ret <2 x i64> [[ADD]] |
2090 | uint64x2_t test_vmlal_laneq_u32_0(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
2091 | return vmlal_laneq_u32(a, b, v, 0); |
2092 | } |
2093 | |
2094 | // CHECK-LABEL: @test_vmlal_high_lane_u16_0( |
2095 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2096 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2097 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2098 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2099 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2100 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
2101 | // CHECK: ret <4 x i32> [[ADD]] |
2102 | uint32x4_t test_vmlal_high_lane_u16_0(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
2103 | return vmlal_high_lane_u16(a, b, v, 0); |
2104 | } |
2105 | |
2106 | // CHECK-LABEL: @test_vmlal_high_lane_u32_0( |
2107 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2108 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2109 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2110 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2111 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2112 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
2113 | // CHECK: ret <2 x i64> [[ADD]] |
2114 | uint64x2_t test_vmlal_high_lane_u32_0(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
2115 | return vmlal_high_lane_u32(a, b, v, 0); |
2116 | } |
2117 | |
2118 | // CHECK-LABEL: @test_vmlal_high_laneq_u16_0( |
2119 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2120 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2121 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2122 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2123 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2124 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] |
2125 | // CHECK: ret <4 x i32> [[ADD]] |
2126 | uint32x4_t test_vmlal_high_laneq_u16_0(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
2127 | return vmlal_high_laneq_u16(a, b, v, 0); |
2128 | } |
2129 | |
2130 | // CHECK-LABEL: @test_vmlal_high_laneq_u32_0( |
2131 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2132 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2133 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2134 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2135 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2136 | // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] |
2137 | // CHECK: ret <2 x i64> [[ADD]] |
2138 | uint64x2_t test_vmlal_high_laneq_u32_0(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
2139 | return vmlal_high_laneq_u32(a, b, v, 0); |
2140 | } |
2141 | |
2142 | // CHECK-LABEL: @test_vmlsl_lane_u16_0( |
2143 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2144 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2145 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2146 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2147 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2148 | // CHECK: ret <4 x i32> [[SUB]] |
2149 | uint32x4_t test_vmlsl_lane_u16_0(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
2150 | return vmlsl_lane_u16(a, b, v, 0); |
2151 | } |
2152 | |
2153 | // CHECK-LABEL: @test_vmlsl_lane_u32_0( |
2154 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2155 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2156 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2157 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2158 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2159 | // CHECK: ret <2 x i64> [[SUB]] |
2160 | uint64x2_t test_vmlsl_lane_u32_0(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
2161 | return vmlsl_lane_u32(a, b, v, 0); |
2162 | } |
2163 | |
2164 | // CHECK-LABEL: @test_vmlsl_laneq_u16_0( |
2165 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2166 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2167 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2168 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2169 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2170 | // CHECK: ret <4 x i32> [[SUB]] |
2171 | uint32x4_t test_vmlsl_laneq_u16_0(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
2172 | return vmlsl_laneq_u16(a, b, v, 0); |
2173 | } |
2174 | |
2175 | // CHECK-LABEL: @test_vmlsl_laneq_u32_0( |
2176 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2177 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2178 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2179 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2180 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2181 | // CHECK: ret <2 x i64> [[SUB]] |
2182 | uint64x2_t test_vmlsl_laneq_u32_0(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
2183 | return vmlsl_laneq_u32(a, b, v, 0); |
2184 | } |
2185 | |
2186 | // CHECK-LABEL: @test_vmlsl_high_lane_u16_0( |
2187 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2188 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2189 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2190 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2191 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2192 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2193 | // CHECK: ret <4 x i32> [[SUB]] |
2194 | uint32x4_t test_vmlsl_high_lane_u16_0(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
2195 | return vmlsl_high_lane_u16(a, b, v, 0); |
2196 | } |
2197 | |
2198 | // CHECK-LABEL: @test_vmlsl_high_lane_u32_0( |
2199 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2200 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2201 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2202 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2203 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2204 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2205 | // CHECK: ret <2 x i64> [[SUB]] |
2206 | uint64x2_t test_vmlsl_high_lane_u32_0(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
2207 | return vmlsl_high_lane_u32(a, b, v, 0); |
2208 | } |
2209 | |
2210 | // CHECK-LABEL: @test_vmlsl_high_laneq_u16_0( |
2211 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2212 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2213 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2214 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2215 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2216 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] |
2217 | // CHECK: ret <4 x i32> [[SUB]] |
2218 | uint32x4_t test_vmlsl_high_laneq_u16_0(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
2219 | return vmlsl_high_laneq_u16(a, b, v, 0); |
2220 | } |
2221 | |
2222 | // CHECK-LABEL: @test_vmlsl_high_laneq_u32_0( |
2223 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2224 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2225 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2226 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2227 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2228 | // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] |
2229 | // CHECK: ret <2 x i64> [[SUB]] |
2230 | uint64x2_t test_vmlsl_high_laneq_u32_0(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
2231 | return vmlsl_high_laneq_u32(a, b, v, 0); |
2232 | } |
2233 | |
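// vmull* lane and laneq tests with lane index 0: a single smull/umull call on the
// splatted lane, with no accumulation step.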
2234 | // CHECK-LABEL: @test_vmull_lane_s16_0( |
2235 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2236 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2237 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2238 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2239 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2240 | int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { |
2241 | return vmull_lane_s16(a, v, 0); |
2242 | } |
2243 | |
2244 | // CHECK-LABEL: @test_vmull_lane_s32_0( |
2245 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2246 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2247 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2248 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2249 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2250 | int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { |
2251 | return vmull_lane_s32(a, v, 0); |
2252 | } |
2253 | |
2254 | // CHECK-LABEL: @test_vmull_lane_u16_0( |
2255 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2256 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2257 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2258 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2259 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2260 | uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { |
2261 | return vmull_lane_u16(a, v, 0); |
2262 | } |
2263 | |
2264 | // CHECK-LABEL: @test_vmull_lane_u32_0( |
2265 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2266 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2267 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2268 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2269 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2270 | uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { |
2271 | return vmull_lane_u32(a, v, 0); |
2272 | } |
2273 | |
2274 | // CHECK-LABEL: @test_vmull_high_lane_s16_0( |
2275 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2276 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2277 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2278 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2279 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2280 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2281 | int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { |
2282 | return vmull_high_lane_s16(a, v, 0); |
2283 | } |
2284 | |
2285 | // CHECK-LABEL: @test_vmull_high_lane_s32_0( |
2286 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2287 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2288 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2289 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2290 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2291 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2292 | int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { |
2293 | return vmull_high_lane_s32(a, v, 0); |
2294 | } |
2295 | |
2296 | // CHECK-LABEL: @test_vmull_high_lane_u16_0( |
2297 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2298 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2299 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2300 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2301 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2302 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2303 | uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { |
2304 | return vmull_high_lane_u16(a, v, 0); |
2305 | } |
2306 | |
2307 | // CHECK-LABEL: @test_vmull_high_lane_u32_0( |
2308 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2309 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2310 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2311 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2312 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2313 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2314 | uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { |
2315 | return vmull_high_lane_u32(a, v, 0); |
2316 | } |
2317 | |
2318 | // CHECK-LABEL: @test_vmull_laneq_s16_0( |
2319 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2320 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2321 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2322 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2323 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2324 | int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { |
2325 | return vmull_laneq_s16(a, v, 0); |
2326 | } |
2327 | |
2328 | // CHECK-LABEL: @test_vmull_laneq_s32_0( |
2329 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2330 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2331 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2332 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2333 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2334 | int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { |
2335 | return vmull_laneq_s32(a, v, 0); |
2336 | } |
2337 | |
2338 | // CHECK-LABEL: @test_vmull_laneq_u16_0( |
2339 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2340 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2341 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2342 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2343 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2344 | uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { |
2345 | return vmull_laneq_u16(a, v, 0); |
2346 | } |
2347 | |
2348 | // CHECK-LABEL: @test_vmull_laneq_u32_0( |
2349 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2350 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2351 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2352 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2353 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2354 | uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { |
2355 | return vmull_laneq_u32(a, v, 0); |
2356 | } |
2357 | |
2358 | // CHECK-LABEL: @test_vmull_high_laneq_s16_0( |
2359 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2360 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2361 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2362 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2363 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2364 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2365 | int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { |
2366 | return vmull_high_laneq_s16(a, v, 0); |
2367 | } |
2368 | |
2369 | // CHECK-LABEL: @test_vmull_high_laneq_s32_0( |
2370 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2371 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2372 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2373 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2374 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2375 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2376 | int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { |
2377 | return vmull_high_laneq_s32(a, v, 0); |
2378 | } |
2379 | |
2380 | // CHECK-LABEL: @test_vmull_high_laneq_u16_0( |
2381 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2382 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2383 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2384 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2385 | // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2386 | // CHECK: ret <4 x i32> [[VMULL2_I]] |
2387 | uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { |
2388 | return vmull_high_laneq_u16(a, v, 0); |
2389 | } |
2390 | |
2391 | // CHECK-LABEL: @test_vmull_high_laneq_u32_0( |
2392 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2393 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2394 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2395 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2396 | // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2397 | // CHECK: ret <2 x i64> [[VMULL2_I]] |
2398 | uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { |
2399 | return vmull_high_laneq_u32(a, v, 0); |
2400 | } |
2401 | |
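// Saturating widening multiply-accumulate/-subtract by lane (vqdmlal*/vqdmlsl*)
// with lane index 0: llvm.aarch64.neon.sqdmull on the splatted lane, followed by
// sqadd (vqdmlal) or sqsub (vqdmlsl) against the accumulator.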
2402 | // CHECK-LABEL: @test_vqdmlal_lane_s16_0( |
2403 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2404 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2405 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2406 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2407 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2408 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
2409 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
2410 | int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { |
2411 | return vqdmlal_lane_s16(a, b, v, 0); |
2412 | } |
2413 | |
2414 | // CHECK-LABEL: @test_vqdmlal_lane_s32_0( |
2415 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2416 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
2417 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2418 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2419 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2420 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
2421 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
2422 | int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { |
2423 | return vqdmlal_lane_s32(a, b, v, 0); |
2424 | } |
2425 | |
2426 | // CHECK-LABEL: @test_vqdmlal_high_lane_s16_0( |
2427 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2428 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2429 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2430 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2431 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2432 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2433 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
2434 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
2435 | int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { |
2436 | return vqdmlal_high_lane_s16(a, b, v, 0); |
2437 | } |
2438 | |
2439 | // CHECK-LABEL: @test_vqdmlal_high_lane_s32_0( |
2440 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2441 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2442 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
2443 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2444 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2445 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2446 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
2447 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
2448 | int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { |
2449 | return vqdmlal_high_lane_s32(a, b, v, 0); |
2450 | } |
2451 | |
2452 | // CHECK-LABEL: @test_vqdmlsl_lane_s16_0( |
2453 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2454 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2455 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
2456 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2457 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
2458 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
2459 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
2460 | int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { |
2461 | return vqdmlsl_lane_s16(a, b, v, 0); |
2462 | } |
2463 | |
2464 | // CHECK-LABEL: @test_vqdmlsl_lane_s32_0( |
2465 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2466 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
2467 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
2468 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2469 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
2470 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
2471 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
2472 | int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { |
2473 | return vqdmlsl_lane_s32(a, b, v, 0); |
2474 | } |
2475 | |
2476 | // CHECK-LABEL: @test_vqdmlsl_high_lane_s16_0( |
2477 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2478 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2479 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2480 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2481 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2482 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2483 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
2484 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
2485 | int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { |
2486 | return vqdmlsl_high_lane_s16(a, b, v, 0); |
2487 | } |
2488 | |
2489 | // CHECK-LABEL: @test_vqdmlsl_high_lane_s32_0( |
2490 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2491 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2492 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
2493 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2494 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2495 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2496 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
2497 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
2498 | int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { |
2499 | return vqdmlsl_high_lane_s32(a, b, v, 0); |
2500 | } |
2501 | |
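// vqdmull* lane and laneq tests with lane index 0: a single llvm.aarch64.neon.sqdmull
// call on the splatted lane.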
2502 | // CHECK-LABEL: @test_vqdmull_lane_s16_0( |
2503 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2504 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2505 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2506 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2507 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
2508 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
2509 | int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { |
2510 | return vqdmull_lane_s16(a, v, 0); |
2511 | } |
2512 | |
2513 | // CHECK-LABEL: @test_vqdmull_lane_s32_0( |
2514 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2515 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2516 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2517 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2518 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
2519 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
2520 | int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { |
2521 | return vqdmull_lane_s32(a, v, 0); |
2522 | } |
2523 | |
2524 | // CHECK-LABEL: @test_vqdmull_laneq_s16_0( |
2525 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2526 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2527 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2528 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2529 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
2530 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
2531 | int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { |
2532 | return vqdmull_laneq_s16(a, v, 0); |
2533 | } |
2534 | |
2535 | // CHECK-LABEL: @test_vqdmull_laneq_s32_0( |
2536 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2537 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2538 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2539 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2540 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
2541 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
2542 | int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { |
2543 | return vqdmull_laneq_s32(a, v, 0); |
2544 | } |
2545 | |
2546 | // CHECK-LABEL: @test_vqdmull_high_lane_s16_0( |
2547 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2548 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2549 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2550 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2551 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2552 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
2553 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
2554 | int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { |
2555 | return vqdmull_high_lane_s16(a, v, 0); |
2556 | } |
2557 | |
2558 | // CHECK-LABEL: @test_vqdmull_high_lane_s32_0( |
2559 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2560 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2561 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2562 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2563 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2564 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
2565 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
2566 | int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { |
2567 | return vqdmull_high_lane_s32(a, v, 0); |
2568 | } |
2569 | |
2570 | // CHECK-LABEL: @test_vqdmull_high_laneq_s16_0( |
2571 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2572 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
2573 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
2574 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2575 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
2576 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> |
2577 | // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] |
2578 | int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { |
2579 | return vqdmull_high_laneq_s16(a, v, 0); |
2580 | } |
2581 | |
2582 | // CHECK-LABEL: @test_vqdmull_high_laneq_s32_0( |
2583 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2584 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
2585 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
2586 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2587 | // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
2588 | // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> |
2589 | // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] |
2590 | int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { |
2591 | return vqdmull_high_laneq_s32(a, v, 0); |
2592 | } |
2593 | |
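// Saturating doubling multiply-high (sqdmulh) and rounding doubling multiply-high
// (sqrdmulh) by lane, lane index 0.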
2594 | // CHECK-LABEL: @test_vqdmulh_lane_s16_0( |
2595 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2596 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2597 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2598 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2599 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> |
2600 | // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] |
2601 | int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { |
2602 | return vqdmulh_lane_s16(a, v, 0); |
2603 | } |
2604 | |
2605 | // CHECK-LABEL: @test_vqdmulhq_lane_s16_0( |
2606 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
2607 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
2608 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
2609 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
2610 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> |
2611 | // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] |
2612 | int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { |
2613 | return vqdmulhq_lane_s16(a, v, 0); |
2614 | } |
2615 | |
2616 | // CHECK-LABEL: @test_vqdmulh_lane_s32_0( |
2617 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2618 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2619 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2620 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2621 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> |
2622 | // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] |
2623 | int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { |
2624 | return vqdmulh_lane_s32(a, v, 0); |
2625 | } |
2626 | |
2627 | // CHECK-LABEL: @test_vqdmulhq_lane_s32_0( |
2628 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
2629 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2630 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
2631 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
2632 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> |
2633 | // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] |
2634 | int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { |
2635 | return vqdmulhq_lane_s32(a, v, 0); |
2636 | } |
2637 | |
2638 | // CHECK-LABEL: @test_vqrdmulh_lane_s16_0( |
2639 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
2640 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
2641 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
2642 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
2643 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> |
2644 | // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] |
2645 | int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { |
2646 | return vqrdmulh_lane_s16(a, v, 0); |
2647 | } |
2648 | |
2649 | // CHECK-LABEL: @test_vqrdmulhq_lane_s16_0( |
2650 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
2651 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
2652 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
2653 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
2654 | // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] |
2655 | int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { |
2656 | return vqrdmulhq_lane_s16(a, v, 0); |
2657 | } |
2658 | |
2659 | // CHECK-LABEL: @test_vqrdmulh_lane_s32_0( |
2660 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
2661 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
2662 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
2663 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
2664 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> |
2665 | // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] |
2666 | int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { |
2667 | return vqrdmulh_lane_s32(a, v, 0); |
2668 | } |
2669 | |
2670 | // CHECK-LABEL: @test_vqrdmulhq_lane_s32_0( |
2671 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
2672 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2673 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
2674 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
2675 | // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] |
2676 | int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { |
2677 | return vqrdmulhq_lane_s32(a, v, 0); |
2678 | } |
2679 | |
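// Floating-point multiply by lane, lane index 0: vmul* lowers to an fmul on the
// splatted lane (or an extractelement plus scalar fmul for the float64x1_t case),
// while vmulx* calls llvm.aarch64.neon.fmulx.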
2680 | // CHECK-LABEL: @test_vmul_lane_f32_0( |
2681 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer |
2682 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] |
2683 | // CHECK: ret <2 x float> [[MUL]] |
2684 | float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { |
2685 | return vmul_lane_f32(a, v, 0); |
2686 | } |
2687 | |
2688 | // CHECK-LABEL: @test_vmulq_lane_f32_0( |
2689 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer |
2690 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] |
2691 | // CHECK: ret <4 x float> [[MUL]] |
2692 | float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { |
2693 | return vmulq_lane_f32(a, v, 0); |
2694 | } |
2695 | |
2696 | // CHECK-LABEL: @test_vmul_laneq_f32_0( |
2697 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer |
2698 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] |
2699 | // CHECK: ret <2 x float> [[MUL]] |
2700 | float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { |
2701 | return vmul_laneq_f32(a, v, 0); |
2702 | } |
2703 | |
2704 | // CHECK-LABEL: @test_vmul_laneq_f64_0( |
2705 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
2706 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v to <16 x i8> |
2707 | // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double |
2708 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> |
2709 | // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 |
2710 | // CHECK: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] |
2711 | // CHECK: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> |
2712 | // CHECK: ret <1 x double> [[TMP5]] |
2713 | float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { |
2714 | return vmul_laneq_f64(a, v, 0); |
2715 | } |
2716 | |
2717 | // CHECK-LABEL: @test_vmulq_laneq_f32_0( |
2718 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer |
2719 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] |
2720 | // CHECK: ret <4 x float> [[MUL]] |
2721 | float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { |
2722 | return vmulq_laneq_f32(a, v, 0); |
2723 | } |
2724 | |
2725 | // CHECK-LABEL: @test_vmulq_laneq_f64_0( |
2726 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer |
2727 | // CHECK: [[MUL:%.*]] = fmul <2 x double> %a, [[SHUFFLE]] |
2728 | // CHECK: ret <2 x double> [[MUL]] |
2729 | float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { |
2730 | return vmulq_laneq_f64(a, v, 0); |
2731 | } |
2732 | |
2733 | // CHECK-LABEL: @test_vmulx_lane_f32_0( |
2734 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer |
2735 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
2736 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> |
2737 | // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) |
2738 | // CHECK: ret <2 x float> [[VMULX2_I]] |
2739 | float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { |
2740 | return vmulx_lane_f32(a, v, 0); |
2741 | } |
2742 | |
2743 | // CHECK-LABEL: @test_vmulxq_lane_f32_0( |
2744 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer |
2745 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
2746 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> |
2747 | // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) |
2748 | // CHECK: ret <4 x float> [[VMULX2_I]] |
2749 | float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { |
2750 | return vmulxq_lane_f32(a, v, 0); |
2751 | } |
2752 | |
2753 | // CHECK-LABEL: @test_vmulxq_lane_f64_0( |
2754 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %v, <1 x double> %v, <2 x i32> zeroinitializer |
2755 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
2756 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> |
2757 | // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) |
2758 | // CHECK: ret <2 x double> [[VMULX2_I]] |
2759 | float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { |
2760 | return vmulxq_lane_f64(a, v, 0); |
2761 | } |
2762 | |
2763 | // CHECK-LABEL: @test_vmulx_laneq_f32_0( |
2764 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer |
2765 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
2766 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SHUFFLE]] to <8 x i8> |
2767 | // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> [[SHUFFLE]]) |
2768 | // CHECK: ret <2 x float> [[VMULX2_I]] |
2769 | float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { |
2770 | return vmulx_laneq_f32(a, v, 0); |
2771 | } |
2772 | |
2773 | // CHECK-LABEL: @test_vmulxq_laneq_f32_0( |
2774 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer |
2775 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
2776 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SHUFFLE]] to <16 x i8> |
2777 | // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> [[SHUFFLE]]) |
2778 | // CHECK: ret <4 x float> [[VMULX2_I]] |
2779 | float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { |
2780 | return vmulxq_laneq_f32(a, v, 0); |
2781 | } |
2782 | |
2783 | // CHECK-LABEL: @test_vmulxq_laneq_f64_0( |
2784 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x double> %v, <2 x double> %v, <2 x i32> zeroinitializer |
2785 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> |
2786 | // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SHUFFLE]] to <16 x i8> |
2787 | // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> [[SHUFFLE]]) |
2788 | // CHECK: ret <2 x double> [[VMULX2_I]] |
2789 | float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { |
2790 | return vmulxq_laneq_f64(a, v, 0); |
2791 | } |
2792 | |
2793 | // CHECK-LABEL: @test_vmull_high_n_s16( |
2794 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2795 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2796 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
2797 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 |
2798 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 |
2799 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 |
2800 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2801 | // CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2802 | // CHECK: ret <4 x i32> [[VMULL5_I_I]] |
2803 | int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { |
2804 | return vmull_high_n_s16(a, b); |
2805 | } |
2806 | |
2807 | // CHECK-LABEL: @test_vmull_high_n_s32( |
2808 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2809 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2810 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
2811 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 |
2812 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2813 | // CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2814 | // CHECK: ret <2 x i64> [[VMULL3_I_I]] |
2815 | int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { |
2816 | return vmull_high_n_s32(a, b); |
2817 | } |
2818 | |
2819 | // CHECK-LABEL: @test_vmull_high_n_u16( |
2820 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2821 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2822 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
2823 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 |
2824 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 |
2825 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 |
2826 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2827 | // CHECK: [[VMULL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2828 | // CHECK: ret <4 x i32> [[VMULL5_I_I]] |
2829 | uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { |
2830 | return vmull_high_n_u16(a, b); |
2831 | } |
2832 | |
2833 | // CHECK-LABEL: @test_vmull_high_n_u32( |
2834 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2835 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2836 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
2837 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 |
2838 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2839 | // CHECK: [[VMULL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2840 | // CHECK: ret <2 x i64> [[VMULL3_I_I]] |
2841 | uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { |
2842 | return vmull_high_n_u32(a, b); |
2843 | } |
2844 | |
2845 | // CHECK-LABEL: @test_vqdmull_high_n_s16( |
2846 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2847 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2848 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
2849 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %b, i32 1 |
2850 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %b, i32 2 |
2851 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %b, i32 3 |
2852 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2853 | // CHECK: [[VQDMULL_V5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2854 | // CHECK: [[VQDMULL_V6_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I_I]] to <16 x i8> |
2855 | // CHECK: ret <4 x i32> [[VQDMULL_V5_I_I]] |
2856 | int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { |
2857 | return vqdmull_high_n_s16(a, b); |
2858 | } |
2859 | |
2860 | // CHECK-LABEL: @test_vqdmull_high_n_s32( |
2861 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> |
2862 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2863 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
2864 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %b, i32 1 |
2865 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2866 | // CHECK: [[VQDMULL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2867 | // CHECK: [[VQDMULL_V4_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I_I]] to <16 x i8> |
2868 | // CHECK: ret <2 x i64> [[VQDMULL_V3_I_I]] |
2869 | int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { |
2870 | return vqdmull_high_n_s32(a, b); |
2871 | } |
2872 | |
2873 | // CHECK-LABEL: @test_vmlal_high_n_s16( |
2874 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2875 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
2876 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
2877 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
2878 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
2879 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2880 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2881 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2882 | // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] |
2883 | // CHECK: ret <4 x i32> [[ADD_I_I]] |
2884 | int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { |
2885 | return vmlal_high_n_s16(a, b, c); |
2886 | } |
2887 | |
2888 | // CHECK-LABEL: @test_vmlal_high_n_s32( |
2889 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2890 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
2891 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
2892 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2893 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2894 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2895 | // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] |
2896 | // CHECK: ret <2 x i64> [[ADD_I_I]] |
2897 | int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { |
2898 | return vmlal_high_n_s32(a, b, c); |
2899 | } |
2900 | |
2901 | // CHECK-LABEL: @test_vmlal_high_n_u16( |
2902 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2903 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
2904 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
2905 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
2906 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
2907 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2908 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2909 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2910 | // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]] |
2911 | // CHECK: ret <4 x i32> [[ADD_I_I]] |
2912 | uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { |
2913 | return vmlal_high_n_u16(a, b, c); |
2914 | } |
2915 | |
2916 | // CHECK-LABEL: @test_vmlal_high_n_u32( |
2917 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2918 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
2919 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
2920 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2921 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2922 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2923 | // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]] |
2924 | // CHECK: ret <2 x i64> [[ADD_I_I]] |
2925 | uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { |
2926 | return vmlal_high_n_u32(a, b, c); |
2927 | } |
2928 | |
2929 | // CHECK-LABEL: @test_vqdmlal_high_n_s16( |
2930 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2931 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
2932 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2933 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
2934 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
2935 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
2936 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
2937 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2938 | // CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2939 | // CHECK: [[VQDMLAL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) |
2940 | // CHECK: ret <4 x i32> [[VQDMLAL_V6_I_I]] |
2941 | int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { |
2942 | return vqdmlal_high_n_s16(a, b, c); |
2943 | } |
2944 | |
2945 | // CHECK-LABEL: @test_vqdmlal_high_n_s32( |
2946 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2947 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
2948 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2949 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
2950 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
2951 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2952 | // CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2953 | // CHECK: [[VQDMLAL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) |
2954 | // CHECK: ret <2 x i64> [[VQDMLAL_V4_I_I]] |
2955 | int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { |
2956 | return vqdmlal_high_n_s32(a, b, c); |
2957 | } |
2958 | |
2959 | // CHECK-LABEL: @test_vmlsl_high_n_s16( |
2960 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2961 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
2962 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
2963 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
2964 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
2965 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2966 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2967 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2968 | // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] |
2969 | // CHECK: ret <4 x i32> [[SUB_I_I]] |
2970 | int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { |
2971 | return vmlsl_high_n_s16(a, b, c); |
2972 | } |
2973 | |
2974 | // CHECK-LABEL: @test_vmlsl_high_n_s32( |
2975 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
2976 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
2977 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
2978 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
2979 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
2980 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
2981 | // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] |
2982 | // CHECK: ret <2 x i64> [[SUB_I_I]] |
2983 | int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { |
2984 | return vmlsl_high_n_s32(a, b, c); |
2985 | } |
2986 | |
2987 | // CHECK-LABEL: @test_vmlsl_high_n_u16( |
2988 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2989 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
2990 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
2991 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
2992 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
2993 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
2994 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
2995 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
2996 | // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] |
2997 | // CHECK: ret <4 x i32> [[SUB_I_I]] |
2998 | uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { |
2999 | return vmlsl_high_n_u16(a, b, c); |
3000 | } |
3001 | |
3002 | // CHECK-LABEL: @test_vmlsl_high_n_u32( |
3003 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
3004 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3005 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
3006 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
3007 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
3008 | // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
3009 | // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] |
3010 | // CHECK: ret <2 x i64> [[SUB_I_I]] |
3011 | uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { |
3012 | return vmlsl_high_n_u32(a, b, c); |
3013 | } |
3014 | |
3015 | // CHECK-LABEL: @test_vqdmlsl_high_n_s16( |
3016 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
3017 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3018 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> |
3019 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3020 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <4 x i16> [[VECINIT_I_I]], i16 %c, i32 1 |
3021 | // CHECK: [[VECINIT2_I_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I_I]], i16 %c, i32 2 |
3022 | // CHECK: [[VECINIT3_I_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I_I]], i16 %c, i32 3 |
3023 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I_I]] to <8 x i8> |
3024 | // CHECK: [[VQDMLAL5_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I_I]]) |
3025 | // CHECK: [[VQDMLSL_V6_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I_I]]) |
3026 | // CHECK: ret <4 x i32> [[VQDMLSL_V6_I_I]] |
3027 | int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { |
3028 | return vqdmlsl_high_n_s16(a, b, c); |
3029 | } |
3030 | |
3031 | // CHECK-LABEL: @test_vqdmlsl_high_n_s32( |
3032 | // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
3033 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
3034 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> |
3035 | // CHECK: [[VECINIT_I_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3036 | // CHECK: [[VECINIT1_I_I:%.*]] = insertelement <2 x i32> [[VECINIT_I_I]], i32 %c, i32 1 |
3037 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I_I]] to <8 x i8> |
3038 | // CHECK: [[VQDMLAL3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I_I]]) |
3039 | // CHECK: [[VQDMLSL_V4_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I_I]]) |
3040 | // CHECK: ret <2 x i64> [[VQDMLSL_V4_I_I]] |
3041 | int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { |
3042 | return vqdmlsl_high_n_s32(a, b, c); |
3043 | } |
3044 | |
3045 | // CHECK-LABEL: @test_vmul_n_f32( |
3046 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0 |
3047 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1 |
3048 | // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]] |
3049 | // CHECK: ret <2 x float> [[MUL_I]] |
3050 | float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { |
3051 | return vmul_n_f32(a, b); |
3052 | } |
3053 | |
3054 | // CHECK-LABEL: @test_vmulq_n_f32( |
3055 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0 |
3056 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1 |
3057 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2 |
3058 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3 |
3059 | // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]] |
3060 | // CHECK: ret <4 x float> [[MUL_I]] |
3061 | float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { |
3062 | return vmulq_n_f32(a, b); |
3063 | } |
3064 | |
3065 | // CHECK-LABEL: @test_vmulq_n_f64( |
3066 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %b, i32 0 |
3067 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %b, i32 1 |
3068 | // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %a, [[VECINIT1_I]] |
3069 | // CHECK: ret <2 x double> [[MUL_I]] |
3070 | float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) { |
3071 | return vmulq_n_f64(a, b); |
3072 | } |
3073 | |
3074 | // CHECK-LABEL: @test_vfma_n_f32( |
3075 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0 |
3076 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1 |
3077 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
3078 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> |
3079 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> |
3080 | // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> [[VECINIT1_I]], <2 x float> %a) |
3081 | // CHECK: ret <2 x float> [[TMP3]] |
3082 | float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { |
3083 | return vfma_n_f32(a, b, n); |
3084 | } |
3085 | |
3086 | // CHECK-LABEL: @test_vfma_n_f64( |
3087 | // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 |
3088 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
3089 | // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> |
3090 | // CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> |
3091 | // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> [[VECINIT_I]], <1 x double> %a) |
3092 | // CHECK: ret <1 x double> [[TMP3]] |
3093 | float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { |
3094 | return vfma_n_f64(a, b, n); |
3095 | } |
3096 | |
3097 | // CHECK-LABEL: @test_vfmaq_n_f32( |
3098 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 |
3099 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 |
3100 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 |
3101 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3 |
3102 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
3103 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> |
3104 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> |
3105 | // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> [[VECINIT3_I]], <4 x float> %a) |
3106 | // CHECK: ret <4 x float> [[TMP3]] |
3107 | float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { |
3108 | return vfmaq_n_f32(a, b, n); |
3109 | } |
3110 | |
3111 | // CHECK-LABEL: @test_vfms_n_f32( |
3112 | // CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b |
3113 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0 |
3114 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1 |
3115 | // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> |
3116 | // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> |
3117 | // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> |
3118 | // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> [[VECINIT1_I]], <2 x float> %a) |
3119 | // CHECK: ret <2 x float> [[TMP3]] |
3120 | float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { |
3121 | return vfms_n_f32(a, b, n); |
3122 | } |
3123 | |
3124 | // CHECK-LABEL: @test_vfms_n_f64( |
3125 | // CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b |
3126 | // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x double> undef, double %n, i32 0 |
3127 | // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> |
3128 | // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> |
3129 | // CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> |
3130 | // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> [[VECINIT_I]], <1 x double> %a) |
3131 | // CHECK: ret <1 x double> [[TMP3]] |
3132 | float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { |
3133 | return vfms_n_f64(a, b, n); |
3134 | } |
3135 | |
3136 | // CHECK-LABEL: @test_vfmsq_n_f32( |
3137 | // CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b |
3138 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 |
3139 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 |
3140 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 |
3141 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3 |
3142 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> |
3143 | // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> |
3144 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> |
3145 | // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> [[VECINIT3_I]], <4 x float> %a) |
3146 | // CHECK: ret <4 x float> [[TMP3]] |
3147 | float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { |
3148 | return vfmsq_n_f32(a, b, n); |
3149 | } |
3150 | |
3151 | // CHECK-LABEL: @test_vmul_n_s16( |
3152 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3153 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3154 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3155 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3156 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] |
3157 | // CHECK: ret <4 x i16> [[MUL_I]] |
3158 | int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { |
3159 | return vmul_n_s16(a, b); |
3160 | } |
3161 | |
3162 | // CHECK-LABEL: @test_vmulq_n_s16( |
3163 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 |
3164 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3165 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3166 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3167 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 |
3168 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 |
3169 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 |
3170 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 |
3171 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] |
3172 | // CHECK: ret <8 x i16> [[MUL_I]] |
3173 | int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { |
3174 | return vmulq_n_s16(a, b); |
3175 | } |
3176 | |
3177 | // CHECK-LABEL: @test_vmul_n_s32( |
3178 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3179 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3180 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] |
3181 | // CHECK: ret <2 x i32> [[MUL_I]] |
3182 | int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { |
3183 | return vmul_n_s32(a, b); |
3184 | } |
3185 | |
3186 | // CHECK-LABEL: @test_vmulq_n_s32( |
3187 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 |
3188 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3189 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 |
3190 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 |
3191 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] |
3192 | // CHECK: ret <4 x i32> [[MUL_I]] |
3193 | int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { |
3194 | return vmulq_n_s32(a, b); |
3195 | } |
3196 | |
3197 | // CHECK-LABEL: @test_vmul_n_u16( |
3198 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3199 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3200 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3201 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3202 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]] |
3203 | // CHECK: ret <4 x i16> [[MUL_I]] |
3204 | uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { |
3205 | return vmul_n_u16(a, b); |
3206 | } |
3207 | |
3208 | // CHECK-LABEL: @test_vmulq_n_u16( |
3209 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 |
3210 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3211 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3212 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3213 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 |
3214 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 |
3215 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 |
3216 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 |
3217 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]] |
3218 | // CHECK: ret <8 x i16> [[MUL_I]] |
3219 | uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { |
3220 | return vmulq_n_u16(a, b); |
3221 | } |
3222 | |
3223 | // CHECK-LABEL: @test_vmul_n_u32( |
3224 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3225 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3226 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] |
3227 | // CHECK: ret <2 x i32> [[MUL_I]] |
3228 | uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { |
3229 | return vmul_n_u32(a, b); |
3230 | } |
3231 | |
3232 | // CHECK-LABEL: @test_vmulq_n_u32( |
3233 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 |
3234 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3235 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 |
3236 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 |
3237 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]] |
3238 | // CHECK: ret <4 x i32> [[MUL_I]] |
3239 | uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { |
3240 | return vmulq_n_u32(a, b); |
3241 | } |
3242 | |
3243 | // CHECK-LABEL: @test_vmull_n_s16( |
3244 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
3245 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3246 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3247 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3248 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3249 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3250 | // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) |
3251 | // CHECK: ret <4 x i32> [[VMULL5_I]] |
3252 | int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { |
3253 | return vmull_n_s16(a, b); |
3254 | } |
3255 | |
3256 | // CHECK-LABEL: @test_vmull_n_s32( |
3257 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
3258 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3259 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3260 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3261 | // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) |
3262 | // CHECK: ret <2 x i64> [[VMULL3_I]] |
3263 | int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { |
3264 | return vmull_n_s32(a, b); |
3265 | } |
3266 | |
3267 | // CHECK-LABEL: @test_vmull_n_u16( |
3268 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
3269 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3270 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3271 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3272 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3273 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3274 | // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) |
3275 | // CHECK: ret <4 x i32> [[VMULL5_I]] |
3276 | uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { |
3277 | return vmull_n_u16(a, b); |
3278 | } |
3279 | |
3280 | // CHECK-LABEL: @test_vmull_n_u32( |
3281 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
3282 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3283 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3284 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3285 | // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) |
3286 | // CHECK: ret <2 x i64> [[VMULL3_I]] |
3287 | uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { |
3288 | return vmull_n_u32(a, b); |
3289 | } |
3290 | |
3291 | // CHECK-LABEL: @test_vqdmull_n_s16( |
3292 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
3293 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3294 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3295 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3296 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3297 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3298 | // CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) |
3299 | // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> |
3300 | // CHECK: ret <4 x i32> [[VQDMULL_V5_I]] |
3301 | int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { |
3302 | return vqdmull_n_s16(a, b); |
3303 | } |
3304 | |
3305 | // CHECK-LABEL: @test_vqdmull_n_s32( |
3306 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
3307 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3308 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3309 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3310 | // CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) |
3311 | // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> |
3312 | // CHECK: ret <2 x i64> [[VQDMULL_V3_I]] |
3313 | int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { |
3314 | return vqdmull_n_s32(a, b); |
3315 | } |
3316 | |
3317 | // CHECK-LABEL: @test_vqdmulh_n_s16( |
3318 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
3319 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3320 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3321 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3322 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3323 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3324 | // CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) |
3325 | // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> |
3326 | // CHECK: ret <4 x i16> [[VQDMULH_V5_I]] |
3327 | int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { |
3328 | return vqdmulh_n_s16(a, b); |
3329 | } |
3330 | |
3331 | // CHECK-LABEL: @test_vqdmulhq_n_s16( |
3332 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
3333 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 |
3334 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3335 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3336 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3337 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 |
3338 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 |
3339 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 |
3340 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 |
3341 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> |
3342 | // CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) |
3343 | // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> |
3344 | // CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]] |
3345 | int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { |
3346 | return vqdmulhq_n_s16(a, b); |
3347 | } |
3348 | |
3349 | // CHECK-LABEL: @test_vqdmulh_n_s32( |
3350 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
3351 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3352 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3353 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3354 | // CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) |
3355 | // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> |
3356 | // CHECK: ret <2 x i32> [[VQDMULH_V3_I]] |
3357 | int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { |
3358 | return vqdmulh_n_s32(a, b); |
3359 | } |
3360 | |
3361 | // CHECK-LABEL: @test_vqdmulhq_n_s32( |
3362 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3363 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 |
3364 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3365 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 |
3366 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 |
3367 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> |
3368 | // CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) |
3369 | // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> |
3370 | // CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]] |
3371 | int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { |
3372 | return vqdmulhq_n_s32(a, b); |
3373 | } |
3374 | |
3375 | // CHECK-LABEL: @test_vqrdmulh_n_s16( |
3376 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
3377 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 |
3378 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3379 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3380 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3381 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3382 | // CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) |
3383 | // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> |
3384 | // CHECK: ret <4 x i16> [[VQRDMULH_V5_I]] |
3385 | int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { |
3386 | return vqrdmulh_n_s16(a, b); |
3387 | } |
3388 | |
3389 | // CHECK-LABEL: @test_vqrdmulhq_n_s16( |
3390 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
3391 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 |
3392 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 |
3393 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 |
3394 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3 |
3395 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4 |
3396 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5 |
3397 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 |
3398 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 |
3399 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> |
3400 | // CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) |
3401 | // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> |
3402 | // CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]] |
3403 | int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { |
3404 | return vqrdmulhq_n_s16(a, b); |
3405 | } |
3406 | |
3407 | // CHECK-LABEL: @test_vqrdmulh_n_s32( |
3408 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
3409 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 |
3410 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3411 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3412 | // CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) |
3413 | // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> |
3414 | // CHECK: ret <2 x i32> [[VQRDMULH_V3_I]] |
3415 | int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { |
3416 | return vqrdmulh_n_s32(a, b); |
3417 | } |
3418 | |
3419 | // CHECK-LABEL: @test_vqrdmulhq_n_s32( |
3420 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3421 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 |
3422 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 |
3423 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 |
3424 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 |
3425 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> |
3426 | // CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) |
3427 | // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> |
3428 | // CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]] |
3429 | int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { |
3430 | return vqrdmulhq_n_s32(a, b); |
3431 | } |
3432 | |
3433 | // CHECK-LABEL: @test_vmla_n_s16( |
3434 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3435 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3436 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3437 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3438 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] |
3439 | // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] |
3440 | // CHECK: ret <4 x i16> [[ADD_I]] |
3441 | int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { |
3442 | return vmla_n_s16(a, b, c); |
3443 | } |
3444 | |
3445 | // CHECK-LABEL: @test_vmlaq_n_s16( |
3446 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 |
3447 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3448 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3449 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3450 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 |
3451 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 |
3452 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 |
3453 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 |
3454 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] |
3455 | // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] |
3456 | // CHECK: ret <8 x i16> [[ADD_I]] |
3457 | int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { |
3458 | return vmlaq_n_s16(a, b, c); |
3459 | } |
3460 | |
3461 | // CHECK-LABEL: @test_vmla_n_s32( |
3462 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3463 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3464 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] |
3465 | // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] |
3466 | // CHECK: ret <2 x i32> [[ADD_I]] |
3467 | int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { |
3468 | return vmla_n_s32(a, b, c); |
3469 | } |
3470 | |
3471 | // CHECK-LABEL: @test_vmlaq_n_s32( |
3472 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 |
3473 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3474 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 |
3475 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 |
3476 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] |
3477 | // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] |
3478 | // CHECK: ret <4 x i32> [[ADD_I]] |
3479 | int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { |
3480 | return vmlaq_n_s32(a, b, c); |
3481 | } |
3482 | |
3483 | // CHECK-LABEL: @test_vmla_n_u16( |
3484 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3485 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3486 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3487 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3488 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] |
3489 | // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] |
3490 | // CHECK: ret <4 x i16> [[ADD_I]] |
3491 | uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { |
3492 | return vmla_n_u16(a, b, c); |
3493 | } |
3494 | |
3495 | // CHECK-LABEL: @test_vmlaq_n_u16( |
3496 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 |
3497 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3498 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3499 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3500 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 |
3501 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 |
3502 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 |
3503 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 |
3504 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] |
3505 | // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] |
3506 | // CHECK: ret <8 x i16> [[ADD_I]] |
3507 | uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { |
3508 | return vmlaq_n_u16(a, b, c); |
3509 | } |
3510 | |
3511 | // CHECK-LABEL: @test_vmla_n_u32( |
3512 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3513 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3514 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] |
3515 | // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] |
3516 | // CHECK: ret <2 x i32> [[ADD_I]] |
3517 | uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { |
3518 | return vmla_n_u32(a, b, c); |
3519 | } |
3520 | |
3521 | // CHECK-LABEL: @test_vmlaq_n_u32( |
3522 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 |
3523 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3524 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 |
3525 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 |
3526 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] |
3527 | // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] |
3528 | // CHECK: ret <4 x i32> [[ADD_I]] |
3529 | uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { |
3530 | return vmlaq_n_u32(a, b, c); |
3531 | } |
3532 | |
3533 | // CHECK-LABEL: @test_vmlal_n_s16( |
3534 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3535 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3536 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3537 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3538 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3539 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3540 | // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3541 | // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] |
3542 | // CHECK: ret <4 x i32> [[ADD_I]] |
3543 | int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { |
3544 | return vmlal_n_s16(a, b, c); |
3545 | } |
3546 | |
3547 | // CHECK-LABEL: @test_vmlal_n_s32( |
3548 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3549 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3550 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3551 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3552 | // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3553 | // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] |
3554 | // CHECK: ret <2 x i64> [[ADD_I]] |
3555 | int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { |
3556 | return vmlal_n_s32(a, b, c); |
3557 | } |
3558 | |
3559 | // CHECK-LABEL: @test_vmlal_n_u16( |
3560 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3561 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3562 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3563 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3564 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3565 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3566 | // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3567 | // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] |
3568 | // CHECK: ret <4 x i32> [[ADD_I]] |
3569 | uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { |
3570 | return vmlal_n_u16(a, b, c); |
3571 | } |
3572 | |
3573 | // CHECK-LABEL: @test_vmlal_n_u32( |
3574 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3575 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3576 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3577 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3578 | // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3579 | // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] |
3580 | // CHECK: ret <2 x i64> [[ADD_I]] |
3581 | uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { |
3582 | return vmlal_n_u32(a, b, c); |
3583 | } |
3584 | |
3585 | // CHECK-LABEL: @test_vqdmlal_n_s16( |
3586 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3587 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3588 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3589 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3590 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3591 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3592 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3593 | // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3594 | // CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) |
3595 | // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] |
3596 | int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { |
3597 | return vqdmlal_n_s16(a, b, c); |
3598 | } |
3599 | |
3600 | // CHECK-LABEL: @test_vqdmlal_n_s32( |
3601 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
3602 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3603 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3604 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3605 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3606 | // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3607 | // CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) |
3608 | // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] |
3609 | int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { |
3610 | return vqdmlal_n_s32(a, b, c); |
3611 | } |
3612 | |
3613 | // CHECK-LABEL: @test_vmls_n_s16( |
3614 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3615 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3616 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3617 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3618 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] |
3619 | // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] |
3620 | // CHECK: ret <4 x i16> [[SUB_I]] |
3621 | int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { |
3622 | return vmls_n_s16(a, b, c); |
3623 | } |
3624 | |
3625 | // CHECK-LABEL: @test_vmlsq_n_s16( |
3626 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 |
3627 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3628 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3629 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3630 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 |
3631 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 |
3632 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 |
3633 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 |
3634 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] |
3635 | // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] |
3636 | // CHECK: ret <8 x i16> [[SUB_I]] |
3637 | int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { |
3638 | return vmlsq_n_s16(a, b, c); |
3639 | } |
3640 | |
3641 | // CHECK-LABEL: @test_vmls_n_s32( |
3642 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3643 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3644 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] |
3645 | // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] |
3646 | // CHECK: ret <2 x i32> [[SUB_I]] |
3647 | int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { |
3648 | return vmls_n_s32(a, b, c); |
3649 | } |
3650 | |
3651 | // CHECK-LABEL: @test_vmlsq_n_s32( |
3652 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 |
3653 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3654 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 |
3655 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 |
3656 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] |
3657 | // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] |
3658 | // CHECK: ret <4 x i32> [[SUB_I]] |
3659 | int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { |
3660 | return vmlsq_n_s32(a, b, c); |
3661 | } |
3662 | |
3663 | // CHECK-LABEL: @test_vmls_n_u16( |
3664 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3665 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3666 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3667 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3668 | // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]] |
3669 | // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] |
3670 | // CHECK: ret <4 x i16> [[SUB_I]] |
3671 | uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { |
3672 | return vmls_n_u16(a, b, c); |
3673 | } |
3674 | |
3675 | // CHECK-LABEL: @test_vmlsq_n_u16( |
3676 | // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 |
3677 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3678 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3679 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3680 | // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4 |
3681 | // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5 |
3682 | // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6 |
3683 | // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7 |
3684 | // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]] |
3685 | // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] |
3686 | // CHECK: ret <8 x i16> [[SUB_I]] |
3687 | uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { |
3688 | return vmlsq_n_u16(a, b, c); |
3689 | } |
3690 | |
3691 | // CHECK-LABEL: @test_vmls_n_u32( |
3692 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3693 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3694 | // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] |
3695 | // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] |
3696 | // CHECK: ret <2 x i32> [[SUB_I]] |
3697 | uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { |
3698 | return vmls_n_u32(a, b, c); |
3699 | } |
3700 | |
3701 | // CHECK-LABEL: @test_vmlsq_n_u32( |
3702 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 |
3703 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3704 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 |
3705 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3 |
3706 | // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]] |
3707 | // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] |
3708 | // CHECK: ret <4 x i32> [[SUB_I]] |
3709 | uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { |
3710 | return vmlsq_n_u32(a, b, c); |
3711 | } |
3712 | |
3713 | // CHECK-LABEL: @test_vmlsl_n_s16( |
3714 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3715 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3716 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3717 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3718 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3719 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3720 | // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3721 | // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] |
3722 | // CHECK: ret <4 x i32> [[SUB_I]] |
3723 | int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { |
3724 | return vmlsl_n_s16(a, b, c); |
3725 | } |
3726 | |
3727 | // CHECK-LABEL: @test_vmlsl_n_s32( |
3728 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3729 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3730 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3731 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3732 | // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3733 | // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] |
3734 | // CHECK: ret <2 x i64> [[SUB_I]] |
3735 | int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { |
3736 | return vmlsl_n_s32(a, b, c); |
3737 | } |
3738 | |
3739 | // CHECK-LABEL: @test_vmlsl_n_u16( |
3740 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3741 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3742 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3743 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3744 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3745 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3746 | // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3747 | // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] |
3748 | // CHECK: ret <4 x i32> [[SUB_I]] |
3749 | uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { |
3750 | return vmlsl_n_u16(a, b, c); |
3751 | } |
3752 | |
3753 | // CHECK-LABEL: @test_vmlsl_n_u32( |
3754 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3755 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3756 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3757 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3758 | // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3759 | // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] |
3760 | // CHECK: ret <2 x i64> [[SUB_I]] |
3761 | uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { |
3762 | return vmlsl_n_u32(a, b, c); |
3763 | } |
3764 | |
3765 | // CHECK-LABEL: @test_vqdmlsl_n_s16( |
3766 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3767 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3768 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 |
3769 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 |
3770 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 |
3771 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 |
3772 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> |
3773 | // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) |
3774 | // CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) |
3775 | // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] |
3776 | int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { |
3777 | return vqdmlsl_n_s16(a, b, c); |
3778 | } |
3779 | |
3780 | // CHECK-LABEL: @test_vqdmlsl_n_s32( |
3781 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
3782 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3783 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 |
3784 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 |
3785 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> |
3786 | // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) |
3787 | // CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) |
3788 | // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] |
3789 | int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { |
3790 | return vqdmlsl_n_s32(a, b, c); |
3791 | } |
3792 | |
3793 | // CHECK-LABEL: @test_vmla_lane_u16_0( |
3794 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
3795 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
3796 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
3797 | // CHECK: ret <4 x i16> [[ADD]] |
3798 | uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { |
3799 | return vmla_lane_u16(a, b, v, 0); |
3800 | } |
3801 | |
3802 | // CHECK-LABEL: @test_vmlaq_lane_u16_0( |
3803 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
3804 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
3805 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
3806 | // CHECK: ret <8 x i16> [[ADD]] |
3807 | uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { |
3808 | return vmlaq_lane_u16(a, b, v, 0); |
3809 | } |
3810 | |
3811 | // CHECK-LABEL: @test_vmla_lane_u32_0( |
3812 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
3813 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
3814 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
3815 | // CHECK: ret <2 x i32> [[ADD]] |
3816 | uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { |
3817 | return vmla_lane_u32(a, b, v, 0); |
3818 | } |
3819 | |
3820 | // CHECK-LABEL: @test_vmlaq_lane_u32_0( |
3821 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
3822 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
3823 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
3824 | // CHECK: ret <4 x i32> [[ADD]] |
3825 | uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { |
3826 | return vmlaq_lane_u32(a, b, v, 0); |
3827 | } |
3828 | |
3829 | // CHECK-LABEL: @test_vmla_laneq_u16_0( |
3830 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
3831 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
3832 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
3833 | // CHECK: ret <4 x i16> [[ADD]] |
3834 | uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { |
3835 | return vmla_laneq_u16(a, b, v, 0); |
3836 | } |
3837 | |
3838 | // CHECK-LABEL: @test_vmlaq_laneq_u16_0( |
3839 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
3840 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
3841 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
3842 | // CHECK: ret <8 x i16> [[ADD]] |
3843 | uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { |
3844 | return vmlaq_laneq_u16(a, b, v, 0); |
3845 | } |
3846 | |
3847 | // CHECK-LABEL: @test_vmla_laneq_u32_0( |
3848 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
3849 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
3850 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
3851 | // CHECK: ret <2 x i32> [[ADD]] |
3852 | uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { |
3853 | return vmla_laneq_u32(a, b, v, 0); |
3854 | } |
3855 | |
3856 | // CHECK-LABEL: @test_vmlaq_laneq_u32_0( |
3857 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
3858 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
3859 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
3860 | // CHECK: ret <4 x i32> [[ADD]] |
3861 | uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { |
3862 | return vmlaq_laneq_u32(a, b, v, 0); |
3863 | } |
3864 | |
3865 | // CHECK-LABEL: @test_vqdmlal_laneq_s16_0( |
3866 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
3867 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3868 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3869 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
3870 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
3871 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
3872 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
3873 | int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { |
3874 | return vqdmlal_laneq_s16(a, b, v, 0); |
3875 | } |
3876 | |
3877 | // CHECK-LABEL: @test_vqdmlal_laneq_s32_0( |
3878 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
3879 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
3880 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
3881 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
3882 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
3883 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
3884 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
3885 | int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { |
3886 | return vqdmlal_laneq_s32(a, b, v, 0); |
3887 | } |
3888 | |
3889 | // CHECK-LABEL: @test_vqdmlal_high_laneq_s16_0( |
3890 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
3891 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
3892 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3893 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
3894 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
3895 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
3896 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
3897 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
3898 | int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { |
3899 | return vqdmlal_high_laneq_s16(a, b, v, 0); |
3900 | } |
3901 | |
3902 | // CHECK-LABEL: @test_vqdmlal_high_laneq_s32_0( |
3903 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
3904 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
3905 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
3906 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
3907 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
3908 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
3909 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
3910 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
3911 | int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { |
3912 | return vqdmlal_high_laneq_s32(a, b, v, 0); |
3913 | } |
3914 | |
3915 | // CHECK-LABEL: @test_vmls_lane_u16_0( |
3916 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> zeroinitializer |
3917 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
3918 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
3919 | // CHECK: ret <4 x i16> [[SUB]] |
3920 | uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { |
3921 | return vmls_lane_u16(a, b, v, 0); |
3922 | } |
3923 | |
3924 | // CHECK-LABEL: @test_vmlsq_lane_u16_0( |
3925 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> zeroinitializer |
3926 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
3927 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
3928 | // CHECK: ret <8 x i16> [[SUB]] |
3929 | uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { |
3930 | return vmlsq_lane_u16(a, b, v, 0); |
3931 | } |
3932 | |
3933 | // CHECK-LABEL: @test_vmls_lane_u32_0( |
3934 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> zeroinitializer |
3935 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
3936 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
3937 | // CHECK: ret <2 x i32> [[SUB]] |
3938 | uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { |
3939 | return vmls_lane_u32(a, b, v, 0); |
3940 | } |
3941 | |
3942 | // CHECK-LABEL: @test_vmlsq_lane_u32_0( |
3943 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> zeroinitializer |
3944 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
3945 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
3946 | // CHECK: ret <4 x i32> [[SUB]] |
3947 | uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { |
3948 | return vmlsq_lane_u32(a, b, v, 0); |
3949 | } |
3950 | |
3951 | // CHECK-LABEL: @test_vmls_laneq_u16_0( |
3952 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
3953 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
3954 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
3955 | // CHECK: ret <4 x i16> [[SUB]] |
3956 | uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { |
3957 | return vmls_laneq_u16(a, b, v, 0); |
3958 | } |
3959 | |
3960 | // CHECK-LABEL: @test_vmlsq_laneq_u16_0( |
3961 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
3962 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
3963 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
3964 | // CHECK: ret <8 x i16> [[SUB]] |
3965 | uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { |
3966 | return vmlsq_laneq_u16(a, b, v, 0); |
3967 | } |
3968 | |
3969 | // CHECK-LABEL: @test_vmls_laneq_u32_0( |
3970 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
3971 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
3972 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
3973 | // CHECK: ret <2 x i32> [[SUB]] |
3974 | uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { |
3975 | return vmls_laneq_u32(a, b, v, 0); |
3976 | } |
3977 | |
3978 | // CHECK-LABEL: @test_vmlsq_laneq_u32_0( |
3979 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
3980 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
3981 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
3982 | // CHECK: ret <4 x i32> [[SUB]] |
3983 | uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { |
3984 | return vmlsq_laneq_u32(a, b, v, 0); |
3985 | } |
3986 | |
3987 | // CHECK-LABEL: @test_vqdmlsl_laneq_s16_0( |
3988 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
3989 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
3990 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
3991 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
3992 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
3993 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
3994 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
3995 | int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { |
3996 | return vqdmlsl_laneq_s16(a, b, v, 0); |
3997 | } |
3998 | |
3999 | // CHECK-LABEL: @test_vqdmlsl_laneq_s32_0( |
4000 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
4001 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4002 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
4003 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4004 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
4005 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4006 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
4007 | int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { |
4008 | return vqdmlsl_laneq_s32(a, b, v, 0); |
4009 | } |
4010 | |
4011 | // CHECK-LABEL: @test_vqdmlsl_high_laneq_s16_0( |
4012 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
4013 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
4014 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4015 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
4016 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4017 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
4018 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
4019 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
4020 | int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { |
4021 | return vqdmlsl_high_laneq_s16(a, b, v, 0); |
4022 | } |
4023 | |
4024 | // CHECK-LABEL: @test_vqdmlsl_high_laneq_s32_0( |
4025 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
4026 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
4027 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4028 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
4029 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4030 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
4031 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4032 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
4033 | int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { |
4034 | return vqdmlsl_high_laneq_s32(a, b, v, 0); |
4035 | } |
4036 | |
4037 | // CHECK-LABEL: @test_vqdmulh_laneq_s16_0( |
4038 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
4039 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
4040 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4041 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
4042 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> |
4043 | // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] |
4044 | int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { |
4045 | return vqdmulh_laneq_s16(a, v, 0); |
4046 | } |
4047 | |
4048 | // CHECK-LABEL: @test_vqdmulhq_laneq_s16_0( |
4049 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
4050 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
4051 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
4052 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
4053 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> |
4054 | // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] |
4055 | int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { |
4056 | return vqdmulhq_laneq_s16(a, v, 0); |
4057 | } |
4058 | |
4059 | // CHECK-LABEL: @test_vqdmulh_laneq_s32_0( |
4060 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
4061 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
4062 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4063 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
4064 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> |
4065 | // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] |
4066 | int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { |
4067 | return vqdmulh_laneq_s32(a, v, 0); |
4068 | } |
4069 | |
4070 | // CHECK-LABEL: @test_vqdmulhq_laneq_s32_0( |
4071 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
4072 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4073 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
4074 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
4075 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> |
4076 | // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] |
4077 | int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { |
4078 | return vqdmulhq_laneq_s32(a, v, 0); |
4079 | } |
4080 | |
4081 | // CHECK-LABEL: @test_vqrdmulh_laneq_s16_0( |
4082 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> zeroinitializer |
4083 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
4084 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4085 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
4086 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> |
4087 | // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] |
4088 | int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { |
4089 | return vqrdmulh_laneq_s16(a, v, 0); |
4090 | } |
4091 | |
4092 | // CHECK-LABEL: @test_vqrdmulhq_laneq_s16_0( |
4093 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> zeroinitializer |
4094 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
4095 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
4096 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
4097 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> |
4098 | // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] |
4099 | int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { |
4100 | return vqrdmulhq_laneq_s16(a, v, 0); |
4101 | } |
4102 | |
4103 | // CHECK-LABEL: @test_vqrdmulh_laneq_s32_0( |
4104 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> zeroinitializer |
4105 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
4106 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4107 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
4108 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> |
4109 | // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] |
4110 | int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { |
4111 | return vqrdmulh_laneq_s32(a, v, 0); |
4112 | } |
4113 | |
4114 | // CHECK-LABEL: @test_vqrdmulhq_laneq_s32_0( |
4115 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> zeroinitializer |
4116 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4117 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
4118 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
4119 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> |
4120 | // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] |
4121 | int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { |
4122 | return vqrdmulhq_laneq_s32(a, v, 0); |
4123 | } |
4124 | |
4125 | // CHECK-LABEL: @test_vmla_lane_u16( |
4126 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4127 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
4128 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
4129 | // CHECK: ret <4 x i16> [[ADD]] |
4130 | uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { |
4131 | return vmla_lane_u16(a, b, v, 3); |
4132 | } |
4133 | |
4134 | // CHECK-LABEL: @test_vmlaq_lane_u16( |
4135 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
4136 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
4137 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
4138 | // CHECK: ret <8 x i16> [[ADD]] |
4139 | uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { |
4140 | return vmlaq_lane_u16(a, b, v, 3); |
4141 | } |
4142 | |
4143 | // CHECK-LABEL: @test_vmla_lane_u32( |
4144 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
4145 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
4146 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
4147 | // CHECK: ret <2 x i32> [[ADD]] |
4148 | uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { |
4149 | return vmla_lane_u32(a, b, v, 1); |
4150 | } |
4151 | |
4152 | // CHECK-LABEL: @test_vmlaq_lane_u32( |
4153 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
4154 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
4155 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
4156 | // CHECK: ret <4 x i32> [[ADD]] |
4157 | uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { |
4158 | return vmlaq_lane_u32(a, b, v, 1); |
4159 | } |
4160 | |
4161 | // CHECK-LABEL: @test_vmla_laneq_u16( |
4162 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4163 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
4164 | // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] |
4165 | // CHECK: ret <4 x i16> [[ADD]] |
4166 | uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { |
4167 | return vmla_laneq_u16(a, b, v, 7); |
4168 | } |
4169 | |
4170 | // CHECK-LABEL: @test_vmlaq_laneq_u16( |
4171 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
4172 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
4173 | // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] |
4174 | // CHECK: ret <8 x i16> [[ADD]] |
4175 | uint16x8_t test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { |
4176 | return vmlaq_laneq_u16(a, b, v, 7); |
4177 | } |
4178 | |
4179 | // CHECK-LABEL: @test_vmla_laneq_u32( |
4180 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4181 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
4182 | // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] |
4183 | // CHECK: ret <2 x i32> [[ADD]] |
4184 | uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { |
4185 | return vmla_laneq_u32(a, b, v, 3); |
4186 | } |
4187 | |
4188 | // CHECK-LABEL: @test_vmlaq_laneq_u32( |
4189 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4190 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
4191 | // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] |
4192 | // CHECK: ret <4 x i32> [[ADD]] |
4193 | uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { |
4194 | return vmlaq_laneq_u32(a, b, v, 3); |
4195 | } |
4196 | |
4197 | // CHECK-LABEL: @test_vqdmlal_laneq_s16( |
4198 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4199 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4200 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
4201 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4202 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
4203 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
4204 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
4205 | int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { |
4206 | return vqdmlal_laneq_s16(a, b, v, 7); |
4207 | } |
4208 | |
4209 | // CHECK-LABEL: @test_vqdmlal_laneq_s32( |
4210 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4211 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4212 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
4213 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4214 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
4215 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4216 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
4217 | int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { |
4218 | return vqdmlal_laneq_s32(a, b, v, 3); |
4219 | } |
4220 | |
4221 | // CHECK-LABEL: @test_vqdmlal_high_laneq_s16( |
4222 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
4223 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4224 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4225 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
4226 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4227 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
4228 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
4229 | // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] |
4230 | int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { |
4231 | return vqdmlal_high_laneq_s16(a, b, v, 7); |
4232 | } |
4233 | |
4234 | // CHECK-LABEL: @test_vqdmlal_high_laneq_s32( |
4235 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
4236 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4237 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4238 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
4239 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4240 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
4241 | // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4242 | // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] |
4243 | int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { |
4244 | return vqdmlal_high_laneq_s32(a, b, v, 3); |
4245 | } |
4246 | |
4247 | // CHECK-LABEL: @test_vmls_lane_u16( |
4248 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4249 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
4250 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
4251 | // CHECK: ret <4 x i16> [[SUB]] |
4252 | uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { |
4253 | return vmls_lane_u16(a, b, v, 3); |
4254 | } |
4255 | |
4256 | // CHECK-LABEL: @test_vmlsq_lane_u16( |
4257 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %v, <4 x i16> %v, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> |
4258 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
4259 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
4260 | // CHECK: ret <8 x i16> [[SUB]] |
4261 | uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { |
4262 | return vmlsq_lane_u16(a, b, v, 3); |
4263 | } |
4264 | |
4265 | // CHECK-LABEL: @test_vmls_lane_u32( |
4266 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <2 x i32> <i32 1, i32 1> |
4267 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
4268 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
4269 | // CHECK: ret <2 x i32> [[SUB]] |
4270 | uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { |
4271 | return vmls_lane_u32(a, b, v, 1); |
4272 | } |
4273 | |
4274 | // CHECK-LABEL: @test_vmlsq_lane_u32( |
4275 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %v, <2 x i32> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
4276 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
4277 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
4278 | // CHECK: ret <4 x i32> [[SUB]] |
4279 | uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { |
4280 | return vmlsq_lane_u32(a, b, v, 1); |
4281 | } |
4282 | |
4283 | // CHECK-LABEL: @test_vmls_laneq_u16( |
4284 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4285 | // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] |
4286 | // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] |
4287 | // CHECK: ret <4 x i16> [[SUB]] |
4288 | uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { |
4289 | return vmls_laneq_u16(a, b, v, 7); |
4290 | } |
4291 | |
4292 | // CHECK-LABEL: @test_vmlsq_laneq_u16( |
4293 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
4294 | // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] |
4295 | // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] |
4296 | // CHECK: ret <8 x i16> [[SUB]] |
4297 | uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { |
4298 | return vmlsq_laneq_u16(a, b, v, 7); |
4299 | } |
4300 | |
4301 | // CHECK-LABEL: @test_vmls_laneq_u32( |
4302 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4303 | // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] |
4304 | // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] |
4305 | // CHECK: ret <2 x i32> [[SUB]] |
4306 | uint32x2_t test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { |
4307 | return vmls_laneq_u32(a, b, v, 3); |
4308 | } |
4309 | |
4310 | // CHECK-LABEL: @test_vmlsq_laneq_u32( |
4311 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4312 | // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] |
4313 | // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] |
4314 | // CHECK: ret <4 x i32> [[SUB]] |
4315 | uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { |
4316 | return vmlsq_laneq_u32(a, b, v, 3); |
4317 | } |
4318 | |
4319 | // CHECK-LABEL: @test_vqdmlsl_laneq_s16( |
4320 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4321 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4322 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> |
4323 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4324 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) |
4325 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
4326 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
4327 | int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { |
4328 | return vqdmlsl_laneq_s16(a, b, v, 7); |
4329 | } |
4330 | |
4331 | // CHECK-LABEL: @test_vqdmlsl_laneq_s32( |
4332 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4333 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4334 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> |
4335 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4336 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) |
4337 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4338 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
4339 | int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { |
4340 | return vqdmlsl_laneq_s32(a, b, v, 3); |
4341 | } |
4342 | |
4343 | // CHECK-LABEL: @test_vqdmlsl_high_laneq_s16( |
4344 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
4345 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4346 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4347 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> |
4348 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4349 | // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[SHUFFLE]]) |
4350 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) |
4351 | // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] |
4352 | int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { |
4353 | return vqdmlsl_high_laneq_s16(a, b, v, 7); |
4354 | } |
4355 | |
4356 | // CHECK-LABEL: @test_vqdmlsl_high_laneq_s32( |
4357 | // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> |
4358 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4359 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> |
4360 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> |
4361 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4362 | // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[SHUFFLE]]) |
4363 | // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) |
4364 | // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] |
4365 | int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { |
4366 | return vqdmlsl_high_laneq_s32(a, b, v, 3); |
4367 | } |
4368 | |
4369 | // CHECK-LABEL: @test_vqdmulh_laneq_s16( |
4370 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4371 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
4372 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4373 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
4374 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> |
4375 | // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] |
4376 | int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { |
4377 | return vqdmulh_laneq_s16(a, v, 7); |
4378 | } |
4379 | |
4380 | // CHECK-LABEL: @test_vqdmulhq_laneq_s16( |
4381 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
4382 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
4383 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
4384 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
4385 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> |
4386 | // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] |
4387 | int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { |
4388 | return vqdmulhq_laneq_s16(a, v, 7); |
4389 | } |
4390 | |
4391 | // CHECK-LABEL: @test_vqdmulh_laneq_s32( |
4392 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4393 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
4394 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4395 | // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
4396 | // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> |
4397 | // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] |
4398 | int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { |
4399 | return vqdmulh_laneq_s32(a, v, 3); |
4400 | } |
4401 | |
4402 | // CHECK-LABEL: @test_vqdmulhq_laneq_s32( |
4403 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4404 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4405 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
4406 | // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
4407 | // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> |
4408 | // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] |
4409 | int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { |
4410 | return vqdmulhq_laneq_s32(a, v, 3); |
4411 | } |
4412 | |
4413 | // CHECK-LABEL: @test_vqrdmulh_laneq_s16( |
4414 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <4 x i32> <i32 7, i32 7, i32 7, i32 7> |
4415 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> |
4416 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> |
4417 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) |
4418 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> |
4419 | // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] |
4420 | int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { |
4421 | return vqrdmulh_laneq_s16(a, v, 7); |
4422 | } |
4423 | |
4424 | // CHECK-LABEL: @test_vqrdmulhq_laneq_s16( |
4425 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i16> %v, <8 x i16> %v, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> |
4426 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> |
4427 | // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> |
4428 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) |
4429 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> |
4430 | // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] |
4431 | int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { |
4432 | return vqrdmulhq_laneq_s16(a, v, 7); |
4433 | } |
4434 | |
4435 | // CHECK-LABEL: @test_vqrdmulh_laneq_s32( |
4436 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <2 x i32> <i32 3, i32 3> |
4437 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> |
4438 | // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> |
4439 | // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) |
4440 | // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> |
4441 | // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] |
4442 | int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { |
4443 | return vqrdmulh_laneq_s32(a, v, 3); |
4444 | } |
4445 | |
4446 | // CHECK-LABEL: @test_vqrdmulhq_laneq_s32( |
4447 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> %v, <4 x i32> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
4448 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> |
4449 | // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> |
4450 | // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) |
4451 | // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> |
4452 | // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] |
4453 | int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { |
4454 | return vqrdmulhq_laneq_s32(a, v, 3); |
4455 | } |
4456 | |