// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test AArch64 scalar-by-element NEON intrinsics: the lane-indexed forms of vmul, vmulx,
// vfma/vfms, vqdmull, vqdmulh/vqrdmulh, and vqdmlal/vqdmlsl.

#include <arm_neon.h>


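// Scalar floating-point multiply by lane: the checks below expect the lane to be
// extracted from the vector operand (after a round-trip bitcast through an i8 vector)
// and the multiply to be emitted as a plain scalar fmul.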
// CHECK-LABEL: define float @test_vmuls_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
// CHECK: ret float [[MUL]]
float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
  return vmuls_lane_f32(a, b, 1);
}

// CHECK-LABEL: define double @test_vmuld_lane_f64(double %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
// CHECK: ret double [[MUL]]
float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
  return vmuld_lane_f64(a, b, 0);
}

// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
// CHECK: ret float [[MUL]]
float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
  return vmuls_laneq_f32(a, b, 3);
}

// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
// CHECK: ret double [[MUL]]
float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
  return vmuld_laneq_f64(a, b, 1);
}

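// vmul_n_f64 on a float64x1_t: the <1 x double> operand is expected to be bitcast to a
// plain double, multiplied with fmul, and bitcast back.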
// CHECK-LABEL: define <1 x double> @test_vmul_n_f64(<1 x double> %a, double %b) #0 {
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %a to double
// CHECK: [[TMP3:%.*]] = fmul double [[TMP2]], %b
// CHECK: [[TMP4:%.*]] = bitcast double [[TMP3]] to <1 x double>
// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
  return vmul_n_f64(a, b);
}

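// Scalar multiply-extended by lane: same lane extraction as above, with the multiply
// lowered to the llvm.aarch64.neon.fmulx.f32/f64 intrinsics.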
// CHECK-LABEL: define float @test_vmulxs_lane_f32(float %a, <2 x float> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
  return vmulxs_lane_f32(a, b, 1);
}

// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
  return vmulxs_laneq_f32(a, b, 3);
}

// CHECK-LABEL: define double @test_vmulxd_lane_f64(double %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
  return vmulxd_lane_f64(a, b, 0);
}

// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
  return vmulxd_laneq_f64(a, b, 1);
}

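// float64x1_t multiply-extended by lane: the requested lane of each operand is extracted,
// fmulx is called on the scalars, and the result is written back into lane 0 of a
// <1 x double>.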
// CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
  return vmulx_lane_f64(a, b, 0);
}


// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
  return vmulx_laneq_f64(a, b, 0);
}

// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
  return vmulx_laneq_f64(a, b, 1);
}


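// Scalar fused multiply-add/subtract by lane: the extracted lane feeds llvm.fma.f32/f64,
// and the fms form negates %b first (fsub -0.0, %b).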
// CHECK-LABEL: define float @test_vfmas_lane_f32(float %a, float %b, <2 x float> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
  return vfmas_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: define double @test_vfmad_lane_f64(double %a, double %b, <1 x double> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
  return vfmad_lane_f64(a, b, c, 0);
}

// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
  return vfmad_laneq_f64(a, b, c, 1);
}

// CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 {
// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
  return vfmss_lane_f32(a, b, c, 1);
}

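// float64x1_t fused multiply-add/subtract by lane: the _lane forms splat the lane with a
// shufflevector and call llvm.fma.v1f64, while the _laneq forms extract the lane and use
// the scalar llvm.fma.f64.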
// CHECK-LABEL: define <1 x double> @test_vfma_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
// CHECK: ret <1 x double> [[FMLA2]]
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
  return vfma_lane_f64(a, b, v, 0);
}

// CHECK-LABEL: define <1 x double> @test_vfms_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 {
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
// CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
// CHECK: ret <1 x double> [[FMLA2]]
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
  return vfms_lane_f64(a, b, v, 0);
}

// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
// CHECK: ret <1 x double> [[TMP7]]
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
  return vfma_laneq_f64(a, b, v, 0);
}

// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
// CHECK: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
// CHECK: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
// CHECK: ret <1 x double> [[TMP7]]
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
  return vfms_laneq_f64(a, b, v, 0);
}

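// Saturating doubling multiply-long by lane: the 16-bit forms wrap the scalars in
// single-element <4 x i16> vectors and call llvm.aarch64.neon.sqdmull.v4i32, while the
// 32-bit forms use the dedicated sqdmulls.scalar intrinsic.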
// CHECK-LABEL: define i32 @test_vqdmullh_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK: ret i32 [[TMP4]]
int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
  return vqdmullh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
  return vqdmulls_lane_s32(a, b, 1);
}

// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
// CHECK: ret i32 [[TMP4]]
int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
  return vqdmullh_laneq_s16(a, b, 7);
}

// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
  return vqdmulls_laneq_s32(a, b, 3);
}

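// Saturating doubling multiply returning high half by lane, without and with rounding:
// sqdmulh/sqrdmulh, again via <4 x i16> wrappers for the i16 forms and the .i32
// intrinsics for the i32 forms.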
// CHECK-LABEL: define i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP4]]
int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
  return vqdmulhh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define i32 @test_vqdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
  return vqdmulhs_lane_s32(a, b, 1);
}


// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP4]]
int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
  return vqdmulhh_laneq_s16(a, b, 7);
}


// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
  return vqdmulhs_laneq_s32(a, b, 3);
}

// CHECK-LABEL: define i16 @test_vqrdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP4]]
int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
  return vqrdmulhh_lane_s16(a, b, 3);
}

// CHECK-LABEL: define i32 @test_vqrdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
  return vqrdmulhs_lane_s32(a, b, 1);
}


// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[TMP4:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP4]]
int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
  return vqrdmulhh_laneq_s16(a, b, 7);
}


// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
  return vqrdmulhs_laneq_s32(a, b, 3);
}

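// Saturating doubling multiply-accumulate/subtract by lane: a sqdmull (or
// sqdmulls.scalar) of the scalar and the extracted lane, followed by a saturating
// sqadd or sqsub against the accumulator.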
// CHECK-LABEL: define i32 @test_vqdmlalh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
// CHECK: ret i32 [[VQDMLXL1]]
int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
  return vqdmlalh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define i64 @test_vqdmlals_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
  return vqdmlals_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]])
// CHECK: ret i32 [[VQDMLXL1]]
int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
  return vqdmlalh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
  return vqdmlals_laneq_s32(a, b, c, 3);
}

// CHECK-LABEL: define i32 @test_vqdmlslh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
// CHECK: ret i32 [[VQDMLXL1]]
int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
  return vqdmlslh_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: define i64 @test_vqdmlsls_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
  return vqdmlsls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0
// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]])
// CHECK: ret i32 [[VQDMLXL1]]
int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
  return vqdmlslh_laneq_s16(a, b, c, 7);
}

// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
  return vqdmlsls_laneq_s32(a, b, c, 3);
}

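// Constant-input variants: the vcreate_f64 arguments are expected to show up as bitcasts
// of their i64 bit patterns, and the vcombine_f64 in the laneq test as a two-element
// shufflevector, before the same fmulx/insertelement sequence as above.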
// CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8>
// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_lane_f64_0() {
  float64x1_t arg1;
  float64x1_t arg2;
  float64x1_t result;
  arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
  arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
  result = vmulx_lane_f64(arg1, arg2, 0);
  return result;
}

// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[SHUFFLE_I]] to <16 x i8>
// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_2() {
  float64x1_t arg1;
  float64x1_t arg2;
  float64x2_t arg3;
  float64x1_t result;
  arg1 = vcreate_f64(UINT64_C(0x3fd6304bc43ab5c2));
  arg2 = vcreate_f64(UINT64_C(0x3fee211e215aeef3));
  arg3 = vcombine_f64(arg1, arg2);
  result = vmulx_laneq_f64(arg1, arg3, 1);
  return result;
}

// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"