1 | // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s |
2 | |
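3 | // Test IR generation for AArch64 NEON floating-point multiply-accumulate and multiply-subtract intrinsics (vmla/vmls in _n, _lane and _laneq forms, plus vfmaq_n/vfmsq_n). |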
4 | |
5 | #include <arm_neon.h> |
6 | |
7 | // CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { |
8 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 |
9 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 |
10 | // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] |
11 | // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]] |
12 | // CHECK: ret <2 x float> [[ADD_I]] |
// Calls vmla_n_f32(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into both lanes of a <2 x float>, multiplied by b (fmul) and
// added to a (fadd).
13 | float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { |
14 | return vmla_n_f32(a, b, c); |
15 | } |
16 | |
17 | // CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 { |
18 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 |
19 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 |
20 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 |
21 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3 |
22 | // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]] |
23 | // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]] |
24 | // CHECK: ret <4 x float> [[ADD_I]] |
// Calls vmlaq_n_f32(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into all four lanes of a <4 x float>, multiplied by b (fmul) and
// added to a (fadd).
25 | float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { |
26 | return vmlaq_n_f32(a, b, c); |
27 | } |
28 | |
29 | // CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { |
30 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 |
31 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 |
32 | // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] |
33 | // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]] |
34 | // CHECK: ret <2 x double> [[ADD_I]] |
// Calls vmlaq_n_f64(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into both lanes of a <2 x double>, multiplied by b (fmul) and
// added to a (fadd).
35 | float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { |
36 | return vmlaq_n_f64(a, b, c); |
37 | } |
38 | |
39 | // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 { |
40 | // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 |
41 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 |
42 | // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 |
43 | // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3 |
44 | // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]] |
45 | // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]] |
46 | // CHECK: ret <4 x float> [[SUB_I]] |
// Calls vmlsq_n_f32(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into all four lanes of a <4 x float>, multiplied by b (fmul) and
// the product subtracted from a (fsub).
47 | float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { |
48 | return vmlsq_n_f32(a, b, c); |
49 | } |
50 | |
51 | // CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { |
52 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 |
53 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 |
54 | // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] |
55 | // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]] |
56 | // CHECK: ret <2 x float> [[SUB_I]] |
// Calls vmls_n_f32(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into both lanes of a <2 x float>, multiplied by b (fmul) and
// the product subtracted from a (fsub).
57 | float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { |
58 | return vmls_n_f32(a, b, c); |
59 | } |
60 | |
61 | // CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { |
62 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 |
63 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 |
64 | // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]] |
65 | // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]] |
66 | // CHECK: ret <2 x double> [[SUB_I]] |
// Calls vmlsq_n_f64(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into both lanes of a <2 x double>, multiplied by b (fmul) and
// the product subtracted from a (fsub).
67 | float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { |
68 | return vmlsq_n_f64(a, b, c); |
69 | } |
70 | |
71 | // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { |
72 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer |
73 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
74 | // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] |
75 | // CHECK: ret <2 x float> [[ADD]] |
// Calls vmla_lane_f32 with lane index 0 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <2 x i32> mask, then fmul by b and fadd to a.
76 | float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { |
77 | return vmla_lane_f32(a, b, v, 0); |
78 | } |
79 | |
80 | // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 { |
81 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer |
82 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
83 | // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] |
84 | // CHECK: ret <4 x float> [[ADD]] |
// Calls vmlaq_lane_f32 with lane index 0 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <4 x i32> mask (producing a <4 x float>), then fmul by b
// and fadd to a.
85 | float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { |
86 | return vmlaq_lane_f32(a, b, v, 0); |
87 | } |
88 | |
89 | // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 { |
90 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer |
91 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
92 | // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] |
93 | // CHECK: ret <2 x float> [[ADD]] |
// Calls vmla_laneq_f32 with lane index 0 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <2 x i32> mask (producing a <2 x float>), then fmul by b
// and fadd to a.
94 | float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { |
95 | return vmla_laneq_f32(a, b, v, 0); |
96 | } |
97 | |
98 | // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 { |
99 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer |
100 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
101 | // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] |
102 | // CHECK: ret <4 x float> [[ADD]] |
// Calls vmlaq_laneq_f32 with lane index 0 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <4 x i32> mask, then fmul by b and fadd to a.
103 | float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { |
104 | return vmlaq_laneq_f32(a, b, v, 0); |
105 | } |
106 | |
107 | // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { |
108 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> zeroinitializer |
109 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
110 | // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] |
111 | // CHECK: ret <2 x float> [[SUB]] |
// Calls vmls_lane_f32 with lane index 0 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <2 x i32> mask, then fmul by b and fsub of the product
// from a.
112 | float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { |
113 | return vmls_lane_f32(a, b, v, 0); |
114 | } |
115 | |
116 | // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 { |
117 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer |
118 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
119 | // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] |
120 | // CHECK: ret <4 x float> [[SUB]] |
// Calls vmlsq_lane_f32 with lane index 0 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <4 x i32> mask (producing a <4 x float>), then fmul by b
// and fsub of the product from a.
121 | float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { |
122 | return vmlsq_lane_f32(a, b, v, 0); |
123 | } |
124 | |
125 | // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 { |
126 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer |
127 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
128 | // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] |
129 | // CHECK: ret <2 x float> [[SUB]] |
// Calls vmls_laneq_f32 with lane index 0 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <2 x i32> mask (producing a <2 x float>), then fmul by b
// and fsub of the product from a.
130 | float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { |
131 | return vmls_laneq_f32(a, b, v, 0); |
132 | } |
133 | |
134 | // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 { |
135 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer |
136 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
137 | // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] |
138 | // CHECK: ret <4 x float> [[SUB]] |
// Calls vmlsq_laneq_f32 with lane index 0 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a zeroinitializer <4 x i32> mask, then fmul by b and fsub of the product
// from a.
139 | float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { |
140 | return vmlsq_laneq_f32(a, b, v, 0); |
141 | } |
142 | |
143 | // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { |
144 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> |
145 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
146 | // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] |
147 | // CHECK: ret <2 x float> [[ADD]] |
// Calls vmla_lane_f32 with lane index 1 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// mask <i32 1, i32 1>, then fmul by b and fadd to a.
148 | float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { |
149 | return vmla_lane_f32(a, b, v, 1); |
150 | } |
151 | |
152 | // CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 { |
153 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
154 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
155 | // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] |
156 | // CHECK: ret <4 x float> [[ADD]] |
// Calls vmlaq_lane_f32 with lane index 1 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a four-element mask of all 1s (producing a <4 x float>), then fmul by b and
// fadd to a.
157 | float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { |
158 | return vmlaq_lane_f32(a, b, v, 1); |
159 | } |
160 | |
161 | // CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 { |
162 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> |
163 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
164 | // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] |
165 | // CHECK: ret <2 x float> [[ADD]] |
// Calls vmla_laneq_f32 with lane index 3 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// mask <i32 3, i32 3> (producing a <2 x float>), then fmul by b and fadd to a.
166 | float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { |
167 | return vmla_laneq_f32(a, b, v, 3); |
168 | } |
169 | |
170 | // CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 { |
171 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
172 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
173 | // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] |
174 | // CHECK: ret <4 x float> [[ADD]] |
// Calls vmlaq_laneq_f32 with lane index 3 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a four-element mask of all 3s, then fmul by b and fadd to a.
175 | float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { |
176 | return vmlaq_laneq_f32(a, b, v, 3); |
177 | } |
178 | |
179 | // CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { |
180 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <2 x i32> <i32 1, i32 1> |
181 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
182 | // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] |
183 | // CHECK: ret <2 x float> [[SUB]] |
// Calls vmls_lane_f32 with lane index 1 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// mask <i32 1, i32 1>, then fmul by b and fsub of the product from a.
184 | float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { |
185 | return vmls_lane_f32(a, b, v, 1); |
186 | } |
187 | |
188 | // CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 { |
189 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1> |
190 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
191 | // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] |
192 | // CHECK: ret <4 x float> [[SUB]] |
// Calls vmlsq_lane_f32 with lane index 1 on a <2 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a four-element mask of all 1s (producing a <4 x float>), then fmul by b and
// fsub of the product from a.
193 | float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { |
194 | return vmlsq_lane_f32(a, b, v, 1); |
195 | } |
196 | // CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 { |
197 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3> |
198 | // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] |
199 | // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] |
200 | // CHECK: ret <2 x float> [[SUB]] |
// Calls vmls_laneq_f32 with lane index 3 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// mask <i32 3, i32 3> (producing a <2 x float>), then fmul by b and fsub of
// the product from a.
201 | float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { |
202 | return vmls_laneq_f32(a, b, v, 3); |
203 | } |
204 | |
205 | // CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 { |
206 | // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3> |
207 | // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] |
208 | // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] |
209 | // CHECK: ret <4 x float> [[SUB]] |
// Calls vmlsq_laneq_f32 with lane index 3 on a <4 x float> v.
// The expectations preceding this function require a shufflevector of v with
// a four-element mask of all 3s, then fmul by b and fsub of the product
// from a.
210 | float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { |
211 | return vmlsq_laneq_f32(a, b, v, 3); |
212 | } |
213 | |
214 | // CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { |
215 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 |
216 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 |
217 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a) |
218 | // CHECK: ret <2 x double> [[TMP6]] |
// Calls vfmaq_n_f64(a, b, c) and returns its result unchanged.
// The expectations preceding this function require the scalar c to be
// inserted into both lanes of a <2 x double>, followed by a call to
// @llvm.fma.v2f64 with operands (b, splatted c, a).
219 | float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { |
220 | return vfmaq_n_f64(a, b, c); |
221 | } |
222 | |
223 | // CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 { |
224 | // CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b |
225 | // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 |
226 | // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 |
227 | // CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3 |
228 | // CHECK: ret <2 x double> [[TMP6]] |
// Calls vfmsq_n_f64(a, b, c) and returns its result unchanged.
// The expectations preceding this function require b to be negated first
// (fsub from a vector of -0.0), the scalar c to be inserted into both lanes
// of a <2 x double>, and then a call to @llvm.fma.v2f64 with operands
// (negated b, splatted c, a).
229 | float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { |
230 | return vfmsq_n_f64(a, b, c); |
231 | } |
232 | |
233 | // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64" |
234 | // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128" |
235 | |