1 | // REQUIRES: aarch64-registered-target |
2 | |
3 | // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ |
4 | // RUN: -target-feature +v8.1a -S -emit-llvm -o - %s | FileCheck %s |
5 | |
6 | #include <arm_neon.h> |
7 | |
// CHECK-LABEL: test_vqrdmlah_laneq_s16
// Lane form on a 64-bit result: lane 7 of the 128-bit vector is splatted via
// shufflevector, then lowered to sqrdmulh followed by a saturating add.
int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlah_laneq_s16(a, b, v, 7);
}
15 | |
// CHECK-LABEL: test_vqrdmlah_laneq_s32
// Same lowering as the s16 variant, for 2 x i32: splat lane 3 of the 128-bit
// operand, then sqrdmulh + saturating add.
int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlah_laneq_s32(a, b, v, 3);
}
23 | |
// CHECK-LABEL: test_vqrdmlahq_laneq_s16
// 128-bit (q) form: lane 7 is splatted across all eight i16 elements, then
// sqrdmulh + saturating add on the full-width vectors.
int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlahq_laneq_s16(a, b, v, 7);
}
31 | |
// CHECK-LABEL: test_vqrdmlahq_laneq_s32
// 128-bit (q) form for 4 x i32: splat lane 3, then sqrdmulh + saturating add.
int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlahq_laneq_s32(a, b, v, 3);
}
39 | |
// CHECK-LABEL: test_vqrdmlahh_s16
// Scalar i16 variant: no i16 scalar intrinsic exists, so each operand is
// inserted into lane 0 of a <4 x i16>, the vector intrinsic is used, and the
// scalar result is extracted from lane 0 — once for the multiply, once for
// the saturating add.
int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_s16(a, b, c);
}
52 | |
// CHECK-LABEL: test_vqrdmlahs_s32
// Scalar i32 variant: unlike i16, dedicated i32 scalar intrinsics exist, so
// no insert/extract dance is needed.
int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_s32(a, b, c);
}
59 | |
// CHECK-LABEL: test_vqrdmlahh_lane_s16
// Scalar-by-lane: lane 3 is first extracted from the 64-bit vector, then the
// scalar i16 path follows (insert to lane 0, vector sqrdmulh/sqadd, extract).
int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_lane_s16(a, b, c, 3);
}
73 | |
// CHECK-LABEL: test_vqrdmlahs_lane_s32
// Scalar-by-lane i32: extract lane 1, then use the scalar i32 intrinsics
// directly.
int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_lane_s32(a, b, c, 1);
}
81 | |
// CHECK-LABEL: test_vqrdmlahh_laneq_s16
// Scalar-by-lane from a 128-bit vector: lane 7 is extracted from the
// <8 x i16> operand, then the scalar i16 insert/compute/extract path follows.
int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[add:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[add]], i64 0
  return vqrdmlahh_laneq_s16(a, b, c, 7);
}
95 | |
// CHECK-LABEL: test_vqrdmlahs_laneq_s32
// Scalar-by-lane i32 from a 128-bit vector: extract lane 3, then scalar
// sqrdmulh + sqadd.
int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqadd.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlahs_laneq_s32(a, b, c, 3);
}
103 | |
// CHECK-LABEL: test_vqrdmlsh_laneq_s16
// vqrdmlsh mirrors vqrdmlah but accumulates with a saturating subtract
// (sqsub) instead of sqadd; the lane splat and sqrdmulh are identical.
int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
// CHECK: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}})
  return vqrdmlsh_laneq_s16(a, b, v, 7);
}
111 | |
// CHECK-LABEL: test_vqrdmlsh_laneq_s32
// Subtracting counterpart of vqrdmlah_laneq_s32: splat lane 3, sqrdmulh,
// then saturating subtract.
int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <2 x i32> <i32 3, i32 3>
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
// CHECK: call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
  return vqrdmlsh_laneq_s32(a, b, v, 3);
}
119 | |
// CHECK-LABEL: test_vqrdmlshq_laneq_s16
// 128-bit (q) subtracting form: splat lane 7 across all eight i16 elements,
// then sqrdmulh + saturating subtract.
int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
// CHECK: shufflevector <8 x i16> {{%.*}}, <8 x i16> {{%.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
// CHECK: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
  return vqrdmlshq_laneq_s16(a, b, v, 7);
}
127 | |
// CHECK-LABEL: test_vqrdmlshq_laneq_s32
// 128-bit (q) subtracting form for 4 x i32: splat lane 3, sqrdmulh, then
// saturating subtract.
int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
// CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
// CHECK: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}})
  return vqrdmlshq_laneq_s32(a, b, v, 3);
}
135 | |
// CHECK-LABEL: test_vqrdmlshh_s16
// Scalar i16 subtracting variant: same insert-to-lane-0 / vector-op /
// extract-lane-0 widening as vqrdmlahh_s16, but accumulating with sqsub.
int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_s16(a, b, c);
}
148 | |
// CHECK-LABEL: test_vqrdmlshs_s32
// Scalar i32 subtracting variant: direct scalar intrinsics, sqrdmulh then
// sqsub.
int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_s32(a, b, c);
}
155 | |
// CHECK-LABEL: test_vqrdmlshh_lane_s16
// Scalar-by-lane subtracting variant: extract lane 3 from the 64-bit vector,
// then the scalar i16 widening path with sqsub as the accumulate step.
int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
// CHECK: extractelement <4 x i16> {{%.*}}, i32 3
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_lane_s16(a, b, c, 3);
}
169 | |
// CHECK-LABEL: test_vqrdmlshs_lane_s32
// Scalar-by-lane i32 subtracting variant: extract lane 1, then scalar
// sqrdmulh + sqsub.
int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
// CHECK: extractelement <2 x i32> {{%.*}}, i32 1
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_lane_s32(a, b, c, 1);
}
177 | |
// CHECK-LABEL: test_vqrdmlshh_laneq_s16
// Scalar-by-lane subtracting variant from a 128-bit vector: extract lane 7
// of the <8 x i16> operand, then the scalar i16 widening path with sqsub.
int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
// CHECK: extractelement <8 x i16> {{%.*}}, i32 7
// CHECK: [[insb:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insc:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[mul:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[insb]], <4 x i16> [[insc]])
// CHECK: extractelement <4 x i16> [[mul]], i64 0
// CHECK: [[insa:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[insmul:%.*]] = insertelement <4 x i16> undef, i16 {{%.*}}, i64 0
// CHECK: [[sub:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[insa]], <4 x i16> [[insmul]])
// CHECK: extractelement <4 x i16> [[sub]], i64 0
  return vqrdmlshh_laneq_s16(a, b, c, 7);
}
191 | |
// CHECK-LABEL: test_vqrdmlshs_laneq_s32
// Scalar-by-lane i32 subtracting variant from a 128-bit vector: extract
// lane 3, then scalar sqrdmulh + sqsub.
int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
// CHECK: extractelement <4 x i32> {{%.*}}, i32 3
// CHECK: call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{%.*}}, i32 {{%.*}})
// CHECK: call i32 @llvm.aarch64.neon.sqsub.i32(i32 {{%.*}}, i32 {{%.*}})
  return vqrdmlshs_laneq_s32(a, b, c, 3);
}
199 | |