aarch64-neon-fp16fml.c source code [clang_source_code/test/CodeGen/aarch64-neon-fp16fml.c]

1	// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +v8.2a -target-feature +neon -target-feature +fp16fml \
2	// RUN: -fallow-half-arguments-and-returns -disable-O0-optnone -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s
3
4	// REQUIRES: aarch64-registered-target
5
6	// Test AArch64 Armv8.2-A FP16 Fused Multiply-Add Long intrinsics
7
8	#include <arm_neon.h>
9
10	// Vector form
11
12	float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Forwards a, b and c unchanged to vfmlal_low_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlal.v2f32.v4f16 on the original %a, %b and %c, and
	// require the value of that call to be what the function returns.
13	// CHECK-LABEL: define <2 x float> @test_vfmlal_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
14	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
15	// CHECK: ret <2 x float> [[RESULT]]
16	return vfmlal_low_f16(a, b, c);
17	}
18
19	float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Forwards a, b and c unchanged to vfmlsl_low_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlsl.v2f32.v4f16 on the original %a, %b and %c, and
	// require the value of that call to be what the function returns.
20	// CHECK-LABEL: define <2 x float> @test_vfmlsl_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
21	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
22	// CHECK: ret <2 x float> [[RESULT]]
23	return vfmlsl_low_f16(a, b, c);
24	}
25
26	float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Forwards a, b and c unchanged to vfmlal_high_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlal2.v2f32.v4f16 (note the "2" suffix) on the
	// original %a, %b and %c, with that call's value being returned.
27	// CHECK-LABEL: define <2 x float> @test_vfmlal_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
28	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
29	// CHECK: ret <2 x float> [[RESULT]]
30	return vfmlal_high_f16(a, b, c);
31	}
32
33	float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Forwards a, b and c unchanged to vfmlsl_high_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlsl2.v2f32.v4f16 (note the "2" suffix) on the
	// original %a, %b and %c, with that call's value being returned.
34	// CHECK-LABEL: define <2 x float> @test_vfmlsl_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
35	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
36	// CHECK: ret <2 x float> [[RESULT]]
37	return vfmlsl_high_f16(a, b, c);
38	}
39
40	float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Forwards a, b and c unchanged to vfmlalq_low_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlal.v4f32.v8f16 on the original %a, %b and %c, and
	// require the value of that call to be what the function returns.
41	// CHECK-LABEL: define <4 x float> @test_vfmlalq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
42	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
43	// CHECK: ret <4 x float> [[RESULT]]
44	return vfmlalq_low_f16(a, b, c);
45	}
46
47	float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Forwards a, b and c unchanged to vfmlslq_low_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlsl.v4f32.v8f16 on the original %a, %b and %c, and
	// require the value of that call to be what the function returns.
48	// CHECK-LABEL: define <4 x float> @test_vfmlslq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
49	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
50	// CHECK: ret <4 x float> [[RESULT]]
51	return vfmlslq_low_f16(a, b, c);
52	}
53
54	float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Forwards a, b and c unchanged to vfmlalq_high_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlal2.v4f32.v8f16 (note the "2" suffix) on the
	// original %a, %b and %c, with that call's value being returned.
55	// CHECK-LABEL: define <4 x float> @test_vfmlalq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
56	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
57	// CHECK: ret <4 x float> [[RESULT]]
58	return vfmlalq_high_f16(a, b, c);
59	}
60
61	float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Forwards a, b and c unchanged to vfmlslq_high_f16 and returns its result.
	// The FileCheck expectations below require a call to
	// @llvm.aarch64.neon.fmlsl2.v4f32.v8f16 (note the "2" suffix) on the
	// original %a, %b and %c, with that call's value being returned.
62	// CHECK-LABEL: define <4 x float> @test_vfmlslq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
63	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
64	// CHECK: ret <4 x float> [[RESULT]]
65	return vfmlslq_high_f16(a, b, c);
66	}
67
68	// Indexed form
69
70	float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Calls vfmlal_lane_low_f16 with the constant lane index 0 and returns its
	// result. The FileCheck expectations below require %c (<4 x half>) to go
	// through a shufflevector with an all-zero mask (zeroinitializer) yielding
	// <4 x half>, and that shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal.v2f32.v4f16; %a and %b are passed unchanged.
71	// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
72	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
73	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
74	// CHECK: ret <2 x float> [[RESULT]]
75	return vfmlal_lane_low_f16(a, b, c, 0);
76	}
77
78	float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Calls vfmlal_lane_high_f16 with the constant lane index 1 and returns its
	// result. The FileCheck expectations below require %c (<4 x half>) to go
	// through a shufflevector whose four mask elements are all 1, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal2.v2f32.v4f16; %a and %b are passed unchanged.
79	// CHECK-LABEL: define <2 x float> @test_vfmlal_lane_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
80	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
81	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
82	// CHECK: ret <2 x float> [[RESULT]]
83	return vfmlal_lane_high_f16(a, b, c, 1);
84	}
85
86	float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
	// Calls vfmlalq_lane_low_f16 with the constant lane index 2 and returns its
	// result. Here c is the narrower <4 x half> vector while b is <8 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose eight mask elements are all 2 (widening it to <8 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal.v4f32.v8f16; %a and %b are passed unchanged.
87	// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_low_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
88	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
89	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
90	// CHECK: ret <4 x float> [[RESULT]]
91	return vfmlalq_lane_low_f16(a, b, c, 2);
92	}
93
94	float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
	// Calls vfmlalq_lane_high_f16 with the constant lane index 3 and returns
	// its result. Here c is the narrower <4 x half> vector while b is
	// <8 x half>. The FileCheck expectations below require %c to go through a
	// shufflevector whose eight mask elements are all 3 (widening it to
	// <8 x half>), and that shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal2.v4f32.v8f16; %a and %b are passed unchanged.
95	// CHECK-LABEL: define <4 x float> @test_vfmlalq_lane_high_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
96	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
97	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
98	// CHECK: ret <4 x float> [[RESULT]]
99	return vfmlalq_lane_high_f16(a, b, c, 3);
100	}
101
102	float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
	// Calls vfmlal_laneq_low_f16 with the constant lane index 4 and returns its
	// result. Here c is the wider <8 x half> vector while b is <4 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose four mask elements are all 4 (narrowing it to <4 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal.v2f32.v4f16; %a and %b are passed unchanged.
103	// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_low_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
104	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
105	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
106	// CHECK: ret <2 x float> [[RESULT]]
107	return vfmlal_laneq_low_f16(a, b, c, 4);
108	}
109
110	float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
	// Calls vfmlal_laneq_high_f16 with the constant lane index 5 and returns
	// its result. Here c is the wider <8 x half> vector while b is <4 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose four mask elements are all 5 (narrowing it to <4 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal2.v2f32.v4f16; %a and %b are passed unchanged.
111	// CHECK-LABEL: define <2 x float> @test_vfmlal_laneq_high_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
112	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
113	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
114	// CHECK: ret <2 x float> [[RESULT]]
115	return vfmlal_laneq_high_f16(a, b, c, 5);
116	}
117
118	float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Calls vfmlalq_laneq_low_f16 with the constant lane index 6 and returns
	// its result. The FileCheck expectations below require %c (<8 x half>) to
	// go through a shufflevector whose eight mask elements are all 6, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal.v4f32.v8f16; %a and %b are passed unchanged.
119	// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
120	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
121	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
122	// CHECK: ret <4 x float> [[RESULT]]
123	return vfmlalq_laneq_low_f16(a, b, c, 6);
124	}
125
126	float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Calls vfmlalq_laneq_high_f16 with the constant lane index 7 and returns
	// its result. The FileCheck expectations below require %c (<8 x half>) to
	// go through a shufflevector whose eight mask elements are all 7, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlal2.v4f32.v8f16; %a and %b are passed unchanged.
127	// CHECK-LABEL: define <4 x float> @test_vfmlalq_laneq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
128	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
129	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
130	// CHECK: ret <4 x float> [[RESULT]]
131	return vfmlalq_laneq_high_f16(a, b, c, 7);
132	}
133
134	float32x2_t test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Calls vfmlsl_lane_low_f16 with the constant lane index 0 and returns its
	// result. The FileCheck expectations below require %c (<4 x half>) to go
	// through a shufflevector with an all-zero mask (zeroinitializer) yielding
	// <4 x half>, and that shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl.v2f32.v4f16; %a and %b are passed unchanged.
135	// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_low_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
136	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
137	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
138	// CHECK: ret <2 x float> [[RESULT]]
139	return vfmlsl_lane_low_f16(a, b, c, 0);
140	}
141
142	float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) {
	// Calls vfmlsl_lane_high_f16 with the constant lane index 1 and returns its
	// result. The FileCheck expectations below require %c (<4 x half>) to go
	// through a shufflevector whose four mask elements are all 1, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl2.v2f32.v4f16; %a and %b are passed unchanged.
143	// CHECK-LABEL: define <2 x float> @test_vfmlsl_lane_high_f16(<2 x float> %a, <4 x half> %b, <4 x half> %c)
144	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
145	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
146	// CHECK: ret <2 x float> [[RESULT]]
147	return vfmlsl_lane_high_f16(a, b, c, 1);
148	}
149
150	float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
	// Calls vfmlslq_lane_low_f16 with the constant lane index 2 and returns its
	// result. Here c is the narrower <4 x half> vector while b is <8 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose eight mask elements are all 2 (widening it to <8 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl.v4f32.v8f16; %a and %b are passed unchanged.
151	// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_low_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
152	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
153	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
154	// CHECK: ret <4 x float> [[RESULT]]
155	return vfmlslq_lane_low_f16(a, b, c, 2);
156	}
157
158	float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) {
	// Calls vfmlslq_lane_high_f16 with the constant lane index 3 and returns
	// its result. Here c is the narrower <4 x half> vector while b is
	// <8 x half>. The FileCheck expectations below require %c to go through a
	// shufflevector whose eight mask elements are all 3 (widening it to
	// <8 x half>), and that shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl2.v4f32.v8f16; %a and %b are passed unchanged.
159	// CHECK-LABEL: define <4 x float> @test_vfmlslq_lane_high_f16(<4 x float> %a, <8 x half> %b, <4 x half> %c)
160	// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
161	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
162	// CHECK: ret <4 x float> [[RESULT]]
163	return vfmlslq_lane_high_f16(a, b, c, 3);
164	}
165
166	float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
	// Calls vfmlsl_laneq_low_f16 with the constant lane index 4 and returns its
	// result. Here c is the wider <8 x half> vector while b is <4 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose four mask elements are all 4 (narrowing it to <4 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl.v2f32.v4f16; %a and %b are passed unchanged.
167	// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_low_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
168	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
169	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
170	// CHECK: ret <2 x float> [[RESULT]]
171	return vfmlsl_laneq_low_f16(a, b, c, 4);
172	}
173
174	float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) {
	// Calls vfmlsl_laneq_high_f16 with the constant lane index 5 and returns
	// its result. Here c is the wider <8 x half> vector while b is <4 x half>.
	// The FileCheck expectations below require %c to go through a shufflevector
	// whose four mask elements are all 5 (narrowing it to <4 x half>), and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl2.v2f32.v4f16; %a and %b are passed unchanged.
175	// CHECK-LABEL: define <2 x float> @test_vfmlsl_laneq_high_f16(<2 x float> %a, <4 x half> %b, <8 x half> %c)
176	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
177	// CHECK: [[RESULT:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> %a, <4 x half> %b, <4 x half> [[SHUFFLE]])
178	// CHECK: ret <2 x float> [[RESULT]]
179	return vfmlsl_laneq_high_f16(a, b, c, 5);
180	}
181
182	float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Calls vfmlslq_laneq_low_f16 with the constant lane index 6 and returns
	// its result. The FileCheck expectations below require %c (<8 x half>) to
	// go through a shufflevector whose eight mask elements are all 6, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl.v4f32.v8f16; %a and %b are passed unchanged.
183	// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_low_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
184	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
185	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
186	// CHECK: ret <4 x float> [[RESULT]]
187	return vfmlslq_laneq_low_f16(a, b, c, 6);
188	}
189
190	float32x4_t test_vfmlslq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) {
	// Calls vfmlslq_laneq_high_f16 with the constant lane index 7 and returns
	// its result. The FileCheck expectations below require %c (<8 x half>) to
	// go through a shufflevector whose eight mask elements are all 7, and that
	// shuffled value to be the third operand of
	// @llvm.aarch64.neon.fmlsl2.v4f32.v8f16; %a and %b are passed unchanged.
191	// CHECK-LABEL: define <4 x float> @test_vfmlslq_laneq_high_f16(<4 x float> %a, <8 x half> %b, <8 x half> %c)
192	// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
193	// CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> %a, <8 x half> %b, <8 x half> [[SHUFFLE]])
194	// CHECK: ret <4 x float> [[RESULT]]
195	return vfmlslq_laneq_high_f16(a, b, c, 7);
196	}
197

Clang Project