1 | // RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -S -o - -disable-O0-optnone -emit-llvm %s | opt -S -mem2reg | FileCheck %s |
2 | |
3 | // Test ARM64 SIMD copy vector element to vector element: vcopyq_lane* |
4 | |
5 | #include <arm_neon.h> |
6 | |
7 | // CHECK-LABEL: define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %a1, <16 x i8> %a2) #0 { |
8 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a2, i32 13 |
9 | // CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %a1, i8 [[VGETQ_LANE]], i32 3 |
10 | // CHECK: ret <16 x i8> [[VSET_LANE]] |
11 | int8x16_t test_vcopyq_laneq_s8(int8x16_t a1, int8x16_t a2) { |
12 | return vcopyq_laneq_s8(a1, (int64_t) 3, a2, (int64_t) 13); |
13 | } |
14 | |
15 | // CHECK-LABEL: define <16 x i8> @test_vcopyq_laneq_u8(<16 x i8> %a1, <16 x i8> %a2) #0 { |
16 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a2, i32 13 |
17 | // CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %a1, i8 [[VGETQ_LANE]], i32 3 |
18 | // CHECK: ret <16 x i8> [[VSET_LANE]] |
19 | uint8x16_t test_vcopyq_laneq_u8(uint8x16_t a1, uint8x16_t a2) { |
20 | return vcopyq_laneq_u8(a1, (int64_t) 3, a2, (int64_t) 13); |
21 | |
22 | } |
23 | |
24 | // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_s16(<8 x i16> %a1, <8 x i16> %a2) #0 { |
25 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8> |
26 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> |
27 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 |
28 | // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8> |
29 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> |
30 | // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3 |
31 | // CHECK: ret <8 x i16> [[VSET_LANE]] |
32 | int16x8_t test_vcopyq_laneq_s16(int16x8_t a1, int16x8_t a2) { |
33 | return vcopyq_laneq_s16(a1, (int64_t) 3, a2, (int64_t) 7); |
34 | |
35 | } |
36 | |
37 | // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_u16(<8 x i16> %a1, <8 x i16> %a2) #0 { |
38 | // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8> |
39 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> |
40 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 |
41 | // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8> |
42 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> |
43 | // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3 |
44 | // CHECK: ret <8 x i16> [[VSET_LANE]] |
45 | uint16x8_t test_vcopyq_laneq_u16(uint16x8_t a1, uint16x8_t a2) { |
46 | return vcopyq_laneq_u16(a1, (int64_t) 3, a2, (int64_t) 7); |
47 | |
48 | } |
49 | |
50 | // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_s32(<4 x i32> %a1, <4 x i32> %a2) #0 { |
51 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8> |
52 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> |
53 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 |
54 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8> |
55 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> |
56 | // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3 |
57 | // CHECK: ret <4 x i32> [[VSET_LANE]] |
58 | int32x4_t test_vcopyq_laneq_s32(int32x4_t a1, int32x4_t a2) { |
59 | return vcopyq_laneq_s32(a1, (int64_t) 3, a2, (int64_t) 3); |
60 | } |
61 | |
62 | // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_u32(<4 x i32> %a1, <4 x i32> %a2) #0 { |
63 | // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8> |
64 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> |
65 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 |
66 | // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8> |
67 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> |
68 | // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3 |
69 | // CHECK: ret <4 x i32> [[VSET_LANE]] |
70 | uint32x4_t test_vcopyq_laneq_u32(uint32x4_t a1, uint32x4_t a2) { |
71 | return vcopyq_laneq_u32(a1, (int64_t) 3, a2, (int64_t) 3); |
72 | } |
73 | |
74 | // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_s64(<2 x i64> %a1, <2 x i64> %a2) #0 { |
75 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8> |
76 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
77 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 |
78 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8> |
79 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> |
80 | // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0 |
81 | // CHECK: ret <2 x i64> [[VSET_LANE]] |
82 | int64x2_t test_vcopyq_laneq_s64(int64x2_t a1, int64x2_t a2) { |
83 | return vcopyq_laneq_s64(a1, (int64_t) 0, a2, (int64_t) 1); |
84 | } |
85 | |
86 | // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_u64(<2 x i64> %a1, <2 x i64> %a2) #0 { |
87 | // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8> |
88 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> |
89 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 |
90 | // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8> |
91 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> |
92 | // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0 |
93 | // CHECK: ret <2 x i64> [[VSET_LANE]] |
94 | uint64x2_t test_vcopyq_laneq_u64(uint64x2_t a1, uint64x2_t a2) { |
95 | return vcopyq_laneq_u64(a1, (int64_t) 0, a2, (int64_t) 1); |
96 | } |
97 | |
98 | // CHECK-LABEL: define <4 x float> @test_vcopyq_laneq_f32(<4 x float> %a1, <4 x float> %a2) #0 { |
99 | // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a2 to <16 x i8> |
100 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> |
101 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 |
102 | // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %a1 to <16 x i8> |
103 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> |
104 | // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP3]], float [[VGETQ_LANE]], i32 0 |
105 | // CHECK: ret <4 x float> [[VSET_LANE]] |
106 | float32x4_t test_vcopyq_laneq_f32(float32x4_t a1, float32x4_t a2) { |
107 | return vcopyq_laneq_f32(a1, 0, a2, 3); |
108 | } |
109 | |
110 | // CHECK-LABEL: define <2 x double> @test_vcopyq_laneq_f64(<2 x double> %a1, <2 x double> %a2) #0 { |
111 | // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a2 to <16 x i8> |
112 | // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> |
113 | // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 |
114 | // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %a1 to <16 x i8> |
115 | // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> |
116 | // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> [[TMP3]], double [[VGETQ_LANE]], i32 0 |
117 | // CHECK: ret <2 x double> [[VSET_LANE]] |
118 | float64x2_t test_vcopyq_laneq_f64(float64x2_t a1, float64x2_t a2) { |
119 | return vcopyq_laneq_f64(a1, 0, a2, 1); |
120 | } |
121 | |
122 | |