1 | // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ |
2 | // RUN: -S -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | \ |
3 | // RUN: FileCheck -check-prefixes=CHECK,CHECK-A64 %s |
4 | // RUN: %clang_cc1 -triple armv8-none-linux-gnueabi -target-feature +neon \ |
5 | // RUN: -target-feature +fp16 -S -disable-O0-optnone -emit-llvm -o - %s | \ |
6 | // RUN: opt -S -mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s |
7 | |
8 | #include <arm_neon.h> |
9 | |
10 | // CHECK-LABEL: @test_vst1_f16_x2( |
11 | // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 |
12 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 |
13 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 |
14 | // CHECK-A64: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8 |
15 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x half>]* %coerce.dive to [2 x i64]* |
16 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
17 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8* |
18 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8* |
19 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
20 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
21 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 |
22 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
23 | // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 |
24 | // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> |
25 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 |
26 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
27 | // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 |
28 | // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
29 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF:(half|i16)]]> |
30 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]> |
31 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
32 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4f16.p0f16(<4 x half> [[TMP7]], <4 x half> [[TMP8]], half* [[TMP9]]) |
33 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* [[TMP9]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]]) |
34 | // CHECK: ret void |
35 | void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { |
36 | vst1_f16_x2(a, b); |
37 | } |
38 | |
39 | // CHECK-LABEL: @test_vst1_f16_x3( |
40 | // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 |
41 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 |
42 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 |
43 | // CHECK-A64: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8 |
44 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x half>]* %coerce.dive to [3 x i64]* |
45 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
46 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8* |
47 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8* |
48 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
49 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
50 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 |
51 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
52 | // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 |
53 | // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> |
54 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 |
55 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
56 | // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 |
57 | // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
58 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 |
59 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
60 | // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 |
61 | // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> |
62 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF]]> |
63 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]> |
64 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x [[HALF]]> |
65 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
66 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4f16.p0f16(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], half* [[TMP12]]) |
67 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* [[TMP12]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]]) |
68 | // CHECK: ret void |
69 | void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { |
70 | vst1_f16_x3(a, b); |
71 | } |
72 | |
73 | // CHECK-LABEL: @test_vst1_f16_x4( |
74 | // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 |
75 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 |
76 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 |
77 | // CHECK-A64: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8 |
78 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x half>]* %coerce.dive to [4 x i64]* |
79 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
80 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8* |
81 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8* |
82 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
83 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
84 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 |
85 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
86 | // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 |
87 | // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> |
88 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 |
89 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
90 | // CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 |
91 | // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> |
92 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 |
93 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
94 | // CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 |
95 | // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> |
96 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 |
97 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
98 | // CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 |
99 | // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> |
100 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x [[HALF]]> |
101 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x [[HALF]]> |
102 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x [[HALF]]> |
103 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x [[HALF]]> |
104 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
105 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4f16.p0f16(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], half* [[TMP15]]) |
106 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* [[TMP15]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]]) |
107 | // CHECK: ret void |
108 | void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { |
109 | vst1_f16_x4(a, b); |
110 | } |
111 | |
112 | // CHECK-LABEL: @test_vst1_f32_x2( |
113 | // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 |
114 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 |
115 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 |
116 | // CHECK-A64: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8 |
117 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <2 x float>]* %coerce.dive to [2 x i64]* |
118 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
119 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8* |
120 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8* |
121 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
122 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
123 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 |
124 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
125 | // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 |
126 | // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> |
127 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 |
128 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
129 | // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 |
130 | // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> |
131 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> |
132 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> |
133 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float* |
134 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> [[TMP7]], <2 x float> [[TMP8]], float* [[TMP9]]) |
135 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0f32.v2f32(float* [[TMP9]], <2 x float> [[TMP7]], <2 x float> [[TMP8]]) |
136 | // CHECK: ret void |
137 | void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { |
138 | vst1_f32_x2(a, b); |
139 | } |
140 | |
141 | // CHECK-LABEL: @test_vst1_f32_x3( |
142 | // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 |
143 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 |
144 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 |
145 | // CHECK-A64: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8 |
146 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <2 x float>]* %coerce.dive to [3 x i64]* |
147 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
148 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8* |
149 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8* |
150 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
151 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
152 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 |
153 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
154 | // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 |
155 | // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> |
156 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 |
157 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
158 | // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 |
159 | // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> |
160 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 |
161 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
162 | // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 |
163 | // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> |
164 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> |
165 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> |
166 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> |
167 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float* |
168 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]]) |
169 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0f32.v2f32(float* [[TMP12]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]]) |
170 | // CHECK: ret void |
171 | void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { |
172 | vst1_f32_x3(a, b); |
173 | } |
174 | |
175 | // CHECK-LABEL: @test_vst1_f32_x4( |
176 | // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 |
177 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 |
178 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 |
179 | // CHECK-A64: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8 |
180 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <2 x float>]* %coerce.dive to [4 x i64]* |
181 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
182 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8* |
183 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8* |
184 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
185 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
186 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 |
187 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
188 | // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 |
189 | // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> |
190 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 |
191 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
192 | // CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 |
193 | // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> |
194 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 |
195 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
196 | // CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 |
197 | // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> |
198 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 |
199 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
200 | // CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 |
201 | // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> |
202 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> |
203 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> |
204 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> |
205 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> |
206 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* |
207 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]]) |
208 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0f32.v2f32(float* [[TMP15]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]]) |
209 | // CHECK: ret void |
210 | void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { |
211 | vst1_f32_x4(a, b); |
212 | } |
213 | |
214 | // CHECK-LABEL: @test_vst1_p16_x2( |
215 | // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 |
216 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 |
217 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 |
218 | // CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
219 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x i16>]* %coerce.dive to [2 x i64]* |
220 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
221 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8* |
222 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8* |
223 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
224 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
225 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 |
226 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
227 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
228 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
229 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 |
230 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
231 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
232 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
233 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
234 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
235 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
236 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]]) |
237 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* [[TMP9]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]]) |
238 | // CHECK: ret void |
239 | void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) { |
240 | vst1_p16_x2(a, b); |
241 | } |
242 | |
243 | // CHECK-LABEL: @test_vst1_p16_x3( |
244 | // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 |
245 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 |
246 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 |
247 | // CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
248 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x i16>]* %coerce.dive to [3 x i64]* |
249 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
250 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8* |
251 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8* |
252 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
253 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
254 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 |
255 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
256 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
257 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
258 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 |
259 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
260 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
261 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
262 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 |
263 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
264 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
265 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
266 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
267 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
268 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
269 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
270 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) |
271 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* [[TMP12]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]]) |
272 | // CHECK: ret void |
273 | void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { |
274 | vst1_p16_x3(a, b); |
275 | } |
276 | |
277 | // CHECK-LABEL: @test_vst1_p16_x4( |
278 | // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 |
279 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 |
280 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 |
281 | // CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
282 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x i16>]* %coerce.dive to [4 x i64]* |
283 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
284 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8* |
285 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8* |
286 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
287 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
288 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 |
289 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
290 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
291 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
292 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 |
293 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
294 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
295 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
296 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 |
297 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
298 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
299 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
300 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 |
301 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
302 | // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 |
303 | // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> |
304 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
305 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
306 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
307 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> |
308 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
309 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) |
310 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* [[TMP15]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]]) |
311 | // CHECK: ret void |
312 | void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { |
313 | vst1_p16_x4(a, b); |
314 | } |
315 | |
316 | // CHECK-LABEL: @test_vst1_p8_x2( |
317 | // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 |
318 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 |
319 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 |
320 | // CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
321 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i8>]* %coerce.dive to [2 x i64]* |
322 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
323 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8* |
324 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8* |
325 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
326 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 |
327 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
328 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
329 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 |
330 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
331 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
332 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) |
333 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) |
334 | // CHECK: ret void |
335 | void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { |
336 | vst1_p8_x2(a, b); |
337 | } |
338 | |
339 | // CHECK-LABEL: @test_vst1_p8_x3( |
340 | // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 |
341 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 |
342 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 |
343 | // CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
344 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i8>]* %coerce.dive to [3 x i64]* |
345 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
346 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8* |
347 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8* |
348 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
349 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 |
350 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
351 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
352 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 |
353 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
354 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
355 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 |
356 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
357 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
358 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) |
359 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]]) |
360 | // CHECK: ret void |
361 | void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { |
362 | vst1_p8_x3(a, b); |
363 | } |
364 | |
365 | // CHECK-LABEL: @test_vst1_p8_x4( |
366 | // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 |
367 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 |
368 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 |
369 | // CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
370 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i8>]* %coerce.dive to [4 x i64]* |
371 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
372 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8* |
373 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8* |
374 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
375 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 |
376 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
377 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
378 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 |
379 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
380 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
381 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 |
382 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
383 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
384 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 |
385 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
386 | // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 |
387 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) |
388 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]]) |
389 | // CHECK: ret void |
390 | void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { |
391 | vst1_p8_x4(a, b); |
392 | } |
393 | |
394 | // CHECK-LABEL: @test_vst1_s16_x2( |
395 | // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 |
396 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 |
397 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 |
398 | // CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
399 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x i16>]* %coerce.dive to [2 x i64]* |
400 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
401 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8* |
402 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8* |
403 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
404 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
405 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 |
406 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
407 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
408 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
409 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 |
410 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
411 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
412 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
413 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
414 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
415 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
416 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]]) |
417 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* [[TMP9]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]]) |
418 | // CHECK: ret void |
419 | void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) { |
420 | vst1_s16_x2(a, b); |
421 | } |
422 | |
423 | // CHECK-LABEL: @test_vst1_s16_x3( |
424 | // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 |
425 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 |
426 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 |
427 | // CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
428 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x i16>]* %coerce.dive to [3 x i64]* |
429 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
430 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8* |
431 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8* |
432 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
433 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
434 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 |
435 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
436 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
437 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
438 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 |
439 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
440 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
441 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
442 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 |
443 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
444 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
445 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
446 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
447 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
448 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
449 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
450 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) |
451 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* [[TMP12]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]]) |
452 | // CHECK: ret void |
453 | void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { |
454 | vst1_s16_x3(a, b); |
455 | } |
456 | |
457 | // CHECK-LABEL: @test_vst1_s16_x4( |
458 | // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 |
459 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 |
460 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 |
461 | // CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
462 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x i16>]* %coerce.dive to [4 x i64]* |
463 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
464 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8* |
465 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8* |
466 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
467 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
468 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 |
469 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
470 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
471 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
472 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 |
473 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
474 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
475 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
476 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 |
477 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
478 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
479 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
480 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 |
481 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
482 | // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 |
483 | // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> |
484 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
485 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
486 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
487 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> |
488 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
489 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) |
490 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* [[TMP15]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]]) |
491 | // CHECK: ret void |
492 | void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { |
493 | vst1_s16_x4(a, b); |
494 | } |
495 | |
496 | // CHECK-LABEL: @test_vst1_s32_x2( |
497 | // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 |
498 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 |
499 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 |
500 | // CHECK-A64: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
501 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <2 x i32>]* %coerce.dive to [2 x i64]* |
502 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
503 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8* |
504 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8* |
505 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
506 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
507 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 |
508 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
509 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
510 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
511 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 |
512 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
513 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
514 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
515 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
516 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
517 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* |
518 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]]) |
519 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* [[TMP9]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]]) |
520 | // CHECK: ret void |
521 | void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { |
522 | vst1_s32_x2(a, b); |
523 | } |
524 | |
525 | // CHECK-LABEL: @test_vst1_s32_x3( |
526 | // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 |
527 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 |
528 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 |
529 | // CHECK-A64: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
530 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <2 x i32>]* %coerce.dive to [3 x i64]* |
531 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
532 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8* |
533 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8* |
534 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
535 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
536 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 |
537 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
538 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
539 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
540 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 |
541 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
542 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
543 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
544 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 |
545 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
546 | // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 |
547 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> |
548 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
549 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
550 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> |
551 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* |
552 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) |
553 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]]) |
554 | // CHECK: ret void |
555 | void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { |
556 | vst1_s32_x3(a, b); |
557 | } |
558 | |
559 | // CHECK-LABEL: @test_vst1_s32_x4( |
560 | // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 |
561 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 |
562 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 |
563 | // CHECK-A64: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
564 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <2 x i32>]* %coerce.dive to [4 x i64]* |
565 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
566 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8* |
567 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8* |
568 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
569 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
570 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 |
571 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
572 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
573 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
574 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 |
575 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
576 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
577 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
578 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 |
579 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
580 | // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 |
581 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> |
582 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 |
583 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
584 | // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 |
585 | // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> |
586 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
587 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
588 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> |
589 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> |
590 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* |
591 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) |
592 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* [[TMP15]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]]) |
593 | // CHECK: ret void |
594 | void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { |
595 | vst1_s32_x4(a, b); |
596 | } |
597 | |
598 | // CHECK-LABEL: @test_vst1_s64_x2( |
599 | // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 |
600 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 |
601 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 |
602 | // CHECK-A64: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
603 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <1 x i64>]* %coerce.dive to [2 x i64]* |
604 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
605 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8* |
606 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8* |
607 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
608 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
609 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 |
610 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
611 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
612 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
613 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 |
614 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
615 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
616 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
617 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
618 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
619 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* |
620 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]]) |
621 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* [[TMP9]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]]) |
622 | // CHECK: ret void |
623 | void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { |
624 | vst1_s64_x2(a, b); |
625 | } |
626 | |
627 | // CHECK-LABEL: @test_vst1_s64_x3( |
628 | // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 |
629 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 |
630 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 |
631 | // CHECK-A64: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
632 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <1 x i64>]* %coerce.dive to [3 x i64]* |
633 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
634 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8* |
635 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8* |
636 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
637 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
638 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 |
639 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
640 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
641 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
642 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 |
643 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
644 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
645 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
646 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 |
647 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
648 | // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 |
649 | // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> |
650 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
651 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
652 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> |
653 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* |
654 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) |
655 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* [[TMP12]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]]) |
656 | // CHECK: ret void |
657 | void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { |
658 | vst1_s64_x3(a, b); |
659 | } |
660 | |
661 | // CHECK-LABEL: @test_vst1_s64_x4( |
662 | // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 |
663 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 |
664 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 |
665 | // CHECK-A64: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
666 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <1 x i64>]* %coerce.dive to [4 x i64]* |
667 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
668 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8* |
669 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8* |
670 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
671 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
672 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 |
673 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
674 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
675 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
676 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 |
677 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
678 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
679 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
680 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 |
681 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
682 | // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 |
683 | // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> |
684 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 |
685 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
686 | // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 |
687 | // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> |
688 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
689 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
690 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> |
691 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> |
692 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* |
693 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) |
694 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* [[TMP15]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]]) |
695 | // CHECK: ret void |
696 | void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { |
697 | vst1_s64_x4(a, b); |
698 | } |
699 | |
700 | // CHECK-LABEL: @test_vst1_s8_x2( |
701 | // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 |
702 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 |
703 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 |
704 | // CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
705 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i8>]* %coerce.dive to [2 x i64]* |
706 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
707 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8* |
708 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8* |
709 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
710 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 |
711 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
712 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
713 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 |
714 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
715 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
716 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) |
717 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) |
718 | // CHECK: ret void |
719 | void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { |
720 | vst1_s8_x2(a, b); |
721 | } |
722 | |
723 | // CHECK-LABEL: @test_vst1_s8_x3( |
724 | // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 |
725 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 |
726 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 |
727 | // CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
728 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i8>]* %coerce.dive to [3 x i64]* |
729 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
730 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8* |
731 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8* |
732 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
733 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 |
734 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
735 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
736 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 |
737 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
738 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
739 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 |
740 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
741 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
742 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) |
743 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]]) |
744 | // CHECK: ret void |
745 | void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { |
746 | vst1_s8_x3(a, b); |
747 | } |
748 | |
749 | // CHECK-LABEL: @test_vst1_s8_x4( |
750 | // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 |
751 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 |
752 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 |
753 | // CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
754 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i8>]* %coerce.dive to [4 x i64]* |
755 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
756 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8* |
757 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8* |
758 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
759 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 |
760 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
761 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
762 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 |
763 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
764 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
765 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 |
766 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
767 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
768 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 |
769 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
770 | // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 |
771 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) |
772 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]]) |
773 | // CHECK: ret void |
774 | void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { |
775 | vst1_s8_x4(a, b); |
776 | } |
777 | |
778 | // CHECK-LABEL: @test_vst1_u16_x2( |
779 | // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 |
780 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 |
781 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 |
782 | // CHECK-A64: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
783 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x i16>]* %coerce.dive to [2 x i64]* |
784 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
785 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8* |
786 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8* |
787 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
788 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
789 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 |
790 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
791 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
792 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
793 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 |
794 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
795 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
796 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
797 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
798 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
799 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
800 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i16* [[TMP9]]) |
801 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* [[TMP9]], <4 x i16> [[TMP7]], <4 x i16> [[TMP8]]) |
802 | // CHECK: ret void |
803 | void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { |
804 | vst1_u16_x2(a, b); |
805 | } |
806 | |
807 | // CHECK-LABEL: @test_vst1_u16_x3( |
808 | // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 |
809 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 |
810 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 |
811 | // CHECK-A64: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
812 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x i16>]* %coerce.dive to [3 x i64]* |
813 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
814 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8* |
815 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8* |
816 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
817 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
818 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 |
819 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
820 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
821 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
822 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 |
823 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
824 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
825 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
826 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 |
827 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
828 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
829 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
830 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
831 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
832 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
833 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
834 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) |
835 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* [[TMP12]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]]) |
836 | // CHECK: ret void |
837 | void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { |
838 | vst1_u16_x3(a, b); |
839 | } |
840 | |
841 | // CHECK-LABEL: @test_vst1_u16_x4( |
842 | // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 |
843 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 |
844 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 |
845 | // CHECK-A64: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8 |
846 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x i16>]* %coerce.dive to [4 x i64]* |
847 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
848 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8* |
849 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8* |
850 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
851 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
852 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 |
853 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
854 | // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 |
855 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> |
856 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 |
857 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
858 | // CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 |
859 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> |
860 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 |
861 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
862 | // CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 |
863 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> |
864 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 |
865 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
866 | // CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 |
867 | // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> |
868 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> |
869 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> |
870 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> |
871 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> |
872 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
873 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) |
874 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* [[TMP15]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]]) |
875 | // CHECK: ret void |
876 | void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { |
877 | vst1_u16_x4(a, b); |
878 | } |
879 | |
880 | // CHECK-LABEL: @test_vst1_u32_x2( |
881 | // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 |
882 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 |
883 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 |
884 | // CHECK-A64: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
885 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <2 x i32>]* %coerce.dive to [2 x i64]* |
886 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
887 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8* |
888 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8* |
889 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
890 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
891 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 |
892 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
893 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
894 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
895 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 |
896 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
897 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
898 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
899 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
900 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
901 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* |
902 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i32* [[TMP9]]) |
903 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* [[TMP9]], <2 x i32> [[TMP7]], <2 x i32> [[TMP8]]) |
904 | // CHECK: ret void |
905 | void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { |
906 | vst1_u32_x2(a, b); |
907 | } |
908 | |
909 | // CHECK-LABEL: @test_vst1_u32_x3( |
910 | // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 |
911 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 |
912 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 |
913 | // CHECK-A64: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
914 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <2 x i32>]* %coerce.dive to [3 x i64]* |
915 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
916 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8* |
917 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8* |
918 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
919 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
920 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 |
921 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
922 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
923 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
924 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 |
925 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
926 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
927 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
928 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 |
929 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
930 | // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 |
931 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> |
932 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
933 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
934 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> |
935 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* |
936 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) |
937 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]]) |
938 | // CHECK: ret void |
939 | void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { |
940 | vst1_u32_x3(a, b); |
941 | } |
942 | |
943 | // CHECK-LABEL: @test_vst1_u32_x4( |
944 | // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 |
945 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 |
946 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 |
947 | // CHECK-A64: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8 |
948 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <2 x i32>]* %coerce.dive to [4 x i64]* |
949 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
950 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8* |
951 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8* |
952 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
953 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
954 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 |
955 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
956 | // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 |
957 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> |
958 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 |
959 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
960 | // CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 |
961 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> |
962 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 |
963 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
964 | // CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 |
965 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> |
966 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 |
967 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
968 | // CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 |
969 | // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> |
970 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> |
971 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> |
972 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> |
973 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> |
974 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* |
975 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) |
976 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* [[TMP15]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]]) |
977 | // CHECK: ret void |
978 | void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { |
979 | vst1_u32_x4(a, b); |
980 | } |
981 | |
982 | // CHECK-LABEL: @test_vst1_u64_x2( |
983 | // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 |
984 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 |
985 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 |
986 | // CHECK-A64: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
987 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <1 x i64>]* %coerce.dive to [2 x i64]* |
988 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
989 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8* |
990 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8* |
991 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
992 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
993 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 |
994 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
995 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
996 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
997 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 |
998 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
999 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
1000 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
1001 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
1002 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
1003 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* |
1004 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64* [[TMP9]]) |
1005 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* [[TMP9]], <1 x i64> [[TMP7]], <1 x i64> [[TMP8]]) |
1006 | // CHECK: ret void |
1007 | void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { |
1008 | vst1_u64_x2(a, b); |
1009 | } |
1010 | |
1011 | // CHECK-LABEL: @test_vst1_u64_x3( |
1012 | // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 |
1013 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 |
1014 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 |
1015 | // CHECK-A64: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
1016 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <1 x i64>]* %coerce.dive to [3 x i64]* |
1017 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1018 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8* |
1019 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8* |
1020 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
1021 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
1022 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 |
1023 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1024 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
1025 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
1026 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 |
1027 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1028 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
1029 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
1030 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 |
1031 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1032 | // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 |
1033 | // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> |
1034 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
1035 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
1036 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> |
1037 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* |
1038 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) |
1039 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* [[TMP12]], <1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]]) |
1040 | // CHECK: ret void |
1041 | void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { |
1042 | vst1_u64_x3(a, b); |
1043 | } |
1044 | |
1045 | // CHECK-LABEL: @test_vst1_u64_x4( |
1046 | // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 |
1047 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 |
1048 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 |
1049 | // CHECK-A64: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8 |
1050 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <1 x i64>]* %coerce.dive to [4 x i64]* |
1051 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1052 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8* |
1053 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8* |
1054 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
1055 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
1056 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 |
1057 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1058 | // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 |
1059 | // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> |
1060 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 |
1061 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1062 | // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 |
1063 | // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> |
1064 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 |
1065 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1066 | // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 |
1067 | // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> |
1068 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 |
1069 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1070 | // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 |
1071 | // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> |
1072 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> |
1073 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> |
1074 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> |
1075 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> |
1076 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* |
1077 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) |
1078 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* [[TMP15]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]]) |
1079 | // CHECK: ret void |
1080 | void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { |
1081 | vst1_u64_x4(a, b); |
1082 | } |
1083 | |
1084 | // CHECK-LABEL: @test_vst1_u8_x2( |
1085 | // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 |
1086 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 |
1087 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 |
1088 | // CHECK-A64: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
1089 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i8>]* %coerce.dive to [2 x i64]* |
1090 | // CHECK-A32: store [2 x i64] %b.coerce, [2 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1091 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8* |
1092 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8* |
1093 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 16, i1 false) |
1094 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 |
1095 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1096 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
1097 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 |
1098 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1099 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
1100 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) |
1101 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]]) |
1102 | // CHECK: ret void |
1103 | void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { |
1104 | vst1_u8_x2(a, b); |
1105 | } |
1106 | |
1107 | // CHECK-LABEL: @test_vst1_u8_x3( |
1108 | // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 |
1109 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 |
1110 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 |
1111 | // CHECK-A64: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
1112 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i8>]* %coerce.dive to [3 x i64]* |
1113 | // CHECK-A32: store [3 x i64] %b.coerce, [3 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1114 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8* |
1115 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8* |
1116 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 24, i1 false) |
1117 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 |
1118 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1119 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
1120 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 |
1121 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1122 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
1123 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 |
1124 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1125 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
1126 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) |
1127 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]]) |
1128 | // CHECK: ret void |
1129 | void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { |
1130 | vst1_u8_x3(a, b); |
1131 | } |
1132 | |
1133 | // CHECK-LABEL: @test_vst1_u8_x4( |
1134 | // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 |
1135 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 |
1136 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 |
1137 | // CHECK-A64: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 |
1138 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i8>]* %coerce.dive to [4 x i64]* |
1139 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1140 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8* |
1141 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8* |
1142 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], {{i64|i32}} 32, i1 false) |
1143 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 |
1144 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1145 | // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 |
1146 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 |
1147 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1148 | // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 |
1149 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 |
1150 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1151 | // CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 |
1152 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 |
1153 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1154 | // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 |
1155 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) |
1156 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]]) |
1157 | // CHECK: ret void |
1158 | void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { |
1159 | vst1_u8_x4(a, b); |
1160 | } |
1161 | |
1162 | // CHECK-LABEL: @test_vst1q_f16_x2( |
1163 | // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align [[QALIGN:(16|8)]] |
1164 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align [[QALIGN]] |
1165 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 |
1166 | // CHECK-A64: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16 |
1167 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x half>]* %coerce.dive to [4 x i64]* |
1168 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1169 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8* |
1170 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8* |
1171 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1172 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
1173 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 |
1174 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1175 | // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align [[QALIGN]] |
1176 | // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> |
1177 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 |
1178 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1179 | // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align [[QALIGN]] |
1180 | // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
1181 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]> |
1182 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]> |
1183 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
1184 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8f16.p0f16(<8 x half> [[TMP7]], <8 x half> [[TMP8]], half* [[TMP9]]) |
1185 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* [[TMP9]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) |
1186 | // CHECK: ret void |
1187 | void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { |
1188 | vst1q_f16_x2(a, b); |
1189 | } |
1190 | |
1191 | // CHECK-LABEL: @test_vst1q_f16_x3( |
1192 | // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align [[QALIGN]] |
1193 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align [[QALIGN]] |
1194 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 |
1195 | // CHECK-A64: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16 |
1196 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x half>]* %coerce.dive to [6 x i64]* |
1197 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1198 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8* |
1199 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8* |
1200 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1201 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
1202 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 |
1203 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1204 | // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align [[QALIGN]] |
1205 | // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> |
1206 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 |
1207 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1208 | // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align [[QALIGN]] |
1209 | // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
1210 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 |
1211 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1212 | // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align [[QALIGN]] |
1213 | // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> |
1214 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]> |
1215 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]> |
1216 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x [[HALF]]> |
1217 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
1218 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8f16.p0f16(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], half* [[TMP12]]) |
1219 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* [[TMP12]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) |
1220 | // CHECK: ret void |
1221 | void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { |
1222 | vst1q_f16_x3(a, b); |
1223 | } |
1224 | |
1225 | // CHECK-LABEL: @test_vst1q_f16_x4( |
1226 | // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align [[QALIGN]] |
1227 | // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align [[QALIGN]] |
1228 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 |
1229 | // CHECK-A64: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16 |
1230 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x half>]* %coerce.dive to [8 x i64]* |
1231 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1232 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8* |
1233 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8* |
1234 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1235 | // CHECK: [[TMP2:%.*]] = bitcast half* %a to i8* |
1236 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 |
1237 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1238 | // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align [[QALIGN]] |
1239 | // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> |
1240 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 |
1241 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1242 | // CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align [[QALIGN]] |
1243 | // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> |
1244 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 |
1245 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1246 | // CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align [[QALIGN]] |
1247 | // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> |
1248 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 |
1249 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1250 | // CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align [[QALIGN]] |
1251 | // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> |
1252 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x [[HALF]]> |
1253 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x [[HALF]]> |
1254 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x [[HALF]]> |
1255 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x [[HALF]]> |
1256 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to [[HALF]]* |
1257 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8f16.p0f16(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], half* [[TMP15]]) |
1258 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* [[TMP15]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]]) |
1259 | // CHECK: ret void |
1260 | void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { |
1261 | vst1q_f16_x4(a, b); |
1262 | } |
1263 | |
1264 | // CHECK-LABEL: @test_vst1q_f32_x2( |
1265 | // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align [[QALIGN]] |
1266 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align [[QALIGN]] |
1267 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 |
1268 | // CHECK-A64: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16 |
1269 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x float>]* %coerce.dive to [4 x i64]* |
1270 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1271 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8* |
1272 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8* |
1273 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1274 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
1275 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 |
1276 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1277 | // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align [[QALIGN]] |
1278 | // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> |
1279 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 |
1280 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1281 | // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align [[QALIGN]] |
1282 | // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> |
1283 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> |
1284 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> |
1285 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to float* |
1286 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> [[TMP7]], <4 x float> [[TMP8]], float* [[TMP9]]) |
1287 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0f32.v4f32(float* [[TMP9]], <4 x float> [[TMP7]], <4 x float> [[TMP8]]) |
1288 | // CHECK: ret void |
1289 | void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { |
1290 | vst1q_f32_x2(a, b); |
1291 | } |
1292 | |
1293 | // CHECK-LABEL: @test_vst1q_f32_x3( |
1294 | // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align [[QALIGN]] |
1295 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align [[QALIGN]] |
1296 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 |
1297 | // CHECK-A64: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16 |
1298 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x float>]* %coerce.dive to [6 x i64]* |
1299 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1300 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8* |
1301 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8* |
1302 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1303 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
1304 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 |
1305 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1306 | // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align [[QALIGN]] |
1307 | // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> |
1308 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 |
1309 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1310 | // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align [[QALIGN]] |
1311 | // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> |
1312 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 |
1313 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1314 | // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align [[QALIGN]] |
1315 | // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> |
1316 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> |
1317 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> |
1318 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> |
1319 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float* |
1320 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]]) |
1321 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0f32.v4f32(float* [[TMP12]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]]) |
1322 | // CHECK: ret void |
1323 | void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { |
1324 | vst1q_f32_x3(a, b); |
1325 | } |
1326 | |
1327 | // CHECK-LABEL: @test_vst1q_f32_x4( |
1328 | // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align [[QALIGN]] |
1329 | // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align [[QALIGN]] |
1330 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 |
1331 | // CHECK-A64: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16 |
1332 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x float>]* %coerce.dive to [8 x i64]* |
1333 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1334 | // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8* |
1335 | // CHECK: [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8* |
1336 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1337 | // CHECK: [[TMP2:%.*]] = bitcast float* %a to i8* |
1338 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 |
1339 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1340 | // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align [[QALIGN]] |
1341 | // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> |
1342 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 |
1343 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1344 | // CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align [[QALIGN]] |
1345 | // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> |
1346 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 |
1347 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1348 | // CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align [[QALIGN]] |
1349 | // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> |
1350 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 |
1351 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1352 | // CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align [[QALIGN]] |
1353 | // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> |
1354 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> |
1355 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> |
1356 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> |
1357 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> |
1358 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* |
1359 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]]) |
1360 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0f32.v4f32(float* [[TMP15]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]]) |
1361 | // CHECK: ret void |
1362 | void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { |
1363 | vst1q_f32_x4(a, b); |
1364 | } |
1365 | |
1366 | // CHECK-LABEL: @test_vst1q_p16_x2( |
1367 | // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align [[QALIGN]] |
1368 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align [[QALIGN]] |
1369 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 |
1370 | // CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1371 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i16>]* %coerce.dive to [4 x i64]* |
1372 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1373 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8* |
1374 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8* |
1375 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1376 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1377 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 |
1378 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1379 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1380 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1381 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 |
1382 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1383 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1384 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1385 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1386 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1387 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
1388 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) |
1389 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* [[TMP9]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) |
1390 | // CHECK: ret void |
1391 | void test_vst1q_p16_x2(poly16_t *a, poly16x8x2_t b) { |
1392 | vst1q_p16_x2(a, b); |
1393 | } |
1394 | |
1395 | // CHECK-LABEL: @test_vst1q_p16_x3( |
1396 | // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align [[QALIGN]] |
1397 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align [[QALIGN]] |
1398 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 |
1399 | // CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1400 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i16>]* %coerce.dive to [6 x i64]* |
1401 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1402 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8* |
1403 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8* |
1404 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1405 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1406 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 |
1407 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1408 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1409 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1410 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 |
1411 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1412 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1413 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1414 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 |
1415 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1416 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
1417 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
1418 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1419 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1420 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
1421 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
1422 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) |
1423 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* [[TMP12]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) |
1424 | // CHECK: ret void |
1425 | void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { |
1426 | vst1q_p16_x3(a, b); |
1427 | } |
1428 | |
1429 | // CHECK-LABEL: @test_vst1q_p16_x4( |
1430 | // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align [[QALIGN]] |
1431 | // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align [[QALIGN]] |
1432 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 |
1433 | // CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1434 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i16>]* %coerce.dive to [8 x i64]* |
1435 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1436 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8* |
1437 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8* |
1438 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1439 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1440 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 |
1441 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1442 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1443 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1444 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 |
1445 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1446 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1447 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1448 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 |
1449 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1450 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
1451 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
1452 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 |
1453 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1454 | // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align [[QALIGN]] |
1455 | // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> |
1456 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1457 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1458 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
1459 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> |
1460 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
1461 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) |
1462 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* [[TMP15]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]]) |
1463 | // CHECK: ret void |
1464 | void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { |
1465 | vst1q_p16_x4(a, b); |
1466 | } |
1467 | |
1468 | // CHECK-LABEL: @test_vst1q_p8_x2( |
1469 | // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align [[QALIGN]] |
1470 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align [[QALIGN]] |
1471 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 |
1472 | // CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1473 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <16 x i8>]* %coerce.dive to [4 x i64]* |
1474 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1475 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8* |
1476 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8* |
1477 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1478 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 |
1479 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1480 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1481 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 |
1482 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1483 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1484 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) |
1485 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) |
1486 | // CHECK: ret void |
1487 | void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { |
1488 | vst1q_p8_x2(a, b); |
1489 | } |
1490 | |
1491 | // CHECK-LABEL: @test_vst1q_p8_x3( |
1492 | // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align [[QALIGN]] |
1493 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align [[QALIGN]] |
1494 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 |
1495 | // CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1496 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <16 x i8>]* %coerce.dive to [6 x i64]* |
1497 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1498 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8* |
1499 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8* |
1500 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1501 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 |
1502 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1503 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1504 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 |
1505 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1506 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1507 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 |
1508 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1509 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
1510 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) |
1511 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]]) |
1512 | // CHECK: ret void |
1513 | void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { |
1514 | vst1q_p8_x3(a, b); |
1515 | } |
1516 | |
1517 | // CHECK-LABEL: @test_vst1q_p8_x4( |
1518 | // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align [[QALIGN]] |
1519 | // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align [[QALIGN]] |
1520 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 |
1521 | // CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1522 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <16 x i8>]* %coerce.dive to [8 x i64]* |
1523 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1524 | // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8* |
1525 | // CHECK: [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8* |
1526 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1527 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 |
1528 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1529 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1530 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 |
1531 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1532 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1533 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 |
1534 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1535 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
1536 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 |
1537 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1538 | // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align [[QALIGN]] |
1539 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) |
1540 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) |
1541 | // CHECK: ret void |
1542 | void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { |
1543 | vst1q_p8_x4(a, b); |
1544 | } |
1545 | |
1546 | // CHECK-LABEL: @test_vst1q_s16_x2( |
1547 | // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align [[QALIGN]] |
1548 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align [[QALIGN]] |
1549 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 |
1550 | // CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1551 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i16>]* %coerce.dive to [4 x i64]* |
1552 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1553 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8* |
1554 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8* |
1555 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1556 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1557 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 |
1558 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1559 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1560 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1561 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 |
1562 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1563 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1564 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1565 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1566 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1567 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
1568 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) |
1569 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* [[TMP9]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) |
1570 | // CHECK: ret void |
1571 | void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { |
1572 | vst1q_s16_x2(a, b); |
1573 | } |
1574 | |
1575 | // CHECK-LABEL: @test_vst1q_s16_x3( |
1576 | // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align [[QALIGN]] |
1577 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align [[QALIGN]] |
1578 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 |
1579 | // CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1580 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i16>]* %coerce.dive to [6 x i64]* |
1581 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1582 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8* |
1583 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8* |
1584 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1585 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1586 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 |
1587 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1588 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1589 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1590 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 |
1591 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1592 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1593 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1594 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 |
1595 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1596 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
1597 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
1598 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1599 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1600 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
1601 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
1602 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) |
1603 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* [[TMP12]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) |
1604 | // CHECK: ret void |
1605 | void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { |
1606 | vst1q_s16_x3(a, b); |
1607 | } |
1608 | |
1609 | // CHECK-LABEL: @test_vst1q_s16_x4( |
1610 | // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align [[QALIGN]] |
1611 | // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align [[QALIGN]] |
1612 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 |
1613 | // CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1614 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i16>]* %coerce.dive to [8 x i64]* |
1615 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1616 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8* |
1617 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8* |
1618 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1619 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1620 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 |
1621 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1622 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1623 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1624 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 |
1625 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1626 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1627 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1628 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 |
1629 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1630 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
1631 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
1632 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 |
1633 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1634 | // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align [[QALIGN]] |
1635 | // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> |
1636 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1637 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1638 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
1639 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> |
1640 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
1641 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) |
1642 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* [[TMP15]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]]) |
1643 | // CHECK: ret void |
1644 | void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { |
1645 | vst1q_s16_x4(a, b); |
1646 | } |
1647 | |
1648 | // CHECK-LABEL: @test_vst1q_s32_x2( |
1649 | // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align [[QALIGN]] |
1650 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align [[QALIGN]] |
1651 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 |
1652 | // CHECK-A64: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
1653 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x i32>]* %coerce.dive to [4 x i64]* |
1654 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1655 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8* |
1656 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8* |
1657 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1658 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
1659 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 |
1660 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1661 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
1662 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
1663 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 |
1664 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1665 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
1666 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
1667 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
1668 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
1669 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* |
1670 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]]) |
1671 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* [[TMP9]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) |
1672 | // CHECK: ret void |
1673 | void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { |
1674 | vst1q_s32_x2(a, b); |
1675 | } |
1676 | |
1677 | // CHECK-LABEL: @test_vst1q_s32_x3( |
1678 | // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align [[QALIGN]] |
1679 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align [[QALIGN]] |
1680 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 |
1681 | // CHECK-A64: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
1682 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x i32>]* %coerce.dive to [6 x i64]* |
1683 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1684 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8* |
1685 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8* |
1686 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1687 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
1688 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 |
1689 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1690 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
1691 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
1692 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 |
1693 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1694 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
1695 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
1696 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 |
1697 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1698 | // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align [[QALIGN]] |
1699 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> |
1700 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
1701 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
1702 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> |
1703 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* |
1704 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]]) |
1705 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* [[TMP12]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) |
1706 | // CHECK: ret void |
1707 | void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { |
1708 | vst1q_s32_x3(a, b); |
1709 | } |
1710 | |
1711 | // CHECK-LABEL: @test_vst1q_s32_x4( |
1712 | // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align [[QALIGN]] |
1713 | // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align [[QALIGN]] |
1714 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 |
1715 | // CHECK-A64: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
1716 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x i32>]* %coerce.dive to [8 x i64]* |
1717 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1718 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8* |
1719 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8* |
1720 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1721 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
1722 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 |
1723 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1724 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
1725 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
1726 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 |
1727 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1728 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
1729 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
1730 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 |
1731 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1732 | // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align [[QALIGN]] |
1733 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> |
1734 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 |
1735 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1736 | // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align [[QALIGN]] |
1737 | // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> |
1738 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
1739 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
1740 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> |
1741 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> |
1742 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* |
1743 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]]) |
1744 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* [[TMP15]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]]) |
1745 | // CHECK: ret void |
1746 | void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { |
1747 | vst1q_s32_x4(a, b); |
1748 | } |
1749 | |
1750 | // CHECK-LABEL: @test_vst1q_s64_x2( |
1751 | // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align [[QALIGN]] |
1752 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align [[QALIGN]] |
1753 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0 |
1754 | // CHECK-A64: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
1755 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <2 x i64>]* %coerce.dive to [4 x i64]* |
1756 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1757 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8* |
1758 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x2_t* [[B]] to i8* |
1759 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1760 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
1761 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 |
1762 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1763 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
1764 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
1765 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 |
1766 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1767 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
1768 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
1769 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
1770 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
1771 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* |
1772 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]]) |
1773 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* [[TMP9]], <2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) |
1774 | // CHECK: ret void |
1775 | void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { |
1776 | vst1q_s64_x2(a, b); |
1777 | } |
1778 | |
1779 | // CHECK-LABEL: @test_vst1q_s64_x3( |
1780 | // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align [[QALIGN]] |
1781 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align [[QALIGN]] |
1782 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0 |
1783 | // CHECK-A64: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
1784 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <2 x i64>]* %coerce.dive to [6 x i64]* |
1785 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1786 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8* |
1787 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x3_t* [[B]] to i8* |
1788 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1789 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
1790 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 |
1791 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1792 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
1793 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
1794 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 |
1795 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1796 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
1797 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
1798 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 |
1799 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1800 | // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align [[QALIGN]] |
1801 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> |
1802 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
1803 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
1804 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> |
1805 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* |
1806 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) |
1807 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* [[TMP12]], <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) |
1808 | // CHECK: ret void |
1809 | void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { |
1810 | vst1q_s64_x3(a, b); |
1811 | } |
1812 | |
1813 | // CHECK-LABEL: @test_vst1q_s64_x4( |
1814 | // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align [[QALIGN]] |
1815 | // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align [[QALIGN]] |
1816 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0 |
1817 | // CHECK-A64: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
1818 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <2 x i64>]* %coerce.dive to [8 x i64]* |
1819 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1820 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8* |
1821 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int64x2x4_t* [[B]] to i8* |
1822 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1823 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
1824 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 |
1825 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1826 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
1827 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
1828 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 |
1829 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1830 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
1831 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
1832 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 |
1833 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1834 | // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align [[QALIGN]] |
1835 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> |
1836 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 |
1837 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1838 | // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align [[QALIGN]] |
1839 | // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> |
1840 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
1841 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
1842 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> |
1843 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> |
1844 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* |
1845 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) |
1846 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* [[TMP15]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) |
1847 | // CHECK: ret void |
1848 | void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { |
1849 | vst1q_s64_x4(a, b); |
1850 | } |
1851 | |
1852 | // CHECK-LABEL: @test_vst1q_s8_x2( |
1853 | // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align [[QALIGN]] |
1854 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align [[QALIGN]] |
1855 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 |
1856 | // CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1857 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <16 x i8>]* %coerce.dive to [4 x i64]* |
1858 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1859 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8* |
1860 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8* |
1861 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1862 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 |
1863 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1864 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1865 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 |
1866 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1867 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1868 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) |
1869 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) |
1870 | // CHECK: ret void |
1871 | void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { |
1872 | vst1q_s8_x2(a, b); |
1873 | } |
1874 | |
1875 | // CHECK-LABEL: @test_vst1q_s8_x3( |
1876 | // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align [[QALIGN]] |
1877 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align [[QALIGN]] |
1878 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 |
1879 | // CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1880 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <16 x i8>]* %coerce.dive to [6 x i64]* |
1881 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1882 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8* |
1883 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8* |
1884 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1885 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 |
1886 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1887 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1888 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 |
1889 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1890 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1891 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 |
1892 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1893 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
1894 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) |
1895 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]]) |
1896 | // CHECK: ret void |
1897 | void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { |
1898 | vst1q_s8_x3(a, b); |
1899 | } |
1900 | |
1901 | // CHECK-LABEL: @test_vst1q_s8_x4( |
1902 | // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align [[QALIGN]] |
1903 | // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align [[QALIGN]] |
1904 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 |
1905 | // CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
1906 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <16 x i8>]* %coerce.dive to [8 x i64]* |
1907 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1908 | // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8* |
1909 | // CHECK: [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8* |
1910 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
1911 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 |
1912 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1913 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
1914 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 |
1915 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1916 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
1917 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 |
1918 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1919 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
1920 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 |
1921 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
1922 | // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align [[QALIGN]] |
1923 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) |
1924 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) |
1925 | // CHECK: ret void |
1926 | void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { |
1927 | vst1q_s8_x4(a, b); |
1928 | } |
1929 | |
1930 | // CHECK-LABEL: @test_vst1q_u16_x2( |
1931 | // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align [[QALIGN]] |
1932 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align [[QALIGN]] |
1933 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 |
1934 | // CHECK-A64: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1935 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <8 x i16>]* %coerce.dive to [4 x i64]* |
1936 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1937 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8* |
1938 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8* |
1939 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
1940 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1941 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 |
1942 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1943 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1944 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1945 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 |
1946 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1947 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1948 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1949 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1950 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1951 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i16* |
1952 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i16* [[TMP9]]) |
1953 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* [[TMP9]], <8 x i16> [[TMP7]], <8 x i16> [[TMP8]]) |
1954 | // CHECK: ret void |
1955 | void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { |
1956 | vst1q_u16_x2(a, b); |
1957 | } |
1958 | |
1959 | // CHECK-LABEL: @test_vst1q_u16_x3( |
1960 | // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align [[QALIGN]] |
1961 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align [[QALIGN]] |
1962 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 |
1963 | // CHECK-A64: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1964 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <8 x i16>]* %coerce.dive to [6 x i64]* |
1965 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
1966 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8* |
1967 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8* |
1968 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
1969 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
1970 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 |
1971 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
1972 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
1973 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
1974 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 |
1975 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
1976 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
1977 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
1978 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 |
1979 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
1980 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
1981 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
1982 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
1983 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
1984 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
1985 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* |
1986 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) |
1987 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* [[TMP12]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]]) |
1988 | // CHECK: ret void |
1989 | void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { |
1990 | vst1q_u16_x3(a, b); |
1991 | } |
1992 | |
1993 | // CHECK-LABEL: @test_vst1q_u16_x4( |
1994 | // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align [[QALIGN]] |
1995 | // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align [[QALIGN]] |
1996 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 |
1997 | // CHECK-A64: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16 |
1998 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <8 x i16>]* %coerce.dive to [8 x i64]* |
1999 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2000 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8* |
2001 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8* |
2002 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
2003 | // CHECK: [[TMP2:%.*]] = bitcast i16* %a to i8* |
2004 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 |
2005 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2006 | // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align [[QALIGN]] |
2007 | // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> |
2008 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 |
2009 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2010 | // CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align [[QALIGN]] |
2011 | // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> |
2012 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 |
2013 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2014 | // CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align [[QALIGN]] |
2015 | // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> |
2016 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 |
2017 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
2018 | // CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align [[QALIGN]] |
2019 | // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> |
2020 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> |
2021 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> |
2022 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> |
2023 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> |
2024 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* |
2025 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) |
2026 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* [[TMP15]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]]) |
2027 | // CHECK: ret void |
2028 | void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { |
2029 | vst1q_u16_x4(a, b); |
2030 | } |
2031 | |
2032 | // CHECK-LABEL: @test_vst1q_u32_x2( |
2033 | // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align [[QALIGN]] |
2034 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align [[QALIGN]] |
2035 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 |
2036 | // CHECK-A64: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
2037 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <4 x i32>]* %coerce.dive to [4 x i64]* |
2038 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2039 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8* |
2040 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8* |
2041 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
2042 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
2043 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 |
2044 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2045 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
2046 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
2047 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 |
2048 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2049 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
2050 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
2051 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
2052 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
2053 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i32* |
2054 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i32* [[TMP9]]) |
2055 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* [[TMP9]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]]) |
2056 | // CHECK: ret void |
2057 | void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { |
2058 | vst1q_u32_x2(a, b); |
2059 | } |
2060 | |
2061 | // CHECK-LABEL: @test_vst1q_u32_x3( |
2062 | // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align [[QALIGN]] |
2063 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align [[QALIGN]] |
2064 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 |
2065 | // CHECK-A64: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
2066 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <4 x i32>]* %coerce.dive to [6 x i64]* |
2067 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2068 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8* |
2069 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8* |
2070 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
2071 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
2072 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 |
2073 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2074 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
2075 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
2076 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 |
2077 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2078 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
2079 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
2080 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 |
2081 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2082 | // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align [[QALIGN]] |
2083 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> |
2084 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
2085 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
2086 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> |
2087 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* |
2088 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]]) |
2089 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* [[TMP12]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) |
2090 | // CHECK: ret void |
2091 | void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { |
2092 | vst1q_u32_x3(a, b); |
2093 | } |
2094 | |
2095 | // CHECK-LABEL: @test_vst1q_u32_x4( |
2096 | // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align [[QALIGN]] |
2097 | // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align [[QALIGN]] |
2098 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 |
2099 | // CHECK-A64: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16 |
2100 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <4 x i32>]* %coerce.dive to [8 x i64]* |
2101 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2102 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8* |
2103 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8* |
2104 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
2105 | // CHECK: [[TMP2:%.*]] = bitcast i32* %a to i8* |
2106 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 |
2107 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2108 | // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align [[QALIGN]] |
2109 | // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> |
2110 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 |
2111 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2112 | // CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align [[QALIGN]] |
2113 | // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> |
2114 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 |
2115 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2116 | // CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align [[QALIGN]] |
2117 | // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> |
2118 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 |
2119 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
2120 | // CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align [[QALIGN]] |
2121 | // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> |
2122 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> |
2123 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> |
2124 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> |
2125 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> |
2126 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* |
2127 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]]) |
2128 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* [[TMP15]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]]) |
2129 | // CHECK: ret void |
2130 | void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { |
2131 | vst1q_u32_x4(a, b); |
2132 | } |
2133 | |
2134 | // CHECK-LABEL: @test_vst1q_u64_x2( |
2135 | // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align [[QALIGN]] |
2136 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align [[QALIGN]] |
2137 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0 |
2138 | // CHECK-A64: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
2139 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <2 x i64>]* %coerce.dive to [4 x i64]* |
2140 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2141 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8* |
2142 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x2_t* [[B]] to i8* |
2143 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
2144 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
2145 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 |
2146 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2147 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
2148 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
2149 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 |
2150 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2151 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
2152 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
2153 | // CHECK-DAG: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
2154 | // CHECK-DAG: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
2155 | // CHECK-DAG: [[TMP9:%.*]] = bitcast i8* [[TMP2]] to i64* |
2156 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64* [[TMP9]]) |
2157 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* [[TMP9]], <2 x i64> [[TMP7]], <2 x i64> [[TMP8]]) |
2158 | // CHECK: ret void |
2159 | void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { |
2160 | vst1q_u64_x2(a, b); |
2161 | } |
2162 | |
2163 | // CHECK-LABEL: @test_vst1q_u64_x3( |
2164 | // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align [[QALIGN]] |
2165 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align [[QALIGN]] |
2166 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0 |
2167 | // CHECK-A64: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
2168 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <2 x i64>]* %coerce.dive to [6 x i64]* |
2169 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2170 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8* |
2171 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x3_t* [[B]] to i8* |
2172 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
2173 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
2174 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 |
2175 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2176 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
2177 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
2178 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 |
2179 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2180 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
2181 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
2182 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 |
2183 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2184 | // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align [[QALIGN]] |
2185 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> |
2186 | // CHECK-DAG: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
2187 | // CHECK-DAG: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
2188 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> |
2189 | // CHECK-DAG: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* |
2190 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) |
2191 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* [[TMP12]], <2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]]) |
2192 | // CHECK: ret void |
2193 | void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { |
2194 | vst1q_u64_x3(a, b); |
2195 | } |
2196 | |
2197 | // CHECK-LABEL: @test_vst1q_u64_x4( |
2198 | // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align [[QALIGN]] |
2199 | // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align [[QALIGN]] |
2200 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0 |
2201 | // CHECK-A64: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16 |
2202 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <2 x i64>]* %coerce.dive to [8 x i64]* |
2203 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2204 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8* |
2205 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint64x2x4_t* [[B]] to i8* |
2206 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
2207 | // CHECK: [[TMP2:%.*]] = bitcast i64* %a to i8* |
2208 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 |
2209 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2210 | // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align [[QALIGN]] |
2211 | // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> |
2212 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 |
2213 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2214 | // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align [[QALIGN]] |
2215 | // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> |
2216 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 |
2217 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2218 | // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align [[QALIGN]] |
2219 | // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> |
2220 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 |
2221 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
2222 | // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align [[QALIGN]] |
2223 | // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> |
2224 | // CHECK-DAG: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> |
2225 | // CHECK-DAG: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> |
2226 | // CHECK-DAG: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> |
2227 | // CHECK-DAG: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> |
2228 | // CHECK-DAG: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* |
2229 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) |
2230 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* [[TMP15]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) |
2231 | // CHECK: ret void |
2232 | void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { |
2233 | vst1q_u64_x4(a, b); |
2234 | } |
2235 | |
2236 | // CHECK-LABEL: @test_vst1q_u8_x2( |
2237 | // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align [[QALIGN]] |
2238 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align [[QALIGN]] |
2239 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 |
2240 | // CHECK-A64: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
2241 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [2 x <16 x i8>]* %coerce.dive to [4 x i64]* |
2242 | // CHECK-A32: store [4 x i64] %b.coerce, [4 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2243 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8* |
2244 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8* |
2245 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 32, i1 false) |
2246 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 |
2247 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2248 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
2249 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 |
2250 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2251 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
2252 | // CHECK-A64: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) |
2253 | // CHECK-A32: call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]) |
2254 | // CHECK: ret void |
2255 | void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { |
2256 | vst1q_u8_x2(a, b); |
2257 | } |
2258 | |
2259 | // CHECK-LABEL: @test_vst1q_u8_x3( |
2260 | // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align [[QALIGN]] |
2261 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align [[QALIGN]] |
2262 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 |
2263 | // CHECK-A64: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
2264 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [3 x <16 x i8>]* %coerce.dive to [6 x i64]* |
2265 | // CHECK-A32: store [6 x i64] %b.coerce, [6 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2266 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8* |
2267 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8* |
2268 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 48, i1 false) |
2269 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 |
2270 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2271 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
2272 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 |
2273 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2274 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
2275 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 |
2276 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2277 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
2278 | // CHECK-A64: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) |
2279 | // CHECK-A32: call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]]) |
2280 | // CHECK: ret void |
2281 | void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { |
2282 | vst1q_u8_x3(a, b); |
2283 | } |
2284 | |
2285 | // CHECK-LABEL: @test_vst1q_u8_x4( |
2286 | // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align [[QALIGN]] |
2287 | // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align [[QALIGN]] |
2288 | // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 |
2289 | // CHECK-A64: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 |
2290 | // CHECK-A32: [[COERCE_DIVE_TMP:%.*]] = bitcast [4 x <16 x i8>]* %coerce.dive to [8 x i64]* |
2291 | // CHECK-A32: store [8 x i64] %b.coerce, [8 x i64]* [[COERCE_DIVE_TMP]], align 8 |
2292 | // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8* |
2293 | // CHECK: [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8* |
2294 | // CHECK: call void @llvm.memcpy.p0i8.p0i8.{{i64|i32}}(i8* align [[QALIGN]] [[TMP0]], i8* align [[QALIGN]] [[TMP1]], {{i64|i32}} 64, i1 false) |
2295 | // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 |
2296 | // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], {{i64|i32}} 0, {{i64|i32}} 0 |
2297 | // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align [[QALIGN]] |
2298 | // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 |
2299 | // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], {{i64|i32}} 0, {{i64|i32}} 1 |
2300 | // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align [[QALIGN]] |
2301 | // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 |
2302 | // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], {{i64|i32}} 0, {{i64|i32}} 2 |
2303 | // CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align [[QALIGN]] |
2304 | // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 |
2305 | // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], {{i64|i32}} 0, {{i64|i32}} 3 |
2306 | // CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align [[QALIGN]] |
2307 | // CHECK-A64: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) |
2308 | // CHECK-A32: call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) |
2309 | // CHECK: ret void |
2310 | void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { |
2311 | vst1q_u8_x4(a, b); |
2312 | } |
2313 | |