// aarch64-neon-tbl.c source code [clang_source_code/test/CodeGen/aarch64-neon-tbl.c]

// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test new aarch64 intrinsics and types

#include <arm_neon.h>

8	// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
9	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
10	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
11	// CHECK: ret <8 x i8> [[VTBL11_I]]
12	int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
13	return vtbl1_s8(a, b);
14	}
15
16	// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #1 {
17	// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
18	// CHECK: ret <8 x i8> [[VTBL1_I]]
19	int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
20	return vqtbl1_s8(a, b);
21	}
22
23	// CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
24	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
25	// CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
26	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[A]], i32 0, i32 0
27	// CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
28	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[A]], i32 0, i32 0
29	// CHECK: [[TMP0:%.]] = load [2 x <8 x i8>], [2 x <8 x i8>] [[COERCE_DIVE1]], align 8
30	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P0_I]], i32 0, i32 0
31	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
32	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P0_I]], i32 0, i32 0
33	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>] [[VAL_I]], i64 0, i64 0
34	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
35	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P0_I]], i32 0, i32 0
36	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
37	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
38	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
39	// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
40	// CHECK: ret <8 x i8> [[VTBL13_I]]
41	int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
42	return vtbl2_s8(a, b);
43	}
44
45	// CHECK-LABEL: define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
46	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
47	// CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
48	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[A]], i32 0, i32 0
49	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
50	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[A]], i32 0, i32 0
51	// CHECK: [[TMP0:%.]] = load [2 x <16 x i8>], [2 x <16 x i8>] [[COERCE_DIVE1]], align 16
52	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
53	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
54	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
55	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL_I]], i64 0, i64 0
56	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
57	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
58	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
59	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
60	// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
61	// CHECK: ret <8 x i8> [[VTBL2_I]]
62	int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
63	return vqtbl2_s8(a, b);
64	}
65
66	// CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
67	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
68	// CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
69	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[A]], i32 0, i32 0
70	// CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
71	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[A]], i32 0, i32 0
72	// CHECK: [[TMP0:%.]] = load [3 x <8 x i8>], [3 x <8 x i8>] [[COERCE_DIVE1]], align 8
73	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P0_I]], i32 0, i32 0
74	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
75	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P0_I]], i32 0, i32 0
76	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL_I]], i64 0, i64 0
77	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
78	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P0_I]], i32 0, i32 0
79	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
80	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
81	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P0_I]], i32 0, i32 0
82	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL3_I]], i64 0, i64 2
83	// CHECK: [[TMP3:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX4_I]], align 8
84	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
85	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
86	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
87	// CHECK: ret <8 x i8> [[VTBL26_I]]
88	int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
89	return vtbl3_s8(a, b);
90	}
91
92	// CHECK-LABEL: define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
93	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
94	// CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
95	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[A]], i32 0, i32 0
96	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
97	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[A]], i32 0, i32 0
98	// CHECK: [[TMP0:%.]] = load [3 x <16 x i8>], [3 x <16 x i8>] [[COERCE_DIVE1]], align 16
99	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
100	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
101	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
102	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL_I]], i64 0, i64 0
103	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
104	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
105	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
106	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
107	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
108	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL3_I]], i64 0, i64 2
109	// CHECK: [[TMP3:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX4_I]], align 16
110	// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
111	// CHECK: ret <8 x i8> [[VTBL3_I]]
112	int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
113	return vqtbl3_s8(a, b);
114	}
115
116	// CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
117	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
118	// CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
119	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[A]], i32 0, i32 0
120	// CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
121	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[A]], i32 0, i32 0
122	// CHECK: [[TMP0:%.]] = load [4 x <8 x i8>], [4 x <8 x i8>] [[COERCE_DIVE1]], align 8
123	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P0_I]], i32 0, i32 0
124	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
125	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P0_I]], i32 0, i32 0
126	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL_I]], i64 0, i64 0
127	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
128	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P0_I]], i32 0, i32 0
129	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
130	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
131	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P0_I]], i32 0, i32 0
132	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL3_I]], i64 0, i64 2
133	// CHECK: [[TMP3:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX4_I]], align 8
134	// CHECK: [[VAL5_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P0_I]], i32 0, i32 0
135	// CHECK: [[ARRAYIDX6_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL5_I]], i64 0, i64 3
136	// CHECK: [[TMP4:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX6_I]], align 8
137	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
138	// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
139	// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
140	// CHECK: ret <8 x i8> [[VTBL28_I]]
141	int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
142	return vtbl4_s8(a, b);
143	}
144
145	// CHECK-LABEL: define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
146	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
147	// CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
148	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[A]], i32 0, i32 0
149	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
150	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[A]], i32 0, i32 0
151	// CHECK: [[TMP0:%.]] = load [4 x <16 x i8>], [4 x <16 x i8>] [[COERCE_DIVE1]], align 16
152	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
153	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
154	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
155	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL_I]], i64 0, i64 0
156	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
157	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
158	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
159	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
160	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
161	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL3_I]], i64 0, i64 2
162	// CHECK: [[TMP3:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX4_I]], align 16
163	// CHECK: [[VAL5_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
164	// CHECK: [[ARRAYIDX6_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL5_I]], i64 0, i64 3
165	// CHECK: [[TMP4:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX6_I]], align 16
166	// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
167	// CHECK: ret <8 x i8> [[VTBL4_I]]
168	int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
169	return vqtbl4_s8(a, b);
170	}
171
172	// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #1 {
173	// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
174	// CHECK: ret <16 x i8> [[VTBL1_I]]
175	int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
176	return vqtbl1q_s8(a, b);
177	}
178
179	// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
180	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
181	// CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
182	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[A]], i32 0, i32 0
183	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
184	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[A]], i32 0, i32 0
185	// CHECK: [[TMP0:%.]] = load [2 x <16 x i8>], [2 x <16 x i8>] [[COERCE_DIVE1]], align 16
186	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
187	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
188	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
189	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL_I]], i64 0, i64 0
190	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
191	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P0_I]], i32 0, i32 0
192	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
193	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
194	// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
195	// CHECK: ret <16 x i8> [[VTBL2_I]]
196	int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
197	return vqtbl2q_s8(a, b);
198	}
199
200	// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
201	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
202	// CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
203	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[A]], i32 0, i32 0
204	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
205	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[A]], i32 0, i32 0
206	// CHECK: [[TMP0:%.]] = load [3 x <16 x i8>], [3 x <16 x i8>] [[COERCE_DIVE1]], align 16
207	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
208	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
209	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
210	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL_I]], i64 0, i64 0
211	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
212	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
213	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
214	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
215	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t [[__P0_I]], i32 0, i32 0
216	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>] [[VAL3_I]], i64 0, i64 2
217	// CHECK: [[TMP3:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX4_I]], align 16
218	// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
219	// CHECK: ret <16 x i8> [[VTBL3_I]]
220	int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
221	return vqtbl3q_s8(a, b);
222	}
223
224	// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
225	// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
226	// CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
227	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[A]], i32 0, i32 0
228	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
229	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[A]], i32 0, i32 0
230	// CHECK: [[TMP0:%.]] = load [4 x <16 x i8>], [4 x <16 x i8>] [[COERCE_DIVE1]], align 16
231	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
232	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
233	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
234	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL_I]], i64 0, i64 0
235	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
236	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
237	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
238	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
239	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
240	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL3_I]], i64 0, i64 2
241	// CHECK: [[TMP3:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX4_I]], align 16
242	// CHECK: [[VAL5_I:%.]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t [[__P0_I]], i32 0, i32 0
243	// CHECK: [[ARRAYIDX6_I:%.]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>] [[VAL5_I]], i64 0, i64 3
244	// CHECK: [[TMP4:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX6_I]], align 16
245	// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
246	// CHECK: ret <16 x i8> [[VTBL4_I]]
247	int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
248	return vqtbl4q_s8(a, b);
249	}
250
251	// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
252	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
253	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
254	// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
255	// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
256	// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
257	// CHECK: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
258	// CHECK: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
259	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
260	// CHECK: ret <8 x i8> [[VTBX_I]]
261	int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
262	return vtbx1_s8(a, b, c);
263	}
264
265	// CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
266	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
267	// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
268	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[B]], i32 0, i32 0
269	// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
270	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[B]], i32 0, i32 0
271	// CHECK: [[TMP0:%.]] = load [2 x <8 x i8>], [2 x <8 x i8>] [[COERCE_DIVE1]], align 8
272	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P1_I]], i32 0, i32 0
273	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
274	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P1_I]], i32 0, i32 0
275	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>] [[VAL_I]], i64 0, i64 0
276	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
277	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t [[__P1_I]], i32 0, i32 0
278	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
279	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
280	// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
281	// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
282	// CHECK: ret <8 x i8> [[VTBX13_I]]
283	int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
284	return vtbx2_s8(a, b, c);
285	}
286
287	// CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
288	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
289	// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
290	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[B]], i32 0, i32 0
291	// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
292	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[B]], i32 0, i32 0
293	// CHECK: [[TMP0:%.]] = load [3 x <8 x i8>], [3 x <8 x i8>] [[COERCE_DIVE1]], align 8
294	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P1_I]], i32 0, i32 0
295	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
296	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P1_I]], i32 0, i32 0
297	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL_I]], i64 0, i64 0
298	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
299	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P1_I]], i32 0, i32 0
300	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
301	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
302	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t [[__P1_I]], i32 0, i32 0
303	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>] [[VAL3_I]], i64 0, i64 2
304	// CHECK: [[TMP3:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX4_I]], align 8
305	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
306	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
307	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
308	// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
309	// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
310	// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
311	// CHECK: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
312	// CHECK: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
313	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
314	// CHECK: ret <8 x i8> [[VTBX_I]]
315	int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
316	return vtbx3_s8(a, b, c);
317	}
318
319	// CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
320	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
321	// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
322	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[B]], i32 0, i32 0
323	// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
324	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[B]], i32 0, i32 0
325	// CHECK: [[TMP0:%.]] = load [4 x <8 x i8>], [4 x <8 x i8>] [[COERCE_DIVE1]], align 8
326	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P1_I]], i32 0, i32 0
327	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
328	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P1_I]], i32 0, i32 0
329	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL_I]], i64 0, i64 0
330	// CHECK: [[TMP1:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX_I]], align 8
331	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P1_I]], i32 0, i32 0
332	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL1_I]], i64 0, i64 1
333	// CHECK: [[TMP2:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX2_I]], align 8
334	// CHECK: [[VAL3_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P1_I]], i32 0, i32 0
335	// CHECK: [[ARRAYIDX4_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL3_I]], i64 0, i64 2
336	// CHECK: [[TMP3:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX4_I]], align 8
337	// CHECK: [[VAL5_I:%.]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t [[__P1_I]], i32 0, i32 0
338	// CHECK: [[ARRAYIDX6_I:%.]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>] [[VAL5_I]], i64 0, i64 3
339	// CHECK: [[TMP4:%.]] = load <8 x i8>, <8 x i8> [[ARRAYIDX6_I]], align 8
340	// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
341	// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
342	// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
343	// CHECK: ret <8 x i8> [[VTBX28_I]]
344	int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
345	return vtbx4_s8(a, b, c);
346	}
347
348	// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
349	// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
350	// CHECK: ret <8 x i8> [[VTBX1_I]]
351	int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
352	return vqtbx1_s8(a, b, c);
353	}
354
355	// CHECK-LABEL: define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
356	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
357	// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
358	// CHECK: [[COERCE_DIVE:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[B]], i32 0, i32 0
359	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
360	// CHECK: [[COERCE_DIVE1:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[B]], i32 0, i32 0
361	// CHECK: [[TMP0:%.]] = load [2 x <16 x i8>], [2 x <16 x i8>] [[COERCE_DIVE1]], align 16
362	// CHECK: [[COERCE_DIVE_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P1_I]], i32 0, i32 0
363	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
364	// CHECK: [[VAL_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P1_I]], i32 0, i32 0
365	// CHECK: [[ARRAYIDX_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL_I]], i64 0, i64 0
366	// CHECK: [[TMP1:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX_I]], align 16
367	// CHECK: [[VAL1_I:%.]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t [[__P1_I]], i32 0, i32 0
368	// CHECK: [[ARRAYIDX2_I:%.]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>] [[VAL1_I]], i64 0, i64 1
369	// CHECK: [[TMP2:%.]] = load <16 x i8>, <16 x i8> [[ARRAYIDX2_I]], align 16
370	// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
371	// CHECK: ret <8 x i8> [[VTBX2_I]]
// Wraps vqtbx2_s8 (two-vector table passed as int8x16x2_t). The FileCheck directives above
// expect both table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx2.v8i8.
372	int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
373	return vqtbx2_s8(a, b, c);
374	}
375
376	// CHECK-LABEL: define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
377	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
378	// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
379	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
380	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
381	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
382	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
383	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
384	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
385	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
386	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
387	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
388	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
389	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
390	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
391	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
392	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
393	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
394	// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
395	// CHECK: ret <8 x i8> [[VTBX3_I]]
// Wraps vqtbx3_s8 (three-vector table passed as int8x16x3_t). The FileCheck directives above
// expect the three table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx3.v8i8.
396	int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
397	return vqtbx3_s8(a, b, c);
398	}
399
400	// CHECK-LABEL: define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
401	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
402	// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
403	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
404	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
405	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
406	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
407	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
408	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
409	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
410	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
411	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
412	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
413	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
414	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
415	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
416	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
417	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
418	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
419	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
420	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
421	// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
422	// CHECK: ret <8 x i8> [[VTBX4_I]]
// Wraps vqtbx4_s8 (four-vector table passed as int8x16x4_t). The FileCheck directives above
// expect the four table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx4.v8i8.
423	int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
424	return vqtbx4_s8(a, b, c);
425	}
426
427	// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
428	// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
429	// CHECK: ret <16 x i8> [[VTBX1_I]]
// Wraps vqtbx1q_s8 (16-byte result, single 16-byte table). The FileCheck directives above expect
// one direct call to @llvm.aarch64.neon.tbx1.v16i8 taking %a, %b and %c, whose result is returned.
430	int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
431	return vqtbx1q_s8(a, b, c);
432	}
433
434	// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
435	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
436	// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
437	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
438	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
439	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
440	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
441	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
442	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
443	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
444	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
445	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
446	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
447	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
448	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
449	// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
450	// CHECK: ret <16 x i8> [[VTBX2_I]]
// Wraps vqtbx2q_s8 (two-vector table passed as int8x16x2_t). The FileCheck directives above
// expect both table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx2.v16i8.
451	int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
452	return vqtbx2q_s8(a, b, c);
453	}
454
455	// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
456	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
457	// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
458	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
459	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
460	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
461	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
462	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
463	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
464	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
465	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
466	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
467	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
468	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
469	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
470	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
471	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
472	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
473	// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
474	// CHECK: ret <16 x i8> [[VTBX3_I]]
// Wraps vqtbx3q_s8 (three-vector table passed as int8x16x3_t). The FileCheck directives above
// expect the three table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx3.v16i8.
475	int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
476	return vqtbx3q_s8(a, b, c);
477	}
478
479	// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
480	// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
481	// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
482	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
483	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
484	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
485	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
486	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
487	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
488	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
489	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
490	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
491	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
492	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
493	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
494	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
495	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
496	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
497	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
498	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
499	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
500	// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
501	// CHECK: ret <16 x i8> [[VTBX4_I]]
// Wraps vqtbx4q_s8 (four-vector table passed as int8x16x4_t). The FileCheck directives above
// expect the four table vectors to be loaded and passed, between %a and %c, to @llvm.aarch64.neon.tbx4.v16i8.
502	int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
503	return vqtbx4q_s8(a, b, c);
504	}
505
506	// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
507	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
508	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
509	// CHECK: ret <8 x i8> [[VTBL11_I]]
// Wraps vtbl1_u8 (8-byte table). The FileCheck directives above expect %a to be combined with
// zeroinitializer by a shufflevector into a 16-byte table, then a call to @llvm.aarch64.neon.tbl1.v8i8 with %b.
510	uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
511	return vtbl1_u8(a, b);
512	}
513
514	// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #1 {
515	// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
516	// CHECK: ret <8 x i8> [[VTBL1_I]]
// Wraps vqtbl1_u8 (single 16-byte table). The FileCheck directives above expect one direct
// call to @llvm.aarch64.neon.tbl1.v8i8 taking %a and %b, whose result is returned.
517	uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
518	return vqtbl1_u8(a, b);
519	}
520
521	// CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
522	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
523	// CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
524	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
525	// CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
526	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
527	// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
528	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
529	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
530	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
531	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
532	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
533	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
534	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
535	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
536	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
537	// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
538	// CHECK: ret <8 x i8> [[VTBL13_I]]
// Wraps vtbl2_u8 (two 8-byte table vectors in uint8x8x2_t). The FileCheck directives above expect
// the two halves to be joined by a shufflevector into one 16-byte table for @llvm.aarch64.neon.tbl1.v8i8.
539	uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
540	return vtbl2_u8(a, b);
541	}
542
543	// CHECK-LABEL: define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
544	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
545	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
546	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
547	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
548	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
549	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
550	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
551	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
552	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
553	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
554	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
555	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
556	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
557	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
558	// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
559	// CHECK: ret <8 x i8> [[VTBL2_I]]
// Wraps vqtbl2_u8 (two-vector table passed as uint8x16x2_t). The FileCheck directives above
// expect both table vectors to be loaded and passed, followed by %b, to @llvm.aarch64.neon.tbl2.v8i8.
560	uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
561	return vqtbl2_u8(a, b);
562	}
563
564	// CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
565	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
566	// CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
567	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
568	// CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
569	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
570	// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
571	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
572	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
573	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
574	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
575	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
576	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
577	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
578	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
579	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
580	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
581	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
582	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
583	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
584	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
585	// CHECK: ret <8 x i8> [[VTBL26_I]]
// Wraps vtbl3_u8 (three 8-byte table vectors in uint8x8x3_t). The FileCheck directives above expect
// vectors 0 and 1 joined by one shufflevector, vector 2 combined with zeroinitializer by another,
// and both 16-byte results passed with %b to @llvm.aarch64.neon.tbl2.v8i8.
586	uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
587	return vtbl3_u8(a, b);
588	}
589
590	// CHECK-LABEL: define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
591	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
592	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
593	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
594	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
595	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
596	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
597	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
598	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
599	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
600	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
601	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
602	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
603	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
604	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
605	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
606	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
607	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
608	// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
609	// CHECK: ret <8 x i8> [[VTBL3_I]]
// Wraps vqtbl3_u8 (three-vector table passed as uint8x16x3_t). The FileCheck directives above
// expect the three table vectors to be loaded and passed, followed by %b, to @llvm.aarch64.neon.tbl3.v8i8.
610	uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
611	return vqtbl3_u8(a, b);
612	}
613
614	// CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
615	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
616	// CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
617	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
618	// CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
619	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
620	// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
621	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
622	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
623	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
624	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
625	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
626	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
627	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
628	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
629	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
630	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
631	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
632	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
633	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
634	// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
635	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
636	// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
637	// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
638	// CHECK: ret <8 x i8> [[VTBL28_I]]
// Wraps vtbl4_u8 (four 8-byte table vectors in uint8x8x4_t). The FileCheck directives above expect
// vectors 0/1 and 2/3 to be joined pairwise by shufflevector into two 16-byte tables, which are
// passed with %b to @llvm.aarch64.neon.tbl2.v8i8.
639	uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
640	return vtbl4_u8(a, b);
641	}
642
643	// CHECK-LABEL: define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
644	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
645	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
646	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
647	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
648	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
649	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
650	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
651	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
652	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
653	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
654	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
655	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
656	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
657	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
658	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
659	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
660	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
661	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
662	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
663	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
664	// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
665	// CHECK: ret <8 x i8> [[VTBL4_I]]
// Wraps vqtbl4_u8 (four-vector table passed as uint8x16x4_t). The FileCheck directives above
// expect the four table vectors to be loaded and passed, followed by %b, to @llvm.aarch64.neon.tbl4.v8i8.
666	uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
667	return vqtbl4_u8(a, b);
668	}
669
670	// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #1 {
671	// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
672	// CHECK: ret <16 x i8> [[VTBL1_I]]
// Wraps vqtbl1q_u8 (16-byte result, single 16-byte table). The FileCheck directives above expect
// one direct call to @llvm.aarch64.neon.tbl1.v16i8 taking %a and %b, whose result is returned.
673	uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
674	return vqtbl1q_u8(a, b);
675	}
676
677	// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
678	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
679	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
680	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
681	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
682	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
683	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
684	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
685	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
686	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
687	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
688	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
689	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
690	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
691	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
692	// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
693	// CHECK: ret <16 x i8> [[VTBL2_I]]
// Wraps vqtbl2q_u8 (two-vector table passed as uint8x16x2_t). The FileCheck directives above
// expect both table vectors to be loaded and passed, followed by %b, to @llvm.aarch64.neon.tbl2.v16i8.
694	uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
695	return vqtbl2q_u8(a, b);
696	}
697
698	// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
699	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
700	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
701	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
702	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
703	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
704	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
705	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
706	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
707	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
708	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
709	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
710	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
711	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
712	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
713	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
714	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
715	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
716	// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
717	// CHECK: ret <16 x i8> [[VTBL3_I]]
// Wraps vqtbl3q_u8 (three-vector table passed as uint8x16x3_t). The FileCheck directives above
// expect the three table vectors to be loaded and passed, followed by %b, to @llvm.aarch64.neon.tbl3.v16i8.
718	uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
719	return vqtbl3q_u8(a, b);
720	}
721
722	// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
723	// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
724	// CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
725	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
726	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
727	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
728	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
729	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
730	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
731	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
732	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
733	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
734	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
735	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
736	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
737	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
738	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
739	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
740	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
741	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
742	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
743	// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
744	// CHECK: ret <16 x i8> [[VTBL4_I]]
// Forwards a and b to vqtbl4q_u8 and returns the result unchanged.
// The FileCheck directives above expect four <16 x i8> values loaded from
// the struct argument to be passed, together with %b, to
// @llvm.aarch64.neon.tbl4.v16i8.
745	uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
746	return vqtbl4q_u8(a, b);
747	}
748
749	// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
750	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
751	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
752	// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
753	// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
754	// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
755	// CHECK: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
756	// CHECK: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
757	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
758	// CHECK: ret <8 x i8> [[VTBX_I]]
// Forwards a, b and c to vtbx1_u8 and returns the result unchanged.
// The FileCheck directives above expect %b to be widened to <16 x i8> with
// zeroinitializer and passed to @llvm.aarch64.neon.tbl1.v8i8, after which a
// mask built from `icmp uge %c, 8` merges %a with the tbl1 result through
// an and/xor/and/or sequence.
759	uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
760	return vtbx1_u8(a, b, c);
761	}
762
763	// CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
764	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
765	// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
766	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
767	// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
768	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
769	// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
770	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
771	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
772	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
773	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
774	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
775	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
776	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
777	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
778	// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
779	// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
780	// CHECK: ret <8 x i8> [[VTBX13_I]]
// Forwards a, b and c to vtbx2_u8 and returns the result unchanged.
// The FileCheck directives above expect the two <8 x i8> elements loaded
// from b to be joined by shufflevector into one <16 x i8>, which is passed
// with %a and %c to @llvm.aarch64.neon.tbx1.v8i8.
781	uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
782	return vtbx2_u8(a, b, c);
783	}
784
785	// CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
786	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
787	// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
788	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
789	// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
790	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
791	// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
792	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
793	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
794	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
795	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
796	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
797	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
798	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
799	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
800	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
801	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
802	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
803	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
804	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
805	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
806	// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
807	// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
808	// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
809	// CHECK: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
810	// CHECK: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
811	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
812	// CHECK: ret <8 x i8> [[VTBX_I]]
// Forwards a, b and c to vtbx3_u8 and returns the result unchanged.
// The FileCheck directives above expect the first two elements of b to be
// joined into one <16 x i8> and the third to be padded with zeroinitializer,
// both fed to @llvm.aarch64.neon.tbl2.v8i8; a mask built from
// `icmp uge %c, 24` then merges %a with the tbl2 result.
813	uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
814	return vtbx3_u8(a, b, c);
815	}
816
817	// CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
818	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
819	// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
820	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
821	// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
822	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
823	// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
824	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
825	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
826	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
827	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
828	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
829	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
830	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
831	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
832	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
833	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
834	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
835	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
836	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
837	// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
838	// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
839	// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
840	// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
841	// CHECK: ret <8 x i8> [[VTBX28_I]]
// Forwards a, b and c to vtbx4_u8 and returns the result unchanged.
// The FileCheck directives above expect the four <8 x i8> elements of b to
// be paired by two shufflevectors into two <16 x i8> values, which are
// passed with %a and %c to @llvm.aarch64.neon.tbx2.v8i8.
842	uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
843	return vtbx4_u8(a, b, c);
844	}
845
846	// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
847	// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
848	// CHECK: ret <8 x i8> [[VTBX1_I]]
// Forwards a, b and c to vqtbx1_u8 and returns the result unchanged.
// The FileCheck directives above expect a single call to
// @llvm.aarch64.neon.tbx1.v8i8 taking %a, %b and %c directly.
849	uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
850	return vqtbx1_u8(a, b, c);
851	}
852
853	// CHECK-LABEL: define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
854	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
855	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
856	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
857	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
858	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
859	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
860	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
861	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
862	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
863	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
864	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
865	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
866	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
867	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
868	// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
869	// CHECK: ret <8 x i8> [[VTBX2_I]]
// Forwards a, b and c to vqtbx2_u8 and returns the result unchanged.
// The FileCheck directives above expect the two <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx2.v8i8.
870	uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
871	return vqtbx2_u8(a, b, c);
872	}
873
874	// CHECK-LABEL: define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
875	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
876	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
877	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
878	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
879	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
880	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
881	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
882	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
883	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
884	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
885	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
886	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
887	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
888	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
889	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
890	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
891	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
892	// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
893	// CHECK: ret <8 x i8> [[VTBX3_I]]
// Forwards a, b and c to vqtbx3_u8 and returns the result unchanged.
// The FileCheck directives above expect the three <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx3.v8i8.
894	uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
895	return vqtbx3_u8(a, b, c);
896	}
897
898	// CHECK-LABEL: define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
899	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
900	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
901	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
902	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
903	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
904	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
905	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
906	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
907	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
908	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
909	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
910	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
911	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
912	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
913	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
914	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
915	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
916	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
917	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
918	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
919	// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
920	// CHECK: ret <8 x i8> [[VTBX4_I]]
// Forwards a, b and c to vqtbx4_u8 and returns the result unchanged.
// The FileCheck directives above expect the four <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx4.v8i8.
921	uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
922	return vqtbx4_u8(a, b, c);
923	}
924
925	// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
926	// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
927	// CHECK: ret <16 x i8> [[VTBX1_I]]
// Forwards a, b and c to vqtbx1q_u8 and returns the result unchanged.
// The FileCheck directives above expect a single call to
// @llvm.aarch64.neon.tbx1.v16i8 taking %a, %b and %c directly.
928	uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
929	return vqtbx1q_u8(a, b, c);
930	}
931
932	// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
933	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
934	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
935	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
936	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
937	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
938	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
939	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
940	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
941	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
942	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
943	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
944	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
945	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
946	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
947	// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
948	// CHECK: ret <16 x i8> [[VTBX2_I]]
// Forwards a, b and c to vqtbx2q_u8 and returns the result unchanged.
// The FileCheck directives above expect the two <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx2.v16i8.
949	uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
950	return vqtbx2q_u8(a, b, c);
951	}
952
953	// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
954	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
955	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
956	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
957	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
958	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
959	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
960	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
961	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
962	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
963	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
964	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
965	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
966	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
967	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
968	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
969	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
970	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
971	// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
972	// CHECK: ret <16 x i8> [[VTBX3_I]]
// Forwards a, b and c to vqtbx3q_u8 and returns the result unchanged.
// The FileCheck directives above expect the three <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx3.v16i8.
973	uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
974	return vqtbx3q_u8(a, b, c);
975	}
976
977	// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
978	// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
979	// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
980	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
981	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
982	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
983	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
984	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
985	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
986	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
987	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
988	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
989	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
990	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
991	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
992	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
993	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
994	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
995	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
996	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
997	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
998	// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
999	// CHECK: ret <16 x i8> [[VTBX4_I]]
// Forwards a, b and c to vqtbx4q_u8 and returns the result unchanged.
// The FileCheck directives above expect the four <16 x i8> elements loaded
// from b to be passed with %a and %c to @llvm.aarch64.neon.tbx4.v16i8.
1000	uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
1001	return vqtbx4q_u8(a, b, c);
1002	}
1003
1004	// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
1005	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1006	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
1007	// CHECK: ret <8 x i8> [[VTBL11_I]]
// Forwards a and b to vtbl1_p8 and returns the result unchanged.
// The FileCheck directives above expect %a to be widened to <16 x i8> with
// zeroinitializer by shufflevector and passed with %b to
// @llvm.aarch64.neon.tbl1.v8i8.
1008	poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
1009	return vtbl1_p8(a, b);
1010	}
1011
1012	// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #1 {
1013	// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
1014	// CHECK: ret <8 x i8> [[VTBL1_I]]
// Forwards a and b to vqtbl1_p8 and returns the result unchanged.
// The FileCheck directives above expect a single call to
// @llvm.aarch64.neon.tbl1.v8i8 taking %a and %b directly.
1015	poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
1016	return vqtbl1_p8(a, b);
1017	}
1018
1019	// CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1020	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1021	// CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
1022	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
1023	// CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
1024	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
1025	// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1026	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1027	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1028	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1029	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1030	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1031	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
1032	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1033	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1034	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1035	// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
1036	// CHECK: ret <8 x i8> [[VTBL13_I]]
// Forwards a and b to vtbl2_p8 and returns the result unchanged.
// The FileCheck directives above expect the two <8 x i8> elements loaded
// from a to be joined by shufflevector into one <16 x i8>, which is passed
// with %b to @llvm.aarch64.neon.tbl1.v8i8.
1037	poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
1038	return vtbl2_p8(a, b);
1039	}
1040
1041	// CHECK-LABEL: define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1042	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1043	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
1044	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1045	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1046	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1047	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1048	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1049	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1050	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1051	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1052	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1053	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1054	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1055	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1056	// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
1057	// CHECK: ret <8 x i8> [[VTBL2_I]]
// Forwards a and b to vqtbl2_p8 and returns the result unchanged.
// The FileCheck directives above expect the two <16 x i8> elements loaded
// from a to be passed with %b to @llvm.aarch64.neon.tbl2.v8i8.
1058	poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
1059	return vqtbl2_p8(a, b);
1060	}
1061
1062	// CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1063	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
1064	// CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
1065	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
1066	// CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
1067	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
1068	// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1069	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1070	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1071	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1072	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1073	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1074	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1075	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1076	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1077	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
1078	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1079	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1080	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1081	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1082	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
1083	// CHECK: ret <8 x i8> [[VTBL26_I]]
// Forwards a and b to vtbl3_p8 and returns the result unchanged.
// The FileCheck directives above expect the first two elements of a to be
// joined into one <16 x i8> and the third to be padded with zeroinitializer,
// both passed with %b to @llvm.aarch64.neon.tbl2.v8i8.
1084	poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
1085	return vtbl3_p8(a, b);
1086	}
1087
1088	// CHECK-LABEL: define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1089	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1090	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
1091	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1092	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1093	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1094	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1095	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1096	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1097	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1098	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1099	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1100	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1101	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1102	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1103	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1104	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1105	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1106	// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
1107	// CHECK: ret <8 x i8> [[VTBL3_I]]
// Forwards `a` and `b` unchanged to vqtbl3_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbl3.v8i8 on the three vectors loaded from `a`.
1108	poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
1109	return vqtbl3_p8(a, b);
1110	}
1111
1112	// CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) #0 {
1113	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
1114	// CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
1115	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
1116	// CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
1117	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
1118	// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1119	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1120	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1121	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1122	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1123	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1124	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1125	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1126	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1127	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1128	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1129	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1130	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
1131	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
1132	// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
1133	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1134	// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1135	// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
1136	// CHECK: ret <8 x i8> [[VTBL28_I]]
// Forwards `a` and `b` unchanged to vtbl4_p8 and returns its result. The
// FileCheck expectations preceding this function show the four 8-byte vectors
// being paired by two shufflevectors and passed to @llvm.aarch64.neon.tbl2.v8i8.
1137	poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
1138	return vtbl4_p8(a, b);
1139	}
1140
1141	// CHECK-LABEL: define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) #0 {
1142	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1143	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
1144	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1145	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1146	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1147	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1148	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1149	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1150	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1151	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1152	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1153	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1154	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1155	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1156	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1157	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1158	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1159	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1160	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1161	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1162	// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
1163	// CHECK: ret <8 x i8> [[VTBL4_I]]
// Forwards `a` and `b` unchanged to vqtbl4_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbl4.v8i8 on the four vectors loaded from `a`.
1164	poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
1165	return vqtbl4_p8(a, b);
1166	}
1167
1168	// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #1 {
1169	// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
1170	// CHECK: ret <16 x i8> [[VTBL1_I]]
// Forwards `a` and `b` unchanged to vqtbl1q_p8 and returns its result. The
// FileCheck expectations preceding this function are a single call to
// @llvm.aarch64.neon.tbl1.v16i8 on %a and %b, followed by the return.
1171	poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
1172	return vqtbl1q_p8(a, b);
1173	}
1174
1175	// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
1176	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1177	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
1178	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1179	// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1180	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
1181	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1182	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1183	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1184	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1185	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1186	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1187	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
1188	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1189	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1190	// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
1191	// CHECK: ret <16 x i8> [[VTBL2_I]]
// Forwards `a` and `b` unchanged to vqtbl2q_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbl2.v16i8 on the two vectors loaded from `a`.
1192	poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
1193	return vqtbl2q_p8(a, b);
1194	}
1195
1196	// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
1197	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1198	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
1199	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1200	// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1201	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
1202	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1203	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1204	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1205	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1206	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1207	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1208	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1209	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1210	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1211	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
1212	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1213	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1214	// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
1215	// CHECK: ret <16 x i8> [[VTBL3_I]]
// Forwards `a` and `b` unchanged to vqtbl3q_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbl3.v16i8 on the three vectors loaded from `a`.
1216	poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
1217	return vqtbl3q_p8(a, b);
1218	}
1219
1220	// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
1221	// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1222	// CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
1223	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1224	// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1225	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
1226	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1227	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1228	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1229	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1230	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1231	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1232	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1233	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1234	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1235	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1236	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1237	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1238	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
1239	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1240	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1241	// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
1242	// CHECK: ret <16 x i8> [[VTBL4_I]]
// Forwards `a` and `b` unchanged to vqtbl4q_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbl4.v16i8 on the four vectors loaded from `a`.
1243	poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
1244	return vqtbl4q_p8(a, b);
1245	}
1246
1247	// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
1248	// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1249	// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
1250	// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
1251	// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
1252	// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
1253	// CHECK: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1254	// CHECK: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
1255	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
1256	// CHECK: ret <8 x i8> [[VTBX_I]]
// Forwards `a`, `b` and `c` unchanged to vtbx1_p8 and returns its result. The
// FileCheck expectations preceding this function show a call to
// @llvm.aarch64.neon.tbl1.v8i8 on `b` widened with zeroes, then an
// `icmp uge %c, 8` mask that picks between %a and the lookup result via
// and/xor/or.
1257	poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
1258	return vtbx1_p8(a, b, c);
1259	}
1260
1261	// CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1262	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1263	// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
1264	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
1265	// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
1266	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
1267	// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1268	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1269	// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1270	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1271	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1272	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1273	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
1274	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1275	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1276	// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1277	// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
1278	// CHECK: ret <8 x i8> [[VTBX13_I]]
// Forwards `a`, `b` and `c` unchanged to vtbx2_p8 and returns its result. The
// FileCheck expectations preceding this function show the two 8-byte vectors of
// `b` being joined by a shufflevector and passed to
// @llvm.aarch64.neon.tbx1.v8i8 together with %a and %c.
1279	poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
1280	return vtbx2_p8(a, b, c);
1281	}
1282
1283	// CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1284	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
1285	// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
1286	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
1287	// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
1288	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
1289	// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1290	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1291	// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1292	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1293	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1294	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1295	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1296	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1297	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1298	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
1299	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1300	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1301	// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1302	// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1303	// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
1304	// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
1305	// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
1306	// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
1307	// CHECK: [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1308	// CHECK: [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
1309	// CHECK: [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
1310	// CHECK: ret <8 x i8> [[VTBX_I]]
// Forwards `a`, `b` and `c` unchanged to vtbx3_p8 and returns its result. The
// FileCheck expectations preceding this function show a call to
// @llvm.aarch64.neon.tbl2.v8i8 (third vector widened with zeroes), then an
// `icmp uge %c, 24` mask that picks between %a and the lookup result via
// and/xor/or.
1311	poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
1312	return vtbx3_p8(a, b, c);
1313	}
1314
1315	// CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) #0 {
1316	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
1317	// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
1318	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
1319	// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
1320	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
1321	// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
1322	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1323	// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
1324	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1325	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
1326	// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
1327	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1328	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
1329	// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
1330	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1331	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
1332	// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
1333	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
1334	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
1335	// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
1336	// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1337	// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1338	// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
1339	// CHECK: ret <8 x i8> [[VTBX28_I]]
// Forwards `a`, `b` and `c` unchanged to vtbx4_p8 and returns its result. The
// FileCheck expectations preceding this function show the four 8-byte vectors of
// `b` being paired by two shufflevectors and passed to
// @llvm.aarch64.neon.tbx2.v8i8 together with %a and %c.
1340	poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
1341	return vtbx4_p8(a, b, c);
1342	}
1343
1344	// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
1345	// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
1346	// CHECK: ret <8 x i8> [[VTBX1_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx1_p8 and returns its result. The
// FileCheck expectations preceding this function are a single call to
// @llvm.aarch64.neon.tbx1.v8i8 on %a, %b and %c, followed by the return.
1347	poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
1348	return vqtbx1_p8(a, b, c);
1349	}
1350
1351	// CHECK-LABEL: define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1352	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1353	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
1354	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1355	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1356	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1357	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1358	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1359	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1360	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1361	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1362	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1363	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1364	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1365	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1366	// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
1367	// CHECK: ret <8 x i8> [[VTBX2_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx2_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbx2.v8i8 on %a, the two vectors loaded from `b`, and %c.
1368	poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
1369	return vqtbx2_p8(a, b, c);
1370	}
1371
1372	// CHECK-LABEL: define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1373	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1374	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
1375	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1376	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1377	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1378	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1379	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1380	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1381	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1382	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1383	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1384	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1385	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1386	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1387	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1388	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1389	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1390	// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
1391	// CHECK: ret <8 x i8> [[VTBX3_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx3_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbx3.v8i8 on %a, the three vectors loaded from `b`, and %c.
1392	poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
1393	return vqtbx3_p8(a, b, c);
1394	}
1395
1396	// CHECK-LABEL: define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) #0 {
1397	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1398	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
1399	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1400	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1401	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1402	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1403	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1404	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1405	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1406	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1407	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1408	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1409	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1410	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1411	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1412	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1413	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1414	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1415	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1416	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1417	// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
1418	// CHECK: ret <8 x i8> [[VTBX4_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx4_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbx4.v8i8 on %a, the four vectors loaded from `b`, and %c.
1419	poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
1420	return vqtbx4_p8(a, b, c);
1421	}
1422
1423	// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
1424	// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
1425	// CHECK: ret <16 x i8> [[VTBX1_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx1q_p8 and returns its result. The
// FileCheck expectations preceding this function are a single call to
// @llvm.aarch64.neon.tbx1.v16i8 on %a, %b and %c, followed by the return.
1426	poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
1427	return vqtbx1q_p8(a, b, c);
1428	}
1429
1430	// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
1431	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1432	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
1433	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1434	// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
1435	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
1436	// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1437	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1438	// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1439	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1440	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1441	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1442	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
1443	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1444	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1445	// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
1446	// CHECK: ret <16 x i8> [[VTBX2_I]]
// Forwards `a`, `b` and `c` unchanged to vqtbx2q_p8 and returns its result. The
// FileCheck expectations preceding this function end in a call to
// @llvm.aarch64.neon.tbx2.v16i8 on %a, the two vectors loaded from `b`, and %c.
1447	poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
1448	return vqtbx2q_p8(a, b, c);
1449	}
1450
1451	// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
1452	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
1453	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
1454	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1455	// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
1456	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
1457	// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1458	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1459	// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1460	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1461	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1462	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1463	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1464	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1465	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1466	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
1467	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1468	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1469	// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
1470	// CHECK: ret <16 x i8> [[VTBX3_I]]
1471	poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) { // Codegen test: forwards its arguments unchanged to vqtbx3q_p8.
1472	return vqtbx3q_p8(a, b, c); // The expectations above match a call to @llvm.aarch64.neon.tbx3.v16i8 taking %a, three loaded <16 x i8> vectors, and %c.
1473	}
1474
1475	// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
1476	// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
1477	// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
1478	// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1479	// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
1480	// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
1481	// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
1482	// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1483	// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
1484	// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1485	// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
1486	// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
1487	// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1488	// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
1489	// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
1490	// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1491	// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
1492	// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
1493	// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
1494	// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
1495	// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
1496	// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
1497	// CHECK: ret <16 x i8> [[VTBX4_I]]
1498	poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) { // Codegen test: forwards its arguments unchanged to vqtbx4q_p8.
1499	return vqtbx4q_p8(a, b, c); // The expectations above match a call to @llvm.aarch64.neon.tbx4.v16i8 taking %a, four loaded <16 x i8> vectors, and %c.
1500	}
1501
1502	// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
1503	// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
1504

Clang Project