1 | // RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s |
2 | |
3 | #include <immintrin.h> |
4 | |
5 | // CHECK-LABEL: define i64 @test_mm512_reduce_max_epi64(<8 x i64> %__W) #0 { |
6 | // CHECK-NEXT: entry: |
7 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
8 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
9 | // CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 |
10 | // CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 |
11 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
12 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
13 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
14 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
15 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
16 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
17 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
18 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
19 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
20 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
21 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
22 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
23 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
24 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
25 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
26 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
27 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
28 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
29 | // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
30 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
31 | // CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
32 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
33 | // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
34 | // CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]] |
35 | // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] |
36 | // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 |
37 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
38 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
39 | // CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
40 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 |
41 | // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
42 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
43 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
44 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
45 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
46 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
47 | // CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <8 x i64> [[TMP13]], [[TMP14]] |
48 | // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] |
49 | // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 |
50 | // CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
51 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
52 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
53 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 |
54 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
55 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
56 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
57 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
58 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
59 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
60 | // CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <8 x i64> [[TMP21]], [[TMP22]] |
61 | // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] |
62 | // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 |
63 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
64 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 |
65 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
66 | long long test_mm512_reduce_max_epi64(__m512i __W){ |
67 | return _mm512_reduce_max_epi64(__W); |
68 | } |
69 | |
70 | // CHECK-LABEL: define i64 @test_mm512_reduce_max_epu64(<8 x i64> %__W) #0 { |
71 | // CHECK-NEXT: entry: |
72 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
73 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
74 | // CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 |
75 | // CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 |
76 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
77 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
78 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
79 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
80 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
81 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
82 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
83 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
84 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
85 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
86 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
87 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
88 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
89 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
90 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
91 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
92 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
93 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
94 | // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
95 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
96 | // CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
97 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
98 | // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
99 | // CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]] |
100 | // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] |
101 | // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 |
102 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
103 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
104 | // CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
105 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 |
106 | // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
107 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
108 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
109 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
110 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
111 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
112 | // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] |
113 | // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] |
114 | // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 |
115 | // CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
116 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
117 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
118 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 |
119 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
120 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
121 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
122 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
123 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
124 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
125 | // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] |
126 | // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] |
127 | // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 |
128 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
129 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 |
130 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
131 | unsigned long long test_mm512_reduce_max_epu64(__m512i __W){ |
132 | return _mm512_reduce_max_epu64(__W); |
133 | } |
134 | |
135 | // CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 { |
136 | // CHECK-NEXT: entry: |
137 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 |
138 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 |
139 | // CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 |
140 | // CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 |
141 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
142 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
143 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 |
144 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 |
145 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 |
146 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 |
147 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 |
148 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 |
149 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 |
150 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 |
151 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 |
152 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 |
153 | // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 |
154 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 |
155 | // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 |
156 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
157 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
158 | // CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 |
159 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
160 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
161 | // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 |
162 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 |
163 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 |
164 | // CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 |
165 | // CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 |
166 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 |
167 | // CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 |
168 | // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 |
169 | // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 |
170 | // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
171 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> <i32 0, i32 1> |
172 | // CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 |
173 | // CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
174 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> <i32 2, i32 3> |
175 | // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 |
176 | // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 |
177 | // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 |
178 | // CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 |
179 | // CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 |
180 | // CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 |
181 | // CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 |
182 | // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 |
183 | // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 |
184 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
185 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
186 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> <i32 1, i32 0> |
187 | // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 |
188 | // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
189 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 |
190 | // CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 |
191 | // CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 |
192 | // CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 |
193 | // CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 |
194 | // CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 |
195 | // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 |
196 | // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 |
197 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 |
198 | // CHECK-NEXT: ret double [[VECEXT_I]] |
199 | double test_mm512_reduce_max_pd(__m512d __W){ |
200 | return _mm512_reduce_max_pd(__W); |
201 | } |
202 | |
203 | // CHECK-LABEL: define i64 @test_mm512_reduce_min_epi64(<8 x i64> %__W) #0 { |
204 | // CHECK-NEXT: entry: |
205 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
206 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
207 | // CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 |
208 | // CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 |
209 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
210 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
211 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
212 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
213 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
214 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
215 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
216 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
217 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
218 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
219 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
220 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
221 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
222 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
223 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
224 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
225 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
226 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
227 | // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
228 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
229 | // CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
230 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
231 | // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
232 | // CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i64> [[TMP5]], [[TMP6]] |
233 | // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] |
234 | // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 |
235 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
236 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
237 | // CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
238 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 |
239 | // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
240 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
241 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
242 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
243 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
244 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
245 | // CHECK-NEXT: [[TMP15:%.*]] = icmp slt <8 x i64> [[TMP13]], [[TMP14]] |
246 | // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] |
247 | // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 |
248 | // CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
249 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
250 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
251 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 |
252 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
253 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
254 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
255 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
256 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
257 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
258 | // CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[TMP21]], [[TMP22]] |
259 | // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] |
260 | // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 |
261 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
262 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 |
263 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
264 | long long test_mm512_reduce_min_epi64(__m512i __W){ |
265 | return _mm512_reduce_min_epi64(__W); |
266 | } |
267 | |
268 | // CHECK-LABEL: define i64 @test_mm512_reduce_min_epu64(<8 x i64> %__W) #0 { |
269 | // CHECK-NEXT: entry: |
270 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
271 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
272 | // CHECK-NEXT: [[__A_ADDR_I5_I:%.*]] = alloca <8 x i64>, align 64 |
273 | // CHECK-NEXT: [[__B_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 |
274 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
275 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
276 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
277 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
278 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
279 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
280 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
281 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
282 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
283 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
284 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
285 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
286 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
287 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
288 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
289 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
290 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
291 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
292 | // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
293 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
294 | // CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
295 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
296 | // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
297 | // CHECK-NEXT: [[TMP7:%.*]] = icmp ult <8 x i64> [[TMP5]], [[TMP6]] |
298 | // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] |
299 | // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__T2_I]], align 64 |
300 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
301 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
302 | // CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
303 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__T3_I]], align 64 |
304 | // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
305 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
306 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
307 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
308 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
309 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
310 | // CHECK-NEXT: [[TMP15:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP14]] |
311 | // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] |
312 | // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T4_I]], align 64 |
313 | // CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
314 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
315 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
316 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T5_I]], align 64 |
317 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
318 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
319 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
320 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
321 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I5_I]], align 64 |
322 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I6_I]], align 64 |
323 | // CHECK-NEXT: [[TMP23:%.*]] = icmp ult <8 x i64> [[TMP21]], [[TMP22]] |
324 | // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] |
325 | // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T6_I]], align 64 |
326 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
327 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP25]], i32 0 |
328 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
329 | unsigned long long test_mm512_reduce_min_epu64(__m512i __W){ |
330 | return _mm512_reduce_min_epu64(__W); |
331 | } |
332 | |
333 | // CHECK-LABEL: define double @test_mm512_reduce_min_pd(<8 x double> %__W) #0 { |
334 | // CHECK-NEXT: entry: |
335 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x double>, align 32 |
336 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x double>, align 32 |
337 | // CHECK-NEXT: [[__A_ADDR_I8_I:%.*]] = alloca <2 x double>, align 16 |
338 | // CHECK-NEXT: [[__B_ADDR_I9_I:%.*]] = alloca <2 x double>, align 16 |
339 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
340 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
341 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 |
342 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 |
343 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 |
344 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 |
345 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 |
346 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 |
347 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 |
348 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 |
349 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 |
350 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 |
351 | // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 |
352 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 |
353 | // CHECK-NEXT: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64 |
354 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
355 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
356 | // CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 |
357 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
358 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
359 | // CHECK-NEXT: store <4 x double> [[EXTRACT2_I]], <4 x double>* [[__T2_I]], align 32 |
360 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 |
361 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 |
362 | // CHECK-NEXT: store <4 x double> [[TMP3]], <4 x double>* [[__A_ADDR_I10_I]], align 32 |
363 | // CHECK-NEXT: store <4 x double> [[TMP4]], <4 x double>* [[__B_ADDR_I11_I]], align 32 |
364 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I10_I]], align 32 |
365 | // CHECK-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I11_I]], align 32 |
366 | // CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP5]], <4 x double> [[TMP6]]) #2 |
367 | // CHECK-NEXT: store <4 x double> [[TMP7]], <4 x double>* [[__T3_I]], align 32 |
368 | // CHECK-NEXT: [[TMP8:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
369 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> undef, <2 x i32> <i32 0, i32 1> |
370 | // CHECK-NEXT: store <2 x double> [[EXTRACT4_I]], <2 x double>* [[__T4_I]], align 16 |
371 | // CHECK-NEXT: [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
372 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <2 x i32> <i32 2, i32 3> |
373 | // CHECK-NEXT: store <2 x double> [[EXTRACT5_I]], <2 x double>* [[__T5_I]], align 16 |
374 | // CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 |
375 | // CHECK-NEXT: [[TMP11:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 |
376 | // CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[__A_ADDR_I8_I]], align 16 |
377 | // CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[__B_ADDR_I9_I]], align 16 |
378 | // CHECK-NEXT: [[TMP12:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I8_I]], align 16 |
379 | // CHECK-NEXT: [[TMP13:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I9_I]], align 16 |
380 | // CHECK-NEXT: [[TMP14:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP12]], <2 x double> [[TMP13]]) #2 |
381 | // CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[__T6_I]], align 16 |
382 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
383 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
384 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <2 x i32> <i32 1, i32 0> |
385 | // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 |
386 | // CHECK-NEXT: [[TMP17:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
387 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 |
388 | // CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[__A_ADDR_I_I]], align 16 |
389 | // CHECK-NEXT: store <2 x double> [[TMP18]], <2 x double>* [[__B_ADDR_I_I]], align 16 |
390 | // CHECK-NEXT: [[TMP19:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I_I]], align 16 |
391 | // CHECK-NEXT: [[TMP20:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 |
392 | // CHECK-NEXT: [[TMP21:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP19]], <2 x double> [[TMP20]]) #2 |
393 | // CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[__T8_I]], align 16 |
394 | // CHECK-NEXT: [[TMP22:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 |
395 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP22]], i32 0 |
396 | // CHECK-NEXT: ret double [[VECEXT_I]] |
397 | double test_mm512_reduce_min_pd(__m512d __W){ |
398 | return _mm512_reduce_min_pd(__W); |
399 | } |
400 | |
401 | // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { |
402 | // CHECK-NEXT: entry: |
403 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
404 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
405 | // CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 |
406 | // CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 |
407 | // CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 |
408 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
409 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
410 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
411 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
412 | // CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 |
413 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 |
414 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
415 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
416 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
417 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
418 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
419 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
420 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
421 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
422 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
423 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
424 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
425 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
426 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
427 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
428 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
429 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
430 | // CHECK-NEXT: store i64 -9223372036854775808, i64* [[__D_ADDR_I_I]], align 8 |
431 | // CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
432 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 |
433 | // CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
434 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 |
435 | // CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
436 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 |
437 | // CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
438 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 |
439 | // CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
440 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 |
441 | // CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
442 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 |
443 | // CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
444 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 |
445 | // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
446 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 |
447 | // CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
448 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
449 | // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
450 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
451 | // CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
452 | // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 |
453 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
454 | // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
455 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
456 | // CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
457 | // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> |
458 | // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] |
459 | // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 |
460 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
461 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
462 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
463 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
464 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
465 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
466 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
467 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
468 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
469 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
470 | // CHECK-NEXT: [[TMP24:%.*]] = icmp sgt <8 x i64> [[TMP22]], [[TMP23]] |
471 | // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] |
472 | // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 |
473 | // CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
474 | // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
475 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
476 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 |
477 | // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
478 | // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
479 | // CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
480 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
481 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
482 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
483 | // CHECK-NEXT: [[TMP32:%.*]] = icmp sgt <8 x i64> [[TMP30]], [[TMP31]] |
484 | // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] |
485 | // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 |
486 | // CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
487 | // CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
488 | // CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
489 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 |
490 | // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
491 | // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
492 | // CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
493 | // CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
494 | // CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
495 | // CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
496 | // CHECK-NEXT: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP38]], [[TMP39]] |
497 | // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] |
498 | // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 |
499 | // CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
500 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 |
501 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
502 | long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){ |
503 | return _mm512_mask_reduce_max_epi64(__M, __W); |
504 | } |
505 | |
506 | // CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { |
507 | // CHECK-NEXT: entry: |
508 | // CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 |
509 | // CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 |
510 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
511 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
512 | // CHECK-NEXT: [[__A_ADDR_I6_I:%.*]] = alloca <8 x i64>, align 64 |
513 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
514 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 |
515 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
516 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
517 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
518 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
519 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
520 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
521 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
522 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
523 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
524 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
525 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
526 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
527 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
528 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
529 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
530 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
531 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
532 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
533 | // CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
534 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
535 | // CHECK-NEXT: store i8 [[TMP2]], i8* [[__U_ADDR_I_I]], align 1 |
536 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
537 | // CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
538 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
539 | // CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 |
540 | // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 |
541 | // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> |
542 | // CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]] |
543 | // CHECK-NEXT: store <8 x i64> [[TMP8]], <8 x i64>* [[__V_ADDR_I]], align 64 |
544 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
545 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
546 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> [[TMP10]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
547 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
548 | // CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
549 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
550 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
551 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
552 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
553 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
554 | // CHECK-NEXT: [[TMP15:%.*]] = icmp ugt <8 x i64> [[TMP13]], [[TMP14]] |
555 | // CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP15]], <8 x i64> [[TMP13]], <8 x i64> [[TMP14]] |
556 | // CHECK-NEXT: store <8 x i64> [[TMP16]], <8 x i64>* [[__T2_I]], align 64 |
557 | // CHECK-NEXT: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
558 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
559 | // CHECK-NEXT: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
560 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__T3_I]], align 64 |
561 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
562 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
563 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
564 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
565 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
566 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
567 | // CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <8 x i64> [[TMP21]], [[TMP22]] |
568 | // CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[TMP22]] |
569 | // CHECK-NEXT: store <8 x i64> [[TMP24]], <8 x i64>* [[__T4_I]], align 64 |
570 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
571 | // CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
572 | // CHECK-NEXT: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP25]], <8 x i64> [[TMP26]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
573 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__T5_I]], align 64 |
574 | // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
575 | // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
576 | // CHECK-NEXT: store <8 x i64> [[TMP27]], <8 x i64>* [[__A_ADDR_I6_I]], align 64 |
577 | // CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
578 | // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I6_I]], align 64 |
579 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
580 | // CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP29]], [[TMP30]] |
581 | // CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP29]], <8 x i64> [[TMP30]] |
582 | // CHECK-NEXT: store <8 x i64> [[TMP32]], <8 x i64>* [[__T6_I]], align 64 |
583 | // CHECK-NEXT: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
584 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP33]], i32 0 |
585 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
586 | unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){ |
587 | return _mm512_mask_reduce_max_epu64(__M, __W); |
588 | } |
589 | |
590 | // CHECK-LABEL: define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 { |
591 | // CHECK-NEXT: entry: |
592 | // CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 |
593 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
594 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 |
595 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 |
596 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 |
597 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 |
598 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 |
599 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
600 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
601 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 |
602 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 |
603 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
604 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 |
605 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 |
606 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 |
607 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 |
608 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 |
609 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 |
610 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 |
611 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 |
612 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 |
613 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
614 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 |
615 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
616 | // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 |
617 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
618 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 |
619 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
620 | // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 |
621 | // CHECK-NEXT: store double 0xFFF0000000000000, double* [[__W_ADDR_I_I]], align 8 |
622 | // CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
623 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 |
624 | // CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
625 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 |
626 | // CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
627 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 |
628 | // CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
629 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 |
630 | // CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
631 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 |
632 | // CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
633 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 |
634 | // CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
635 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 |
636 | // CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
637 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 |
638 | // CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
639 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
640 | // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
641 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
642 | // CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 |
643 | // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 |
644 | // CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 |
645 | // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
646 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 |
647 | // CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 |
648 | // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> |
649 | // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] |
650 | // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 |
651 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
652 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
653 | // CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 |
654 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
655 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
656 | // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 |
657 | // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 |
658 | // CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 |
659 | // CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 |
660 | // CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 |
661 | // CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 |
662 | // CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 |
663 | // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 |
664 | // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 |
665 | // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
666 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32> <i32 0, i32 1> |
667 | // CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16 |
668 | // CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
669 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32> <i32 2, i32 3> |
670 | // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 |
671 | // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 |
672 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 |
673 | // CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 |
674 | // CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 |
675 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 |
676 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 |
677 | // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 |
678 | // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 |
679 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
680 | // CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
681 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32> <i32 1, i32 0> |
682 | // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 |
683 | // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
684 | // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 |
685 | // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 |
686 | // CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 |
687 | // CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 |
688 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 |
689 | // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 |
690 | // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 |
691 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 |
692 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP39]], i32 0 |
693 | // CHECK-NEXT: ret double [[VECEXT_I]] |
694 | double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){ |
695 | return _mm512_mask_reduce_max_pd(__M, __W); |
696 | } |
697 | |
698 | // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 { |
699 | // CHECK-NEXT: entry: |
700 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
701 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
702 | // CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 |
703 | // CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 |
704 | // CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 |
705 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
706 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
707 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
708 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
709 | // CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 |
710 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 |
711 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
712 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
713 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
714 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
715 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
716 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
717 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
718 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
719 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
720 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
721 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
722 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
723 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
724 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
725 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
726 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
727 | // CHECK-NEXT: store i64 9223372036854775807, i64* [[__D_ADDR_I_I]], align 8 |
728 | // CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
729 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 |
730 | // CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
731 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 |
732 | // CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
733 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 |
734 | // CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
735 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 |
736 | // CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
737 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 |
738 | // CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
739 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 |
740 | // CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
741 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 |
742 | // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
743 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 |
744 | // CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
745 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
746 | // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
747 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
748 | // CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
749 | // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 |
750 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
751 | // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
752 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
753 | // CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
754 | // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> |
755 | // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] |
756 | // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 |
757 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
758 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
759 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
760 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
761 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
762 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
763 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
764 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
765 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
766 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
767 | // CHECK-NEXT: [[TMP24:%.*]] = icmp slt <8 x i64> [[TMP22]], [[TMP23]] |
768 | // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] |
769 | // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 |
770 | // CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
771 | // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
772 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
773 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 |
774 | // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
775 | // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
776 | // CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
777 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
778 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
779 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
780 | // CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[TMP30]], [[TMP31]] |
781 | // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] |
782 | // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 |
783 | // CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
784 | // CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
785 | // CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
786 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 |
787 | // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
788 | // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
789 | // CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
790 | // CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
791 | // CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
792 | // CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
793 | // CHECK-NEXT: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP38]], [[TMP39]] |
794 | // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] |
795 | // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 |
796 | // CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
797 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 |
798 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
799 | long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){ |
800 | return _mm512_mask_reduce_min_epi64(__M, __W); |
801 | } |
802 | |
803 | // CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 { |
804 | // CHECK-NEXT: entry: |
805 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
806 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
807 | // CHECK-NEXT: [[__A_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64 |
808 | // CHECK-NEXT: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64 |
809 | // CHECK-NEXT: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64 |
810 | // CHECK-NEXT: [[__A_ADDR_I7_I:%.*]] = alloca <8 x i64>, align 64 |
811 | // CHECK-NEXT: [[__B_ADDR_I8_I:%.*]] = alloca <8 x i64>, align 64 |
812 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
813 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
814 | // CHECK-NEXT: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8 |
815 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64 |
816 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
817 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
818 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x i64>, align 64 |
819 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x i64>, align 64 |
820 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x i64>, align 64 |
821 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <8 x i64>, align 64 |
822 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <8 x i64>, align 64 |
823 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <8 x i64>, align 64 |
824 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
825 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
826 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
827 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
828 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
829 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
830 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
831 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
832 | // CHECK-NEXT: store i64 -1, i64* [[__D_ADDR_I_I]], align 8 |
833 | // CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
834 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP2]], i32 0 |
835 | // CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
836 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP3]], i32 1 |
837 | // CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
838 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP4]], i32 2 |
839 | // CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
840 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP5]], i32 3 |
841 | // CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
842 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP6]], i32 4 |
843 | // CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
844 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP7]], i32 5 |
845 | // CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
846 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP8]], i32 6 |
847 | // CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8 |
848 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP9]], i32 7 |
849 | // CHECK-NEXT: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
850 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
851 | // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
852 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
853 | // CHECK-NEXT: store <8 x i64> [[TMP10]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
854 | // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 |
855 | // CHECK-NEXT: store <8 x i64> [[TMP12]], <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
856 | // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
857 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I11_I]], align 64 |
858 | // CHECK-NEXT: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
859 | // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> |
860 | // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]] |
861 | // CHECK-NEXT: store <8 x i64> [[TMP17]], <8 x i64>* [[__V_ADDR_I]], align 64 |
862 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
863 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
864 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP18]], <8 x i64> [[TMP19]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> |
865 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__T1_I]], align 64 |
866 | // CHECK-NEXT: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
867 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__T1_I]], align 64 |
868 | // CHECK-NEXT: store <8 x i64> [[TMP20]], <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
869 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
870 | // CHECK-NEXT: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64 |
871 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64 |
872 | // CHECK-NEXT: [[TMP24:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP23]] |
873 | // CHECK-NEXT: [[TMP25:%.*]] = select <8 x i1> [[TMP24]], <8 x i64> [[TMP22]], <8 x i64> [[TMP23]] |
874 | // CHECK-NEXT: store <8 x i64> [[TMP25]], <8 x i64>* [[__T2_I]], align 64 |
875 | // CHECK-NEXT: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
876 | // CHECK-NEXT: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
877 | // CHECK-NEXT: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> |
878 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__T3_I]], align 64 |
879 | // CHECK-NEXT: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__T2_I]], align 64 |
880 | // CHECK-NEXT: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__T3_I]], align 64 |
881 | // CHECK-NEXT: store <8 x i64> [[TMP28]], <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
882 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
883 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I7_I]], align 64 |
884 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I8_I]], align 64 |
885 | // CHECK-NEXT: [[TMP32:%.*]] = icmp ult <8 x i64> [[TMP30]], [[TMP31]] |
886 | // CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP32]], <8 x i64> [[TMP30]], <8 x i64> [[TMP31]] |
887 | // CHECK-NEXT: store <8 x i64> [[TMP33]], <8 x i64>* [[__T4_I]], align 64 |
888 | // CHECK-NEXT: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
889 | // CHECK-NEXT: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
890 | // CHECK-NEXT: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP34]], <8 x i64> [[TMP35]], <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> |
891 | // CHECK-NEXT: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__T5_I]], align 64 |
892 | // CHECK-NEXT: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__T4_I]], align 64 |
893 | // CHECK-NEXT: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__T5_I]], align 64 |
894 | // CHECK-NEXT: store <8 x i64> [[TMP36]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
895 | // CHECK-NEXT: store <8 x i64> [[TMP37]], <8 x i64>* [[__B_ADDR_I_I]], align 64 |
896 | // CHECK-NEXT: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
897 | // CHECK-NEXT: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64 |
898 | // CHECK-NEXT: [[TMP40:%.*]] = icmp ult <8 x i64> [[TMP38]], [[TMP39]] |
899 | // CHECK-NEXT: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP38]], <8 x i64> [[TMP39]] |
900 | // CHECK-NEXT: store <8 x i64> [[TMP41]], <8 x i64>* [[__T6_I]], align 64 |
901 | // CHECK-NEXT: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__T6_I]], align 64 |
902 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0 |
903 | // CHECK-NEXT: ret i64 [[VECEXT_I]] |
904 | long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){ |
905 | return _mm512_mask_reduce_min_epu64(__M, __W); |
906 | } |
907 | |
908 | // CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 { |
909 | // CHECK-NEXT: entry: |
910 | // CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 |
911 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i8, align 1 |
912 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64 |
913 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x double>, align 32 |
914 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x double>, align 32 |
915 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <2 x double>, align 16 |
916 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <2 x double>, align 16 |
917 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
918 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <2 x double>, align 16 |
919 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca double, align 8 |
920 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64 |
921 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i8, align 1 |
922 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64 |
923 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x double>, align 32 |
924 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x double>, align 32 |
925 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x double>, align 32 |
926 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x double>, align 16 |
927 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x double>, align 16 |
928 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x double>, align 16 |
929 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x double>, align 16 |
930 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x double>, align 16 |
931 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i8, align 1 |
932 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64 |
933 | // CHECK-NEXT: store i8 [[__M:%.*]], i8* [[__M_ADDR]], align 1 |
934 | // CHECK-NEXT: store <8 x double> [[__W:%.*]], <8 x double>* [[__W_ADDR]], align 64 |
935 | // CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1 |
936 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64 |
937 | // CHECK-NEXT: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1 |
938 | // CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64 |
939 | // CHECK-NEXT: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8 |
940 | // CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
941 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0 |
942 | // CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
943 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP3]], i32 1 |
944 | // CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
945 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP4]], i32 2 |
946 | // CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
947 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP5]], i32 3 |
948 | // CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
949 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP6]], i32 4 |
950 | // CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
951 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP7]], i32 5 |
952 | // CHECK-NEXT: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
953 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP8]], i32 6 |
954 | // CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8 |
955 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP9]], i32 7 |
956 | // CHECK-NEXT: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
957 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
958 | // CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1 |
959 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
960 | // CHECK-NEXT: store <8 x double> [[TMP10]], <8 x double>* [[__W2_ADDR_I_I]], align 64 |
961 | // CHECK-NEXT: store i8 [[TMP11]], i8* [[__U_ADDR_I_I]], align 1 |
962 | // CHECK-NEXT: store <8 x double> [[TMP12]], <8 x double>* [[__A_ADDR_I_I]], align 64 |
963 | // CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[__U_ADDR_I_I]], align 1 |
964 | // CHECK-NEXT: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64 |
965 | // CHECK-NEXT: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__W2_ADDR_I_I]], align 64 |
966 | // CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP13]] to <8 x i1> |
967 | // CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP16]], <8 x double> [[TMP14]], <8 x double> [[TMP15]] |
968 | // CHECK-NEXT: store <8 x double> [[TMP17]], <8 x double>* [[__V_ADDR_I]], align 64 |
969 | // CHECK-NEXT: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
970 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
971 | // CHECK-NEXT: store <4 x double> [[EXTRACT_I]], <4 x double>* [[__T1_I]], align 32 |
972 | // CHECK-NEXT: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64 |
973 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
974 | // CHECK-NEXT: store <4 x double> [[EXTRACT4_I]], <4 x double>* [[__T2_I]], align 32 |
975 | // CHECK-NEXT: [[TMP20:%.*]] = load <4 x double>, <4 x double>* [[__T1_I]], align 32 |
976 | // CHECK-NEXT: [[TMP21:%.*]] = load <4 x double>, <4 x double>* [[__T2_I]], align 32 |
977 | // CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[__A_ADDR_I12_I]], align 32 |
978 | // CHECK-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[__B_ADDR_I13_I]], align 32 |
979 | // CHECK-NEXT: [[TMP22:%.*]] = load <4 x double>, <4 x double>* [[__A_ADDR_I12_I]], align 32 |
980 | // CHECK-NEXT: [[TMP23:%.*]] = load <4 x double>, <4 x double>* [[__B_ADDR_I13_I]], align 32 |
981 | // CHECK-NEXT: [[TMP24:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[TMP22]], <4 x double> [[TMP23]]) #2 |
982 | // CHECK-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[__T3_I]], align 32 |
983 | // CHECK-NEXT: [[TMP25:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
984 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> undef, <2 x i32> <i32 0, i32 1> |
985 | // CHECK-NEXT: store <2 x double> [[EXTRACT6_I]], <2 x double>* [[__T4_I]], align 16 |
986 | // CHECK-NEXT: [[TMP26:%.*]] = load <4 x double>, <4 x double>* [[__T3_I]], align 32 |
987 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> undef, <2 x i32> <i32 2, i32 3> |
988 | // CHECK-NEXT: store <2 x double> [[EXTRACT7_I]], <2 x double>* [[__T5_I]], align 16 |
989 | // CHECK-NEXT: [[TMP27:%.*]] = load <2 x double>, <2 x double>* [[__T4_I]], align 16 |
990 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x double>, <2 x double>* [[__T5_I]], align 16 |
991 | // CHECK-NEXT: store <2 x double> [[TMP27]], <2 x double>* [[__A_ADDR_I10_I]], align 16 |
992 | // CHECK-NEXT: store <2 x double> [[TMP28]], <2 x double>* [[__B_ADDR_I11_I]], align 16 |
993 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x double>, <2 x double>* [[__A_ADDR_I10_I]], align 16 |
994 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I11_I]], align 16 |
995 | // CHECK-NEXT: [[TMP31:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP29]], <2 x double> [[TMP30]]) #2 |
996 | // CHECK-NEXT: store <2 x double> [[TMP31]], <2 x double>* [[__T6_I]], align 16 |
997 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
998 | // CHECK-NEXT: [[TMP33:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
999 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP32]], <2 x double> [[TMP33]], <2 x i32> <i32 1, i32 0> |
1000 | // CHECK-NEXT: store <2 x double> [[SHUFFLE_I]], <2 x double>* [[__T7_I]], align 16 |
1001 | // CHECK-NEXT: [[TMP34:%.*]] = load <2 x double>, <2 x double>* [[__T6_I]], align 16 |
1002 | // CHECK-NEXT: [[TMP35:%.*]] = load <2 x double>, <2 x double>* [[__T7_I]], align 16 |
1003 | // CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[__A2_ADDR_I_I]], align 16 |
1004 | // CHECK-NEXT: store <2 x double> [[TMP35]], <2 x double>* [[__B_ADDR_I_I]], align 16 |
1005 | // CHECK-NEXT: [[TMP36:%.*]] = load <2 x double>, <2 x double>* [[__A2_ADDR_I_I]], align 16 |
1006 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x double>, <2 x double>* [[__B_ADDR_I_I]], align 16 |
1007 | // CHECK-NEXT: [[TMP38:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[TMP36]], <2 x double> [[TMP37]]) #2 |
1008 | // CHECK-NEXT: store <2 x double> [[TMP38]], <2 x double>* [[__T8_I]], align 16 |
1009 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x double>, <2 x double>* [[__T8_I]], align 16 |
1010 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP39]], i32 0 |
1011 | // CHECK-NEXT: ret double [[VECEXT_I]] |
1012 | double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){ |
1013 | return _mm512_mask_reduce_min_pd(__M, __W); |
1014 | } |
1015 | |
1016 | // CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 { |
1017 | // CHECK-NEXT: entry: |
1018 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1019 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1020 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1021 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1022 | // CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 |
1023 | // CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 |
1024 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1025 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1026 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1027 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1028 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1029 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1030 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1031 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1032 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1033 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1034 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1035 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1036 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1037 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1038 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1039 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1040 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1041 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1042 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1043 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1044 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1045 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1046 | // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 |
1047 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1048 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1049 | // CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1050 | // CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1051 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1052 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> |
1053 | // CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1054 | // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> |
1055 | // CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <8 x i32> [[TMP6]], [[TMP8]] |
1056 | // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] |
1057 | // CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> |
1058 | // CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 |
1059 | // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1060 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1061 | // CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 |
1062 | // CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1063 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1064 | // CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 |
1065 | // CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1066 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1067 | // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1068 | // CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1069 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1070 | // CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> |
1071 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1072 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> |
1073 | // CHECK-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[TMP17]], [[TMP19]] |
1074 | // CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] |
1075 | // CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> |
1076 | // CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 |
1077 | // CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1078 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> |
1079 | // CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1080 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> |
1081 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1082 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1083 | // CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 |
1084 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1085 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1086 | // CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1087 | // CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1088 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1089 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> |
1090 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1091 | // CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> |
1092 | // CHECK-NEXT: [[TMP34:%.*]] = icmp sgt <4 x i32> [[TMP31]], [[TMP33]] |
1093 | // CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] |
1094 | // CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> |
1095 | // CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 |
1096 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1097 | // CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> |
1098 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1099 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> |
1100 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1101 | // CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> |
1102 | // CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 |
1103 | // CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1104 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1105 | // CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1106 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1107 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1108 | // CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> |
1109 | // CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1110 | // CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> |
1111 | // CHECK-NEXT: [[TMP48:%.*]] = icmp sgt <4 x i32> [[TMP45]], [[TMP47]] |
1112 | // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] |
1113 | // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> |
1114 | // CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 |
1115 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1116 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 |
1117 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1118 | int test_mm512_reduce_max_epi32(__m512i __W){ |
1119 | return _mm512_reduce_max_epi32(__W); |
1120 | } |
1121 | |
1122 | // CHECK-LABEL: define i32 @test_mm512_reduce_max_epu32(<8 x i64> %__W) #0 { |
1123 | // CHECK-NEXT: entry: |
1124 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1125 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1126 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1127 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1128 | // CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 |
1129 | // CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 |
1130 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1131 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1132 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1133 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1134 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1135 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1136 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1137 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1138 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1139 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1140 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1141 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1142 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1143 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1144 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1145 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1146 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1147 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1148 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1149 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1150 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1151 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1152 | // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 |
1153 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1154 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1155 | // CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1156 | // CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1157 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1158 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> |
1159 | // CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1160 | // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> |
1161 | // CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i32> [[TMP6]], [[TMP8]] |
1162 | // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] |
1163 | // CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> |
1164 | // CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 |
1165 | // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1166 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1167 | // CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 |
1168 | // CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1169 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1170 | // CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 |
1171 | // CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1172 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1173 | // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1174 | // CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1175 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1176 | // CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> |
1177 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1178 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> |
1179 | // CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <4 x i32> [[TMP17]], [[TMP19]] |
1180 | // CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] |
1181 | // CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> |
1182 | // CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 |
1183 | // CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1184 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> |
1185 | // CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1186 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> |
1187 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1188 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1189 | // CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 |
1190 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1191 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1192 | // CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1193 | // CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1194 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1195 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> |
1196 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1197 | // CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> |
1198 | // CHECK-NEXT: [[TMP34:%.*]] = icmp ugt <4 x i32> [[TMP31]], [[TMP33]] |
1199 | // CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] |
1200 | // CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> |
1201 | // CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 |
1202 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1203 | // CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> |
1204 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1205 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> |
1206 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1207 | // CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> |
1208 | // CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 |
1209 | // CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1210 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1211 | // CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1212 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1213 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1214 | // CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> |
1215 | // CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1216 | // CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> |
1217 | // CHECK-NEXT: [[TMP48:%.*]] = icmp ugt <4 x i32> [[TMP45]], [[TMP47]] |
1218 | // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] |
1219 | // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> |
1220 | // CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 |
1221 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1222 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 |
1223 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1224 | unsigned int test_mm512_reduce_max_epu32(__m512i __W){ |
1225 | return _mm512_reduce_max_epu32(__W); |
1226 | } |
1227 | |
1228 | // CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 { |
1229 | // CHECK-NEXT: entry: |
1230 | // CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 |
1231 | // CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 |
1232 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 |
1233 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 |
1234 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 |
1235 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 |
1236 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1237 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1238 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 |
1239 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 |
1240 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 |
1241 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 |
1242 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 |
1243 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 |
1244 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 |
1245 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 |
1246 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 |
1247 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 |
1248 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 |
1249 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 |
1250 | // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 |
1251 | // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 |
1252 | // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 |
1253 | // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1254 | // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[TMP1]] to <8 x double> |
1255 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1256 | // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> |
1257 | // CHECK-NEXT: store <8 x float> [[TMP3]], <8 x float>* [[__T1_I]], align 32 |
1258 | // CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1259 | // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <8 x double> |
1260 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1261 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[EXTRACT2_I]] to <8 x float> |
1262 | // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 |
1263 | // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 |
1264 | // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 |
1265 | // CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 |
1266 | // CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 |
1267 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 |
1268 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 |
1269 | // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 |
1270 | // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 |
1271 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
1272 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1273 | // CHECK-NEXT: store <4 x float> [[EXTRACT4_I]], <4 x float>* [[__T4_I]], align 16 |
1274 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
1275 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1276 | // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 |
1277 | // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 |
1278 | // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 |
1279 | // CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 |
1280 | // CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 |
1281 | // CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 |
1282 | // CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 |
1283 | // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 |
1284 | // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 |
1285 | // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1286 | // CHECK-NEXT: [[TMP20:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1287 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1288 | // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 |
1289 | // CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1290 | // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 |
1291 | // CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 |
1292 | // CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 |
1293 | // CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 |
1294 | // CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 |
1295 | // CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 |
1296 | // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 |
1297 | // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1298 | // CHECK-NEXT: [[TMP27:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1299 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> [[TMP27]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1300 | // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 |
1301 | // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1302 | // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 |
1303 | // CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 |
1304 | // CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 |
1305 | // CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 |
1306 | // CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 |
1307 | // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 |
1308 | // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 |
1309 | // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 |
1310 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP33]], i32 0 |
1311 | // CHECK-NEXT: ret float [[VECEXT_I]] |
1312 | float test_mm512_reduce_max_ps(__m512 __W){ |
1313 | return _mm512_reduce_max_ps(__W); |
1314 | } |
1315 | |
1316 | // CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 { |
1317 | // CHECK-NEXT: entry: |
1318 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1319 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1320 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1321 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1322 | // CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 |
1323 | // CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 |
1324 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1325 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1326 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1327 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1328 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1329 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1330 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1331 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1332 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1333 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1334 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1335 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1336 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1337 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1338 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1339 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1340 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1341 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1342 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1343 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1344 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1345 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1346 | // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 |
1347 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1348 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1349 | // CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1350 | // CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1351 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1352 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> |
1353 | // CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1354 | // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> |
1355 | // CHECK-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[TMP6]], [[TMP8]] |
1356 | // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] |
1357 | // CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> |
1358 | // CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 |
1359 | // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1360 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1361 | // CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 |
1362 | // CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1363 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1364 | // CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 |
1365 | // CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1366 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1367 | // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1368 | // CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1369 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1370 | // CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> |
1371 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1372 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> |
1373 | // CHECK-NEXT: [[TMP20:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP19]] |
1374 | // CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] |
1375 | // CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> |
1376 | // CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 |
1377 | // CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1378 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> |
1379 | // CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1380 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> |
1381 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1382 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1383 | // CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 |
1384 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1385 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1386 | // CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1387 | // CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1388 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1389 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> |
1390 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1391 | // CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> |
1392 | // CHECK-NEXT: [[TMP34:%.*]] = icmp slt <4 x i32> [[TMP31]], [[TMP33]] |
1393 | // CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] |
1394 | // CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> |
1395 | // CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 |
1396 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1397 | // CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> |
1398 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1399 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> |
1400 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1401 | // CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> |
1402 | // CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 |
1403 | // CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1404 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1405 | // CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1406 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1407 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1408 | // CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> |
1409 | // CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1410 | // CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> |
1411 | // CHECK-NEXT: [[TMP48:%.*]] = icmp slt <4 x i32> [[TMP45]], [[TMP47]] |
1412 | // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] |
1413 | // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> |
1414 | // CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 |
1415 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1416 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 |
1417 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1418 | int test_mm512_reduce_min_epi32(__m512i __W){ |
1419 | return _mm512_reduce_min_epi32(__W); |
1420 | } |
1421 | |
1422 | // CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 { |
1423 | // CHECK-NEXT: entry: |
1424 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1425 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1426 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1427 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1428 | // CHECK-NEXT: [[__V1_ADDR_I10_I:%.*]] = alloca <2 x i64>, align 16 |
1429 | // CHECK-NEXT: [[__V2_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 |
1430 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1431 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1432 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1433 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1434 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1435 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1436 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1437 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1438 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1439 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1440 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1441 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1442 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1443 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1444 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1445 | // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1446 | // CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1447 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1448 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1449 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1450 | // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1451 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1452 | // CHECK-NEXT: store <4 x i64> [[EXTRACT2_I]], <4 x i64>* [[__T2_I]], align 32 |
1453 | // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1454 | // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1455 | // CHECK-NEXT: store <4 x i64> [[TMP3]], <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1456 | // CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1457 | // CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1458 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP5]] to <8 x i32> |
1459 | // CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1460 | // CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP7]] to <8 x i32> |
1461 | // CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x i32> [[TMP6]], [[TMP8]] |
1462 | // CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> [[TMP6]], <8 x i32> [[TMP8]] |
1463 | // CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i32> [[TMP10]] to <4 x i64> |
1464 | // CHECK-NEXT: store <4 x i64> [[TMP11]], <4 x i64>* [[__T3_I]], align 32 |
1465 | // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1466 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1467 | // CHECK-NEXT: store <2 x i64> [[EXTRACT4_I]], <2 x i64>* [[__T4_I]], align 16 |
1468 | // CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1469 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP13]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1470 | // CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T5_I]], align 16 |
1471 | // CHECK-NEXT: [[TMP14:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1472 | // CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1473 | // CHECK-NEXT: store <2 x i64> [[TMP14]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1474 | // CHECK-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1475 | // CHECK-NEXT: [[TMP16:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1476 | // CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP16]] to <4 x i32> |
1477 | // CHECK-NEXT: [[TMP18:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1478 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP18]] to <4 x i32> |
1479 | // CHECK-NEXT: [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP17]], [[TMP19]] |
1480 | // CHECK-NEXT: [[TMP21:%.*]] = select <4 x i1> [[TMP20]], <4 x i32> [[TMP17]], <4 x i32> [[TMP19]] |
1481 | // CHECK-NEXT: [[TMP22:%.*]] = bitcast <4 x i32> [[TMP21]] to <2 x i64> |
1482 | // CHECK-NEXT: store <2 x i64> [[TMP22]], <2 x i64>* [[__T6_I]], align 16 |
1483 | // CHECK-NEXT: [[TMP23:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1484 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i64> [[TMP23]] to <4 x i32> |
1485 | // CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1486 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i64> [[TMP25]] to <4 x i32> |
1487 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> [[TMP26]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1488 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1489 | // CHECK-NEXT: store <2 x i64> [[TMP27]], <2 x i64>* [[__T7_I]], align 16 |
1490 | // CHECK-NEXT: [[TMP28:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1491 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1492 | // CHECK-NEXT: store <2 x i64> [[TMP28]], <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1493 | // CHECK-NEXT: store <2 x i64> [[TMP29]], <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1494 | // CHECK-NEXT: [[TMP30:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I10_I]], align 16 |
1495 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <4 x i32> |
1496 | // CHECK-NEXT: [[TMP32:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I11_I]], align 16 |
1497 | // CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i64> [[TMP32]] to <4 x i32> |
1498 | // CHECK-NEXT: [[TMP34:%.*]] = icmp ult <4 x i32> [[TMP31]], [[TMP33]] |
1499 | // CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP34]], <4 x i32> [[TMP31]], <4 x i32> [[TMP33]] |
1500 | // CHECK-NEXT: [[TMP36:%.*]] = bitcast <4 x i32> [[TMP35]] to <2 x i64> |
1501 | // CHECK-NEXT: store <2 x i64> [[TMP36]], <2 x i64>* [[__T8_I]], align 16 |
1502 | // CHECK-NEXT: [[TMP37:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1503 | // CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i64> [[TMP37]] to <4 x i32> |
1504 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1505 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <2 x i64> [[TMP39]] to <4 x i32> |
1506 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x i32> [[TMP38]], <4 x i32> [[TMP40]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1507 | // CHECK-NEXT: [[TMP41:%.*]] = bitcast <4 x i32> [[SHUFFLE8_I]] to <2 x i64> |
1508 | // CHECK-NEXT: store <2 x i64> [[TMP41]], <2 x i64>* [[__T9_I]], align 16 |
1509 | // CHECK-NEXT: [[TMP42:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1510 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1511 | // CHECK-NEXT: store <2 x i64> [[TMP42]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1512 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1513 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1514 | // CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i64> [[TMP44]] to <4 x i32> |
1515 | // CHECK-NEXT: [[TMP46:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1516 | // CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i64> [[TMP46]] to <4 x i32> |
1517 | // CHECK-NEXT: [[TMP48:%.*]] = icmp ult <4 x i32> [[TMP45]], [[TMP47]] |
1518 | // CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[TMP45]], <4 x i32> [[TMP47]] |
1519 | // CHECK-NEXT: [[TMP50:%.*]] = bitcast <4 x i32> [[TMP49]] to <2 x i64> |
1520 | // CHECK-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[__T10_I]], align 16 |
1521 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1522 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP51]], i32 0 |
1523 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1524 | unsigned int test_mm512_reduce_min_epu32(__m512i __W){ |
1525 | return _mm512_reduce_min_epu32(__W); |
1526 | } |
1527 | |
1528 | // CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 { |
1529 | // CHECK-NEXT: entry: |
1530 | // CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <8 x float>, align 32 |
1531 | // CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <8 x float>, align 32 |
1532 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 |
1533 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 |
1534 | // CHECK-NEXT: [[__A_ADDR_I10_I:%.*]] = alloca <4 x float>, align 16 |
1535 | // CHECK-NEXT: [[__B_ADDR_I11_I:%.*]] = alloca <4 x float>, align 16 |
1536 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1537 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1538 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 |
1539 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 |
1540 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 |
1541 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 |
1542 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 |
1543 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 |
1544 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 |
1545 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 |
1546 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 |
1547 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 |
1548 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 |
1549 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 |
1550 | // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 |
1551 | // CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 |
1552 | // CHECK-NEXT: store <16 x float> [[TMP0]], <16 x float>* [[__V_ADDR_I]], align 64 |
1553 | // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1554 | // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[TMP1]] to <8 x double> |
1555 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1556 | // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> |
1557 | // CHECK-NEXT: store <8 x float> [[TMP3]], <8 x float>* [[__T1_I]], align 32 |
1558 | // CHECK-NEXT: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1559 | // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP4]] to <8 x double> |
1560 | // CHECK-NEXT: [[EXTRACT2_I:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1561 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[EXTRACT2_I]] to <8 x float> |
1562 | // CHECK-NEXT: store <8 x float> [[TMP6]], <8 x float>* [[__T2_I]], align 32 |
1563 | // CHECK-NEXT: [[TMP7:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 |
1564 | // CHECK-NEXT: [[TMP8:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 |
1565 | // CHECK-NEXT: store <8 x float> [[TMP7]], <8 x float>* [[__A_ADDR_I14_I]], align 32 |
1566 | // CHECK-NEXT: store <8 x float> [[TMP8]], <8 x float>* [[__B_ADDR_I15_I]], align 32 |
1567 | // CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I14_I]], align 32 |
1568 | // CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I15_I]], align 32 |
1569 | // CHECK-NEXT: [[TMP11:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP9]], <8 x float> [[TMP10]]) #2 |
1570 | // CHECK-NEXT: store <8 x float> [[TMP11]], <8 x float>* [[__T3_I]], align 32 |
1571 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
1572 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1573 | // CHECK-NEXT: store <4 x float> [[EXTRACT4_I]], <4 x float>* [[__T4_I]], align 16 |
1574 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
1575 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1576 | // CHECK-NEXT: store <4 x float> [[EXTRACT5_I]], <4 x float>* [[__T5_I]], align 16 |
1577 | // CHECK-NEXT: [[TMP14:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 |
1578 | // CHECK-NEXT: [[TMP15:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 |
1579 | // CHECK-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[__A_ADDR_I12_I]], align 16 |
1580 | // CHECK-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[__B_ADDR_I13_I]], align 16 |
1581 | // CHECK-NEXT: [[TMP16:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 |
1582 | // CHECK-NEXT: [[TMP17:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 |
1583 | // CHECK-NEXT: [[TMP18:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP16]], <4 x float> [[TMP17]]) #2 |
1584 | // CHECK-NEXT: store <4 x float> [[TMP18]], <4 x float>* [[__T6_I]], align 16 |
1585 | // CHECK-NEXT: [[TMP19:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1586 | // CHECK-NEXT: [[TMP20:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1587 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1588 | // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 |
1589 | // CHECK-NEXT: [[TMP21:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
1590 | // CHECK-NEXT: [[TMP22:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 |
1591 | // CHECK-NEXT: store <4 x float> [[TMP21]], <4 x float>* [[__A_ADDR_I10_I]], align 16 |
1592 | // CHECK-NEXT: store <4 x float> [[TMP22]], <4 x float>* [[__B_ADDR_I11_I]], align 16 |
1593 | // CHECK-NEXT: [[TMP23:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I10_I]], align 16 |
1594 | // CHECK-NEXT: [[TMP24:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I11_I]], align 16 |
1595 | // CHECK-NEXT: [[TMP25:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP23]], <4 x float> [[TMP24]]) #2 |
1596 | // CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[__T8_I]], align 16 |
1597 | // CHECK-NEXT: [[TMP26:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1598 | // CHECK-NEXT: [[TMP27:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1599 | // CHECK-NEXT: [[SHUFFLE8_I:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> [[TMP27]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1600 | // CHECK-NEXT: store <4 x float> [[SHUFFLE8_I]], <4 x float>* [[__T9_I]], align 16 |
1601 | // CHECK-NEXT: [[TMP28:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
1602 | // CHECK-NEXT: [[TMP29:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 |
1603 | // CHECK-NEXT: store <4 x float> [[TMP28]], <4 x float>* [[__A_ADDR_I_I]], align 16 |
1604 | // CHECK-NEXT: store <4 x float> [[TMP29]], <4 x float>* [[__B_ADDR_I_I]], align 16 |
1605 | // CHECK-NEXT: [[TMP30:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I_I]], align 16 |
1606 | // CHECK-NEXT: [[TMP31:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 |
1607 | // CHECK-NEXT: [[TMP32:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP30]], <4 x float> [[TMP31]]) #2 |
1608 | // CHECK-NEXT: store <4 x float> [[TMP32]], <4 x float>* [[__T10_I]], align 16 |
1609 | // CHECK-NEXT: [[TMP33:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 |
1610 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP33]], i32 0 |
1611 | // CHECK-NEXT: ret float [[VECEXT_I]] |
1612 | float test_mm512_reduce_min_ps(__m512 __W){ |
1613 | return _mm512_reduce_min_ps(__W); |
1614 | } |
1615 | |
1616 | // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { |
1617 | // CHECK-NEXT: entry: |
1618 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
1619 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
1620 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
1621 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1622 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1623 | // CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 |
1624 | // CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 |
1625 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1626 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1627 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1628 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1629 | // CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 |
1630 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 |
1631 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
1632 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1633 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1634 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1635 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1636 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1637 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1638 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1639 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1640 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1641 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1642 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1643 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
1644 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1645 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
1646 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1647 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
1648 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1649 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
1650 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1651 | // CHECK-NEXT: store i32 -2147483648, i32* [[__S_ADDR_I_I]], align 4 |
1652 | // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1653 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 |
1654 | // CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1655 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 |
1656 | // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1657 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 |
1658 | // CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1659 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 |
1660 | // CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1661 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 |
1662 | // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1663 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 |
1664 | // CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1665 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 |
1666 | // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1667 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 |
1668 | // CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1669 | // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 |
1670 | // CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1671 | // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 |
1672 | // CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1673 | // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 |
1674 | // CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1675 | // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 |
1676 | // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1677 | // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 |
1678 | // CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1679 | // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 |
1680 | // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1681 | // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 |
1682 | // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
1683 | // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 |
1684 | // CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
1685 | // CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
1686 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> |
1687 | // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
1688 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1689 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
1690 | // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 |
1691 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A2_ADDR_I_I]], align 64 |
1692 | // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
1693 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A2_ADDR_I_I]], align 64 |
1694 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> |
1695 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
1696 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> |
1697 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> |
1698 | // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] |
1699 | // CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> |
1700 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1701 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1702 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1703 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1704 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1705 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1706 | // CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 |
1707 | // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1708 | // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1709 | // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1710 | // CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1711 | // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A_ADDR_I_I]], align 32 |
1712 | // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> |
1713 | // CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1714 | // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> |
1715 | // CHECK-NEXT: [[TMP38:%.*]] = icmp sgt <8 x i32> [[TMP35]], [[TMP37]] |
1716 | // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] |
1717 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> |
1718 | // CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 |
1719 | // CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1720 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1721 | // CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 |
1722 | // CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1723 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1724 | // CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 |
1725 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1726 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1727 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
1728 | // CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
1729 | // CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
1730 | // CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32> |
1731 | // CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
1732 | // CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> |
1733 | // CHECK-NEXT: [[TMP49:%.*]] = icmp sgt <4 x i32> [[TMP46]], [[TMP48]] |
1734 | // CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] |
1735 | // CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> |
1736 | // CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 |
1737 | // CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1738 | // CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> |
1739 | // CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1740 | // CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> |
1741 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1742 | // CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1743 | // CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 |
1744 | // CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1745 | // CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1746 | // CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1747 | // CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1748 | // CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
1749 | // CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> |
1750 | // CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
1751 | // CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> |
1752 | // CHECK-NEXT: [[TMP63:%.*]] = icmp sgt <4 x i32> [[TMP60]], [[TMP62]] |
1753 | // CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] |
1754 | // CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> |
1755 | // CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 |
1756 | // CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1757 | // CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> |
1758 | // CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1759 | // CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> |
1760 | // CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1761 | // CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> |
1762 | // CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 |
1763 | // CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1764 | // CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1765 | // CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1766 | // CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1767 | // CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1768 | // CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> |
1769 | // CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1770 | // CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> |
1771 | // CHECK-NEXT: [[TMP77:%.*]] = icmp sgt <4 x i32> [[TMP74]], [[TMP76]] |
1772 | // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] |
1773 | // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> |
1774 | // CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 |
1775 | // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1776 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 |
1777 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1778 | int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){ |
1779 | return _mm512_mask_reduce_max_epi32(__M, __W); |
1780 | } |
1781 | |
1782 | // CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { |
1783 | // CHECK-NEXT: entry: |
1784 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1785 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
1786 | // CHECK-NEXT: [[__V1_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
1787 | // CHECK-NEXT: [[__V2_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 |
1788 | // CHECK-NEXT: [[__V1_ADDR_I11_I:%.*]] = alloca <2 x i64>, align 16 |
1789 | // CHECK-NEXT: [[__V2_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
1790 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1791 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
1792 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64 |
1793 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
1794 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
1795 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
1796 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
1797 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
1798 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
1799 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
1800 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
1801 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
1802 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
1803 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
1804 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
1805 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
1806 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
1807 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
1808 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
1809 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
1810 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
1811 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
1812 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
1813 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
1814 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1815 | // CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
1816 | // CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1817 | // CHECK-NEXT: store i16 [[TMP2]], i16* [[__U_ADDR_I_I]], align 2 |
1818 | // CHECK-NEXT: store <8 x i64> [[TMP3]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
1819 | // CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
1820 | // CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
1821 | // CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP5]] to <16 x i32> |
1822 | // CHECK-NEXT: store <8 x i64> zeroinitializer, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 |
1823 | // CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[DOTCOMPOUNDLITERAL_I_I_I]], align 64 |
1824 | // CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP7]] to <16 x i32> |
1825 | // CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> |
1826 | // CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP6]], <16 x i32> [[TMP8]] |
1827 | // CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> |
1828 | // CHECK-NEXT: store <8 x i64> [[TMP11]], <8 x i64>* [[__V_ADDR_I]], align 64 |
1829 | // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1830 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1831 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
1832 | // CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
1833 | // CHECK-NEXT: [[EXTRACT3_I:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1834 | // CHECK-NEXT: store <4 x i64> [[EXTRACT3_I]], <4 x i64>* [[__T2_I]], align 32 |
1835 | // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
1836 | // CHECK-NEXT: [[TMP15:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
1837 | // CHECK-NEXT: store <4 x i64> [[TMP14]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
1838 | // CHECK-NEXT: store <4 x i64> [[TMP15]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1839 | // CHECK-NEXT: [[TMP16:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
1840 | // CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i64> [[TMP16]] to <8 x i32> |
1841 | // CHECK-NEXT: [[TMP18:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
1842 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i64> [[TMP18]] to <8 x i32> |
1843 | // CHECK-NEXT: [[TMP20:%.*]] = icmp ugt <8 x i32> [[TMP17]], [[TMP19]] |
1844 | // CHECK-NEXT: [[TMP21:%.*]] = select <8 x i1> [[TMP20]], <8 x i32> [[TMP17]], <8 x i32> [[TMP19]] |
1845 | // CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i32> [[TMP21]] to <4 x i64> |
1846 | // CHECK-NEXT: store <4 x i64> [[TMP22]], <4 x i64>* [[__T3_I]], align 32 |
1847 | // CHECK-NEXT: [[TMP23:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1848 | // CHECK-NEXT: [[EXTRACT5_I:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
1849 | // CHECK-NEXT: store <2 x i64> [[EXTRACT5_I]], <2 x i64>* [[__T4_I]], align 16 |
1850 | // CHECK-NEXT: [[TMP24:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
1851 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP24]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
1852 | // CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T5_I]], align 16 |
1853 | // CHECK-NEXT: [[TMP25:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
1854 | // CHECK-NEXT: [[TMP26:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
1855 | // CHECK-NEXT: store <2 x i64> [[TMP25]], <2 x i64>* [[__V1_ADDR_I13_I]], align 16 |
1856 | // CHECK-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[__V2_ADDR_I14_I]], align 16 |
1857 | // CHECK-NEXT: [[TMP27:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I13_I]], align 16 |
1858 | // CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i64> [[TMP27]] to <4 x i32> |
1859 | // CHECK-NEXT: [[TMP29:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I14_I]], align 16 |
1860 | // CHECK-NEXT: [[TMP30:%.*]] = bitcast <2 x i64> [[TMP29]] to <4 x i32> |
1861 | // CHECK-NEXT: [[TMP31:%.*]] = icmp ugt <4 x i32> [[TMP28]], [[TMP30]] |
1862 | // CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP31]], <4 x i32> [[TMP28]], <4 x i32> [[TMP30]] |
1863 | // CHECK-NEXT: [[TMP33:%.*]] = bitcast <4 x i32> [[TMP32]] to <2 x i64> |
1864 | // CHECK-NEXT: store <2 x i64> [[TMP33]], <2 x i64>* [[__T6_I]], align 16 |
1865 | // CHECK-NEXT: [[TMP34:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1866 | // CHECK-NEXT: [[TMP35:%.*]] = bitcast <2 x i64> [[TMP34]] to <4 x i32> |
1867 | // CHECK-NEXT: [[TMP36:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1868 | // CHECK-NEXT: [[TMP37:%.*]] = bitcast <2 x i64> [[TMP36]] to <4 x i32> |
1869 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP37]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
1870 | // CHECK-NEXT: [[TMP38:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
1871 | // CHECK-NEXT: store <2 x i64> [[TMP38]], <2 x i64>* [[__T7_I]], align 16 |
1872 | // CHECK-NEXT: [[TMP39:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
1873 | // CHECK-NEXT: [[TMP40:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
1874 | // CHECK-NEXT: store <2 x i64> [[TMP39]], <2 x i64>* [[__V1_ADDR_I11_I]], align 16 |
1875 | // CHECK-NEXT: store <2 x i64> [[TMP40]], <2 x i64>* [[__V2_ADDR_I12_I]], align 16 |
1876 | // CHECK-NEXT: [[TMP41:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I11_I]], align 16 |
1877 | // CHECK-NEXT: [[TMP42:%.*]] = bitcast <2 x i64> [[TMP41]] to <4 x i32> |
1878 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I12_I]], align 16 |
1879 | // CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i64> [[TMP43]] to <4 x i32> |
1880 | // CHECK-NEXT: [[TMP45:%.*]] = icmp ugt <4 x i32> [[TMP42]], [[TMP44]] |
1881 | // CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[TMP42]], <4 x i32> [[TMP44]] |
1882 | // CHECK-NEXT: [[TMP47:%.*]] = bitcast <4 x i32> [[TMP46]] to <2 x i64> |
1883 | // CHECK-NEXT: store <2 x i64> [[TMP47]], <2 x i64>* [[__T8_I]], align 16 |
1884 | // CHECK-NEXT: [[TMP48:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1885 | // CHECK-NEXT: [[TMP49:%.*]] = bitcast <2 x i64> [[TMP48]] to <4 x i32> |
1886 | // CHECK-NEXT: [[TMP50:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1887 | // CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i64> [[TMP50]] to <4 x i32> |
1888 | // CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x i32> [[TMP49]], <4 x i32> [[TMP51]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
1889 | // CHECK-NEXT: [[TMP52:%.*]] = bitcast <4 x i32> [[SHUFFLE9_I]] to <2 x i64> |
1890 | // CHECK-NEXT: store <2 x i64> [[TMP52]], <2 x i64>* [[__T9_I]], align 16 |
1891 | // CHECK-NEXT: [[TMP53:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
1892 | // CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
1893 | // CHECK-NEXT: store <2 x i64> [[TMP53]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1894 | // CHECK-NEXT: store <2 x i64> [[TMP54]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1895 | // CHECK-NEXT: [[TMP55:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
1896 | // CHECK-NEXT: [[TMP56:%.*]] = bitcast <2 x i64> [[TMP55]] to <4 x i32> |
1897 | // CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
1898 | // CHECK-NEXT: [[TMP58:%.*]] = bitcast <2 x i64> [[TMP57]] to <4 x i32> |
1899 | // CHECK-NEXT: [[TMP59:%.*]] = icmp ugt <4 x i32> [[TMP56]], [[TMP58]] |
1900 | // CHECK-NEXT: [[TMP60:%.*]] = select <4 x i1> [[TMP59]], <4 x i32> [[TMP56]], <4 x i32> [[TMP58]] |
1901 | // CHECK-NEXT: [[TMP61:%.*]] = bitcast <4 x i32> [[TMP60]] to <2 x i64> |
1902 | // CHECK-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[__T10_I]], align 16 |
1903 | // CHECK-NEXT: [[TMP62:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
1904 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP62]], i32 0 |
1905 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
1906 | unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){ |
1907 | return _mm512_mask_reduce_max_epu32(__M, __W); |
1908 | } |
1909 | |
1910 | // CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 { |
1911 | // CHECK-NEXT: entry: |
1912 | // CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 |
1913 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
1914 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 |
1915 | // CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 |
1916 | // CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 |
1917 | // CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 |
1918 | // CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 |
1919 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 |
1920 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 |
1921 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1922 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
1923 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 |
1924 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 |
1925 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
1926 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 |
1927 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 |
1928 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 |
1929 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 |
1930 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 |
1931 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 |
1932 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 |
1933 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 |
1934 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 |
1935 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 |
1936 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 |
1937 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
1938 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 |
1939 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
1940 | // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 |
1941 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
1942 | // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 |
1943 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
1944 | // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 |
1945 | // CHECK-NEXT: store float 0xFFF0000000000000, float* [[__W_ADDR_I_I]], align 4 |
1946 | // CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1947 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 |
1948 | // CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1949 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 |
1950 | // CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1951 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 |
1952 | // CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1953 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 |
1954 | // CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1955 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 |
1956 | // CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1957 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 |
1958 | // CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1959 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 |
1960 | // CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1961 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 |
1962 | // CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1963 | // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 |
1964 | // CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1965 | // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 |
1966 | // CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1967 | // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 |
1968 | // CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1969 | // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 |
1970 | // CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1971 | // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 |
1972 | // CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1973 | // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 |
1974 | // CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1975 | // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 |
1976 | // CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
1977 | // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 |
1978 | // CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
1979 | // CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
1980 | // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
1981 | // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1982 | // CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 |
1983 | // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 |
1984 | // CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 |
1985 | // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
1986 | // CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 |
1987 | // CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 |
1988 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> |
1989 | // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] |
1990 | // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 |
1991 | // CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1992 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double> |
1993 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
1994 | // CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> |
1995 | // CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32 |
1996 | // CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
1997 | // CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double> |
1998 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
1999 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float> |
2000 | // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 |
2001 | // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 |
2002 | // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 |
2003 | // CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 |
2004 | // CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 |
2005 | // CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 |
2006 | // CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 |
2007 | // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 |
2008 | // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 |
2009 | // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
2010 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
2011 | // CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16 |
2012 | // CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
2013 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2014 | // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 |
2015 | // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 |
2016 | // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 |
2017 | // CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 |
2018 | // CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 |
2019 | // CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 |
2020 | // CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 |
2021 | // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 |
2022 | // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 |
2023 | // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2024 | // CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2025 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
2026 | // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 |
2027 | // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2028 | // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 |
2029 | // CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 |
2030 | // CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 |
2031 | // CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 |
2032 | // CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 |
2033 | // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 |
2034 | // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 |
2035 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2036 | // CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2037 | // CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
2038 | // CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16 |
2039 | // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2040 | // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 |
2041 | // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 |
2042 | // CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 |
2043 | // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 |
2044 | // CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 |
2045 | // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 |
2046 | // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 |
2047 | // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 |
2048 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0 |
2049 | // CHECK-NEXT: ret float [[VECEXT_I]] |
2050 | float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){ |
2051 | return _mm512_mask_reduce_max_ps(__M, __W); |
2052 | } |
2053 | |
2054 | // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 { |
2055 | // CHECK-NEXT: entry: |
2056 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
2057 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
2058 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
2059 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
2060 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
2061 | // CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 |
2062 | // CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 |
2063 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
2064 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
2065 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
2066 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
2067 | // CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 |
2068 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 |
2069 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
2070 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
2071 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
2072 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
2073 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
2074 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
2075 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
2076 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
2077 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
2078 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
2079 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
2080 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
2081 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
2082 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
2083 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
2084 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
2085 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
2086 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
2087 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
2088 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
2089 | // CHECK-NEXT: store i32 2147483647, i32* [[__S_ADDR_I_I]], align 4 |
2090 | // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2091 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 |
2092 | // CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2093 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 |
2094 | // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2095 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 |
2096 | // CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2097 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 |
2098 | // CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2099 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 |
2100 | // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2101 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 |
2102 | // CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2103 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 |
2104 | // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2105 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 |
2106 | // CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2107 | // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 |
2108 | // CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2109 | // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 |
2110 | // CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2111 | // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 |
2112 | // CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2113 | // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 |
2114 | // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2115 | // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 |
2116 | // CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2117 | // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 |
2118 | // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2119 | // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 |
2120 | // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2121 | // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 |
2122 | // CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2123 | // CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2124 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> |
2125 | // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
2126 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2127 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
2128 | // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 |
2129 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
2130 | // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
2131 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
2132 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> |
2133 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
2134 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> |
2135 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> |
2136 | // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] |
2137 | // CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> |
2138 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 |
2139 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2140 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
2141 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
2142 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2143 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2144 | // CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 |
2145 | // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
2146 | // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
2147 | // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
2148 | // CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
2149 | // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
2150 | // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> |
2151 | // CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
2152 | // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> |
2153 | // CHECK-NEXT: [[TMP38:%.*]] = icmp slt <8 x i32> [[TMP35]], [[TMP37]] |
2154 | // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] |
2155 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> |
2156 | // CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 |
2157 | // CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
2158 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
2159 | // CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 |
2160 | // CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
2161 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
2162 | // CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 |
2163 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
2164 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
2165 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
2166 | // CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
2167 | // CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
2168 | // CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32> |
2169 | // CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
2170 | // CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> |
2171 | // CHECK-NEXT: [[TMP49:%.*]] = icmp slt <4 x i32> [[TMP46]], [[TMP48]] |
2172 | // CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] |
2173 | // CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> |
2174 | // CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 |
2175 | // CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2176 | // CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> |
2177 | // CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2178 | // CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> |
2179 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
2180 | // CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
2181 | // CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 |
2182 | // CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2183 | // CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
2184 | // CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
2185 | // CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
2186 | // CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
2187 | // CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> |
2188 | // CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
2189 | // CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> |
2190 | // CHECK-NEXT: [[TMP63:%.*]] = icmp slt <4 x i32> [[TMP60]], [[TMP62]] |
2191 | // CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] |
2192 | // CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> |
2193 | // CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 |
2194 | // CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2195 | // CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> |
2196 | // CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2197 | // CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> |
2198 | // CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
2199 | // CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> |
2200 | // CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 |
2201 | // CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2202 | // CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
2203 | // CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
2204 | // CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
2205 | // CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
2206 | // CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> |
2207 | // CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
2208 | // CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> |
2209 | // CHECK-NEXT: [[TMP77:%.*]] = icmp slt <4 x i32> [[TMP74]], [[TMP76]] |
2210 | // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] |
2211 | // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> |
2212 | // CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 |
2213 | // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
2214 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 |
2215 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
2216 | int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){ |
2217 | return _mm512_mask_reduce_min_epi32(__M, __W); |
2218 | } |
2219 | |
2220 | // CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 { |
2221 | // CHECK-NEXT: entry: |
2222 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
2223 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
2224 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64 |
2225 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
2226 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x i64>, align 32 |
2227 | // CHECK-NEXT: [[__V1_ADDR_I14_I:%.*]] = alloca <2 x i64>, align 16 |
2228 | // CHECK-NEXT: [[__V2_ADDR_I15_I:%.*]] = alloca <2 x i64>, align 16 |
2229 | // CHECK-NEXT: [[__V1_ADDR_I12_I:%.*]] = alloca <2 x i64>, align 16 |
2230 | // CHECK-NEXT: [[__V2_ADDR_I13_I:%.*]] = alloca <2 x i64>, align 16 |
2231 | // CHECK-NEXT: [[__V1_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
2232 | // CHECK-NEXT: [[__V2_ADDR_I_I:%.*]] = alloca <2 x i64>, align 16 |
2233 | // CHECK-NEXT: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4 |
2234 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64 |
2235 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
2236 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64 |
2237 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <4 x i64>, align 32 |
2238 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <4 x i64>, align 32 |
2239 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <4 x i64>, align 32 |
2240 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <2 x i64>, align 16 |
2241 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <2 x i64>, align 16 |
2242 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <2 x i64>, align 16 |
2243 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <2 x i64>, align 16 |
2244 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <2 x i64>, align 16 |
2245 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <2 x i64>, align 16 |
2246 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x i32>, align 16 |
2247 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
2248 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64 |
2249 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
2250 | // CHECK-NEXT: store <8 x i64> [[__W:%.*]], <8 x i64>* [[__W_ADDR]], align 64 |
2251 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
2252 | // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64 |
2253 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
2254 | // CHECK-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64 |
2255 | // CHECK-NEXT: store i32 -1, i32* [[__S_ADDR_I_I]], align 4 |
2256 | // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2257 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP2]], i32 0 |
2258 | // CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2259 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP3]], i32 1 |
2260 | // CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2261 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP4]], i32 2 |
2262 | // CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2263 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP5]], i32 3 |
2264 | // CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2265 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP6]], i32 4 |
2266 | // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2267 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP7]], i32 5 |
2268 | // CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2269 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP8]], i32 6 |
2270 | // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2271 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP9]], i32 7 |
2272 | // CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2273 | // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP10]], i32 8 |
2274 | // CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2275 | // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP11]], i32 9 |
2276 | // CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2277 | // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP12]], i32 10 |
2278 | // CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2279 | // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP13]], i32 11 |
2280 | // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2281 | // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP14]], i32 12 |
2282 | // CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2283 | // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP15]], i32 13 |
2284 | // CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2285 | // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP16]], i32 14 |
2286 | // CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4 |
2287 | // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP17]], i32 15 |
2288 | // CHECK-NEXT: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2289 | // CHECK-NEXT: [[TMP18:%.*]] = load <16 x i32>, <16 x i32>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2290 | // CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64> |
2291 | // CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
2292 | // CHECK-NEXT: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2293 | // CHECK-NEXT: store <8 x i64> [[TMP19]], <8 x i64>* [[__W_ADDR_I_I]], align 64 |
2294 | // CHECK-NEXT: store i16 [[TMP20]], i16* [[__U_ADDR_I_I]], align 2 |
2295 | // CHECK-NEXT: store <8 x i64> [[TMP21]], <8 x i64>* [[__A_ADDR_I_I]], align 64 |
2296 | // CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
2297 | // CHECK-NEXT: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64 |
2298 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP23]] to <16 x i32> |
2299 | // CHECK-NEXT: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR_I_I]], align 64 |
2300 | // CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32> |
2301 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast i16 [[TMP22]] to <16 x i1> |
2302 | // CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP27]], <16 x i32> [[TMP24]], <16 x i32> [[TMP26]] |
2303 | // CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP28]] to <8 x i64> |
2304 | // CHECK-NEXT: store <8 x i64> [[TMP29]], <8 x i64>* [[__V_ADDR_I]], align 64 |
2305 | // CHECK-NEXT: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2306 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x i64> [[TMP30]], <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
2307 | // CHECK-NEXT: store <4 x i64> [[EXTRACT_I]], <4 x i64>* [[__T1_I]], align 32 |
2308 | // CHECK-NEXT: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64 |
2309 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x i64> [[TMP31]], <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2310 | // CHECK-NEXT: store <4 x i64> [[EXTRACT4_I]], <4 x i64>* [[__T2_I]], align 32 |
2311 | // CHECK-NEXT: [[TMP32:%.*]] = load <4 x i64>, <4 x i64>* [[__T1_I]], align 32 |
2312 | // CHECK-NEXT: [[TMP33:%.*]] = load <4 x i64>, <4 x i64>* [[__T2_I]], align 32 |
2313 | // CHECK-NEXT: store <4 x i64> [[TMP32]], <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
2314 | // CHECK-NEXT: store <4 x i64> [[TMP33]], <4 x i64>* [[__B_ADDR_I_I]], align 32 |
2315 | // CHECK-NEXT: [[TMP34:%.*]] = load <4 x i64>, <4 x i64>* [[__A2_ADDR_I_I]], align 32 |
2316 | // CHECK-NEXT: [[TMP35:%.*]] = bitcast <4 x i64> [[TMP34]] to <8 x i32> |
2317 | // CHECK-NEXT: [[TMP36:%.*]] = load <4 x i64>, <4 x i64>* [[__B_ADDR_I_I]], align 32 |
2318 | // CHECK-NEXT: [[TMP37:%.*]] = bitcast <4 x i64> [[TMP36]] to <8 x i32> |
2319 | // CHECK-NEXT: [[TMP38:%.*]] = icmp ult <8 x i32> [[TMP35]], [[TMP37]] |
2320 | // CHECK-NEXT: [[TMP39:%.*]] = select <8 x i1> [[TMP38]], <8 x i32> [[TMP35]], <8 x i32> [[TMP37]] |
2321 | // CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i32> [[TMP39]] to <4 x i64> |
2322 | // CHECK-NEXT: store <4 x i64> [[TMP40]], <4 x i64>* [[__T3_I]], align 32 |
2323 | // CHECK-NEXT: [[TMP41:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
2324 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <4 x i64> [[TMP41]], <4 x i64> undef, <2 x i32> <i32 0, i32 1> |
2325 | // CHECK-NEXT: store <2 x i64> [[EXTRACT6_I]], <2 x i64>* [[__T4_I]], align 16 |
2326 | // CHECK-NEXT: [[TMP42:%.*]] = load <4 x i64>, <4 x i64>* [[__T3_I]], align 32 |
2327 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <4 x i64> [[TMP42]], <4 x i64> undef, <2 x i32> <i32 2, i32 3> |
2328 | // CHECK-NEXT: store <2 x i64> [[EXTRACT7_I]], <2 x i64>* [[__T5_I]], align 16 |
2329 | // CHECK-NEXT: [[TMP43:%.*]] = load <2 x i64>, <2 x i64>* [[__T4_I]], align 16 |
2330 | // CHECK-NEXT: [[TMP44:%.*]] = load <2 x i64>, <2 x i64>* [[__T5_I]], align 16 |
2331 | // CHECK-NEXT: store <2 x i64> [[TMP43]], <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
2332 | // CHECK-NEXT: store <2 x i64> [[TMP44]], <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
2333 | // CHECK-NEXT: [[TMP45:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I14_I]], align 16 |
2334 | // CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i64> [[TMP45]] to <4 x i32> |
2335 | // CHECK-NEXT: [[TMP47:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I15_I]], align 16 |
2336 | // CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i64> [[TMP47]] to <4 x i32> |
2337 | // CHECK-NEXT: [[TMP49:%.*]] = icmp ult <4 x i32> [[TMP46]], [[TMP48]] |
2338 | // CHECK-NEXT: [[TMP50:%.*]] = select <4 x i1> [[TMP49]], <4 x i32> [[TMP46]], <4 x i32> [[TMP48]] |
2339 | // CHECK-NEXT: [[TMP51:%.*]] = bitcast <4 x i32> [[TMP50]] to <2 x i64> |
2340 | // CHECK-NEXT: store <2 x i64> [[TMP51]], <2 x i64>* [[__T6_I]], align 16 |
2341 | // CHECK-NEXT: [[TMP52:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2342 | // CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i64> [[TMP52]] to <4 x i32> |
2343 | // CHECK-NEXT: [[TMP54:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2344 | // CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i64> [[TMP54]] to <4 x i32> |
2345 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> [[TMP55]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
2346 | // CHECK-NEXT: [[TMP56:%.*]] = bitcast <4 x i32> [[SHUFFLE_I]] to <2 x i64> |
2347 | // CHECK-NEXT: store <2 x i64> [[TMP56]], <2 x i64>* [[__T7_I]], align 16 |
2348 | // CHECK-NEXT: [[TMP57:%.*]] = load <2 x i64>, <2 x i64>* [[__T6_I]], align 16 |
2349 | // CHECK-NEXT: [[TMP58:%.*]] = load <2 x i64>, <2 x i64>* [[__T7_I]], align 16 |
2350 | // CHECK-NEXT: store <2 x i64> [[TMP57]], <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
2351 | // CHECK-NEXT: store <2 x i64> [[TMP58]], <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
2352 | // CHECK-NEXT: [[TMP59:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I12_I]], align 16 |
2353 | // CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i64> [[TMP59]] to <4 x i32> |
2354 | // CHECK-NEXT: [[TMP61:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I13_I]], align 16 |
2355 | // CHECK-NEXT: [[TMP62:%.*]] = bitcast <2 x i64> [[TMP61]] to <4 x i32> |
2356 | // CHECK-NEXT: [[TMP63:%.*]] = icmp ult <4 x i32> [[TMP60]], [[TMP62]] |
2357 | // CHECK-NEXT: [[TMP64:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP60]], <4 x i32> [[TMP62]] |
2358 | // CHECK-NEXT: [[TMP65:%.*]] = bitcast <4 x i32> [[TMP64]] to <2 x i64> |
2359 | // CHECK-NEXT: store <2 x i64> [[TMP65]], <2 x i64>* [[__T8_I]], align 16 |
2360 | // CHECK-NEXT: [[TMP66:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2361 | // CHECK-NEXT: [[TMP67:%.*]] = bitcast <2 x i64> [[TMP66]] to <4 x i32> |
2362 | // CHECK-NEXT: [[TMP68:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2363 | // CHECK-NEXT: [[TMP69:%.*]] = bitcast <2 x i64> [[TMP68]] to <4 x i32> |
2364 | // CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> [[TMP69]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
2365 | // CHECK-NEXT: [[TMP70:%.*]] = bitcast <4 x i32> [[SHUFFLE10_I]] to <2 x i64> |
2366 | // CHECK-NEXT: store <2 x i64> [[TMP70]], <2 x i64>* [[__T9_I]], align 16 |
2367 | // CHECK-NEXT: [[TMP71:%.*]] = load <2 x i64>, <2 x i64>* [[__T8_I]], align 16 |
2368 | // CHECK-NEXT: [[TMP72:%.*]] = load <2 x i64>, <2 x i64>* [[__T9_I]], align 16 |
2369 | // CHECK-NEXT: store <2 x i64> [[TMP71]], <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
2370 | // CHECK-NEXT: store <2 x i64> [[TMP72]], <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
2371 | // CHECK-NEXT: [[TMP73:%.*]] = load <2 x i64>, <2 x i64>* [[__V1_ADDR_I_I]], align 16 |
2372 | // CHECK-NEXT: [[TMP74:%.*]] = bitcast <2 x i64> [[TMP73]] to <4 x i32> |
2373 | // CHECK-NEXT: [[TMP75:%.*]] = load <2 x i64>, <2 x i64>* [[__V2_ADDR_I_I]], align 16 |
2374 | // CHECK-NEXT: [[TMP76:%.*]] = bitcast <2 x i64> [[TMP75]] to <4 x i32> |
2375 | // CHECK-NEXT: [[TMP77:%.*]] = icmp ult <4 x i32> [[TMP74]], [[TMP76]] |
2376 | // CHECK-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP77]], <4 x i32> [[TMP74]], <4 x i32> [[TMP76]] |
2377 | // CHECK-NEXT: [[TMP79:%.*]] = bitcast <4 x i32> [[TMP78]] to <2 x i64> |
2378 | // CHECK-NEXT: store <4 x i32> [[TMP78]], <4 x i32>* [[__T10_I]], align 16 |
2379 | // CHECK-NEXT: [[TMP80:%.*]] = load <4 x i32>, <4 x i32>* [[__T10_I]], align 16 |
2380 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[TMP80]], i32 0 |
2381 | // CHECK-NEXT: ret i32 [[VECEXT_I]] |
2382 | unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){ |
2383 | return _mm512_mask_reduce_min_epu32(__M, __W); |
2384 | } |
2385 | |
2386 | // CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 { |
2387 | // CHECK-NEXT: entry: |
2388 | // CHECK-NEXT: [[__W2_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 |
2389 | // CHECK-NEXT: [[__U_ADDR_I_I:%.*]] = alloca i16, align 2 |
2390 | // CHECK-NEXT: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64 |
2391 | // CHECK-NEXT: [[__A_ADDR_I16_I:%.*]] = alloca <8 x float>, align 32 |
2392 | // CHECK-NEXT: [[__B_ADDR_I17_I:%.*]] = alloca <8 x float>, align 32 |
2393 | // CHECK-NEXT: [[__A_ADDR_I14_I:%.*]] = alloca <4 x float>, align 16 |
2394 | // CHECK-NEXT: [[__B_ADDR_I15_I:%.*]] = alloca <4 x float>, align 16 |
2395 | // CHECK-NEXT: [[__A_ADDR_I12_I:%.*]] = alloca <4 x float>, align 16 |
2396 | // CHECK-NEXT: [[__B_ADDR_I13_I:%.*]] = alloca <4 x float>, align 16 |
2397 | // CHECK-NEXT: [[__A2_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
2398 | // CHECK-NEXT: [[__B_ADDR_I_I:%.*]] = alloca <4 x float>, align 16 |
2399 | // CHECK-NEXT: [[__W_ADDR_I_I:%.*]] = alloca float, align 4 |
2400 | // CHECK-NEXT: [[DOTCOMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64 |
2401 | // CHECK-NEXT: [[__M_ADDR_I:%.*]] = alloca i16, align 2 |
2402 | // CHECK-NEXT: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64 |
2403 | // CHECK-NEXT: [[__T1_I:%.*]] = alloca <8 x float>, align 32 |
2404 | // CHECK-NEXT: [[__T2_I:%.*]] = alloca <8 x float>, align 32 |
2405 | // CHECK-NEXT: [[__T3_I:%.*]] = alloca <8 x float>, align 32 |
2406 | // CHECK-NEXT: [[__T4_I:%.*]] = alloca <4 x float>, align 16 |
2407 | // CHECK-NEXT: [[__T5_I:%.*]] = alloca <4 x float>, align 16 |
2408 | // CHECK-NEXT: [[__T6_I:%.*]] = alloca <4 x float>, align 16 |
2409 | // CHECK-NEXT: [[__T7_I:%.*]] = alloca <4 x float>, align 16 |
2410 | // CHECK-NEXT: [[__T8_I:%.*]] = alloca <4 x float>, align 16 |
2411 | // CHECK-NEXT: [[__T9_I:%.*]] = alloca <4 x float>, align 16 |
2412 | // CHECK-NEXT: [[__T10_I:%.*]] = alloca <4 x float>, align 16 |
2413 | // CHECK-NEXT: [[__M_ADDR:%.*]] = alloca i16, align 2 |
2414 | // CHECK-NEXT: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64 |
2415 | // CHECK-NEXT: store i16 [[__M:%.*]], i16* [[__M_ADDR]], align 2 |
2416 | // CHECK-NEXT: store <16 x float> [[__W:%.*]], <16 x float>* [[__W_ADDR]], align 64 |
2417 | // CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2 |
2418 | // CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64 |
2419 | // CHECK-NEXT: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2 |
2420 | // CHECK-NEXT: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64 |
2421 | // CHECK-NEXT: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4 |
2422 | // CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2423 | // CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0 |
2424 | // CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2425 | // CHECK-NEXT: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP3]], i32 1 |
2426 | // CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2427 | // CHECK-NEXT: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP4]], i32 2 |
2428 | // CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2429 | // CHECK-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP5]], i32 3 |
2430 | // CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2431 | // CHECK-NEXT: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP6]], i32 4 |
2432 | // CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2433 | // CHECK-NEXT: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP7]], i32 5 |
2434 | // CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2435 | // CHECK-NEXT: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP8]], i32 6 |
2436 | // CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2437 | // CHECK-NEXT: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP9]], i32 7 |
2438 | // CHECK-NEXT: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2439 | // CHECK-NEXT: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP10]], i32 8 |
2440 | // CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2441 | // CHECK-NEXT: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP11]], i32 9 |
2442 | // CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2443 | // CHECK-NEXT: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP12]], i32 10 |
2444 | // CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2445 | // CHECK-NEXT: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP13]], i32 11 |
2446 | // CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2447 | // CHECK-NEXT: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP14]], i32 12 |
2448 | // CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2449 | // CHECK-NEXT: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP15]], i32 13 |
2450 | // CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2451 | // CHECK-NEXT: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP16]], i32 14 |
2452 | // CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4 |
2453 | // CHECK-NEXT: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP17]], i32 15 |
2454 | // CHECK-NEXT: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2455 | // CHECK-NEXT: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[DOTCOMPOUNDLITERAL_I_I]], align 64 |
2456 | // CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2 |
2457 | // CHECK-NEXT: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
2458 | // CHECK-NEXT: store <16 x float> [[TMP18]], <16 x float>* [[__W2_ADDR_I_I]], align 64 |
2459 | // CHECK-NEXT: store i16 [[TMP19]], i16* [[__U_ADDR_I_I]], align 2 |
2460 | // CHECK-NEXT: store <16 x float> [[TMP20]], <16 x float>* [[__A_ADDR_I_I]], align 64 |
2461 | // CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[__U_ADDR_I_I]], align 2 |
2462 | // CHECK-NEXT: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64 |
2463 | // CHECK-NEXT: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__W2_ADDR_I_I]], align 64 |
2464 | // CHECK-NEXT: [[TMP24:%.*]] = bitcast i16 [[TMP21]] to <16 x i1> |
2465 | // CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x float> [[TMP22]], <16 x float> [[TMP23]] |
2466 | // CHECK-NEXT: store <16 x float> [[TMP25]], <16 x float>* [[__V_ADDR_I]], align 64 |
2467 | // CHECK-NEXT: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
2468 | // CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x float> [[TMP26]] to <8 x double> |
2469 | // CHECK-NEXT: [[EXTRACT_I:%.*]] = shufflevector <8 x double> [[TMP27]], <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
2470 | // CHECK-NEXT: [[TMP28:%.*]] = bitcast <4 x double> [[EXTRACT_I]] to <8 x float> |
2471 | // CHECK-NEXT: store <8 x float> [[TMP28]], <8 x float>* [[__T1_I]], align 32 |
2472 | // CHECK-NEXT: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64 |
2473 | // CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x float> [[TMP29]] to <8 x double> |
2474 | // CHECK-NEXT: [[EXTRACT4_I:%.*]] = shufflevector <8 x double> [[TMP30]], <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2475 | // CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x double> [[EXTRACT4_I]] to <8 x float> |
2476 | // CHECK-NEXT: store <8 x float> [[TMP31]], <8 x float>* [[__T2_I]], align 32 |
2477 | // CHECK-NEXT: [[TMP32:%.*]] = load <8 x float>, <8 x float>* [[__T1_I]], align 32 |
2478 | // CHECK-NEXT: [[TMP33:%.*]] = load <8 x float>, <8 x float>* [[__T2_I]], align 32 |
2479 | // CHECK-NEXT: store <8 x float> [[TMP32]], <8 x float>* [[__A_ADDR_I16_I]], align 32 |
2480 | // CHECK-NEXT: store <8 x float> [[TMP33]], <8 x float>* [[__B_ADDR_I17_I]], align 32 |
2481 | // CHECK-NEXT: [[TMP34:%.*]] = load <8 x float>, <8 x float>* [[__A_ADDR_I16_I]], align 32 |
2482 | // CHECK-NEXT: [[TMP35:%.*]] = load <8 x float>, <8 x float>* [[__B_ADDR_I17_I]], align 32 |
2483 | // CHECK-NEXT: [[TMP36:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[TMP34]], <8 x float> [[TMP35]]) #2 |
2484 | // CHECK-NEXT: store <8 x float> [[TMP36]], <8 x float>* [[__T3_I]], align 32 |
2485 | // CHECK-NEXT: [[TMP37:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
2486 | // CHECK-NEXT: [[EXTRACT6_I:%.*]] = shufflevector <8 x float> [[TMP37]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
2487 | // CHECK-NEXT: store <4 x float> [[EXTRACT6_I]], <4 x float>* [[__T4_I]], align 16 |
2488 | // CHECK-NEXT: [[TMP38:%.*]] = load <8 x float>, <8 x float>* [[__T3_I]], align 32 |
2489 | // CHECK-NEXT: [[EXTRACT7_I:%.*]] = shufflevector <8 x float> [[TMP38]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> |
2490 | // CHECK-NEXT: store <4 x float> [[EXTRACT7_I]], <4 x float>* [[__T5_I]], align 16 |
2491 | // CHECK-NEXT: [[TMP39:%.*]] = load <4 x float>, <4 x float>* [[__T4_I]], align 16 |
2492 | // CHECK-NEXT: [[TMP40:%.*]] = load <4 x float>, <4 x float>* [[__T5_I]], align 16 |
2493 | // CHECK-NEXT: store <4 x float> [[TMP39]], <4 x float>* [[__A_ADDR_I14_I]], align 16 |
2494 | // CHECK-NEXT: store <4 x float> [[TMP40]], <4 x float>* [[__B_ADDR_I15_I]], align 16 |
2495 | // CHECK-NEXT: [[TMP41:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I14_I]], align 16 |
2496 | // CHECK-NEXT: [[TMP42:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I15_I]], align 16 |
2497 | // CHECK-NEXT: [[TMP43:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP41]], <4 x float> [[TMP42]]) #2 |
2498 | // CHECK-NEXT: store <4 x float> [[TMP43]], <4 x float>* [[__T6_I]], align 16 |
2499 | // CHECK-NEXT: [[TMP44:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2500 | // CHECK-NEXT: [[TMP45:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2501 | // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP44]], <4 x float> [[TMP45]], <4 x i32> <i32 2, i32 3, i32 0, i32 1> |
2502 | // CHECK-NEXT: store <4 x float> [[SHUFFLE_I]], <4 x float>* [[__T7_I]], align 16 |
2503 | // CHECK-NEXT: [[TMP46:%.*]] = load <4 x float>, <4 x float>* [[__T6_I]], align 16 |
2504 | // CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, <4 x float>* [[__T7_I]], align 16 |
2505 | // CHECK-NEXT: store <4 x float> [[TMP46]], <4 x float>* [[__A_ADDR_I12_I]], align 16 |
2506 | // CHECK-NEXT: store <4 x float> [[TMP47]], <4 x float>* [[__B_ADDR_I13_I]], align 16 |
2507 | // CHECK-NEXT: [[TMP48:%.*]] = load <4 x float>, <4 x float>* [[__A_ADDR_I12_I]], align 16 |
2508 | // CHECK-NEXT: [[TMP49:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I13_I]], align 16 |
2509 | // CHECK-NEXT: [[TMP50:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP48]], <4 x float> [[TMP49]]) #2 |
2510 | // CHECK-NEXT: store <4 x float> [[TMP50]], <4 x float>* [[__T8_I]], align 16 |
2511 | // CHECK-NEXT: [[TMP51:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2512 | // CHECK-NEXT: [[TMP52:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2513 | // CHECK-NEXT: [[SHUFFLE10_I:%.*]] = shufflevector <4 x float> [[TMP51]], <4 x float> [[TMP52]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
2514 | // CHECK-NEXT: store <4 x float> [[SHUFFLE10_I]], <4 x float>* [[__T9_I]], align 16 |
2515 | // CHECK-NEXT: [[TMP53:%.*]] = load <4 x float>, <4 x float>* [[__T8_I]], align 16 |
2516 | // CHECK-NEXT: [[TMP54:%.*]] = load <4 x float>, <4 x float>* [[__T9_I]], align 16 |
2517 | // CHECK-NEXT: store <4 x float> [[TMP53]], <4 x float>* [[__A2_ADDR_I_I]], align 16 |
2518 | // CHECK-NEXT: store <4 x float> [[TMP54]], <4 x float>* [[__B_ADDR_I_I]], align 16 |
2519 | // CHECK-NEXT: [[TMP55:%.*]] = load <4 x float>, <4 x float>* [[__A2_ADDR_I_I]], align 16 |
2520 | // CHECK-NEXT: [[TMP56:%.*]] = load <4 x float>, <4 x float>* [[__B_ADDR_I_I]], align 16 |
2521 | // CHECK-NEXT: [[TMP57:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[TMP55]], <4 x float> [[TMP56]]) #2 |
2522 | // CHECK-NEXT: store <4 x float> [[TMP57]], <4 x float>* [[__T10_I]], align 16 |
2523 | // CHECK-NEXT: [[TMP58:%.*]] = load <4 x float>, <4 x float>* [[__T10_I]], align 16 |
2524 | // CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP58]], i32 0 |
2525 | // CHECK-NEXT: ret float [[VECEXT_I]] |
2526 | float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){ |
2527 | return _mm512_mask_reduce_min_ps(__M, __W); |
2528 | } |
2529 | |
2530 | |