1 | //===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the PTX-specific builtin function database. Users of |
10 | // this file must define the BUILTIN macro to make use of this information. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | // The format of this database matches clang/Basic/Builtins.def. |
15 | |
// Fallback for consumers that define only BUILTIN: every TARGET_BUILTIN entry
// in this file is forwarded to BUILTIN with its FEATURE argument dropped.
// Nothing is defined here when BUILTIN itself is missing.
16 | #if defined(BUILTIN) && !defined(TARGET_BUILTIN) |
17 | # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) |
18 | #endif |
19 | |
// Feature-string helpers used as the FEATURE argument of TARGET_BUILTIN below.
// Each name is saved with push_macro before being redefined and is restored by
// the matching pop_macro at the end of this file.
//
// The strings are built from adjacent string literals, so SM_60 also contains
// everything in SM_70: it concatenates to "sm_60|sm_61|sm_62|sm_70|sm_71".
20 | #pragma push_macro("SM_70") |
21 | #define SM_70 "sm_70|sm_71" |
22 | #pragma push_macro("SM_60") |
23 | #define SM_60 "sm_60|sm_61|sm_62|" SM_70 |
24 | |
// Same scheme for PTX versions: PTX60 concatenates to "ptx60|ptx61".
25 | #pragma push_macro("PTX61") |
26 | #define PTX61 "ptx61" |
27 | #pragma push_macro("PTX60") |
28 | #define PTX60 "ptx60|" PTX61 |
29 | |
// AND(a, b) joins two feature strings with a comma, e.g. AND(SM_70,PTX61)
// concatenates to "sm_70|sm_71,ptx61".
// NOTE(review): presumably '|' separates alternatives and ',' requires both
// sides; confirm against the code that parses the FEATURE string.
30 | #pragma push_macro("AND") |
31 | #define AND(a, b) a "," b |
32 | |
33 | // Special Registers |
// One unconditional BUILTIN per register; the register name is the suffix
// after __nvvm_read_ptx_sreg_. Every TYPE string is "i" except clock64, which
// uses "LLi". See clang/Basic/Builtins.def for the TYPE/ATTRS letter encoding.
//
// The entries down to lanemask_gt use ATTRS "nc"; clock, clock64 and pm0-pm3
// use "n" only.
// NOTE(review): presumably 'c' is omitted there because those counters can
// change between two reads - confirm against the attribute list in
// Builtins.def.
34 | |
35 | BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc") |
36 | BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc") |
37 | BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc") |
38 | BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc") |
39 | |
40 | BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc") |
41 | BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc") |
42 | BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc") |
43 | BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc") |
44 | |
45 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc") |
46 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc") |
47 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc") |
48 | BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc") |
49 | |
50 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc") |
51 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc") |
52 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc") |
53 | BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc") |
54 | |
55 | BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc") |
56 | BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc") |
57 | BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc") |
58 | |
59 | BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc") |
60 | BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc") |
61 | BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc") |
62 | |
63 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc") |
64 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc") |
65 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc") |
66 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc") |
67 | BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc") |
68 | |
69 | BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n") |
70 | BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n") |
71 | |
72 | BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n") |
73 | BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n") |
74 | BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n") |
75 | BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n") |
76 | |
// Arithmetic builtins, part 1. All entries in this group are unconditional
// BUILTINs with empty ATTRS. The name suffix tracks the TYPE string: *_f
// entries use 'f' codes, *_d use 'd', *_i/_ui use 'i'/'Ui' and *_ll/_ull use
// 'LLi'/'ULLi'. In this group the _ftz variants exist only for *_f entries.
// NOTE(review): rn/rz/rm/rp presumably name the PTX rounding modifiers and
// ftz the flush-to-zero modifier - confirm against the PTX ISA.
77 | // MISC |
78 | |
79 | BUILTIN(__nvvm_prmt, "UiUiUiUi", "") |
80 | |
81 | // Min Max |
82 | |
83 | BUILTIN(__nvvm_fmax_ftz_f, "fff", "") |
84 | BUILTIN(__nvvm_fmax_f, "fff", "") |
85 | BUILTIN(__nvvm_fmin_ftz_f, "fff", "") |
86 | BUILTIN(__nvvm_fmin_f, "fff", "") |
87 | |
88 | BUILTIN(__nvvm_fmax_d, "ddd", "") |
89 | BUILTIN(__nvvm_fmin_d, "ddd", "") |
90 | |
91 | // Multiplication |
92 | |
93 | BUILTIN(__nvvm_mulhi_i, "iii", "") |
94 | BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "") |
95 | BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "") |
96 | BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "") |
97 | |
98 | BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "") |
99 | BUILTIN(__nvvm_mul_rn_f, "fff", "") |
100 | BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "") |
101 | BUILTIN(__nvvm_mul_rz_f, "fff", "") |
102 | BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "") |
103 | BUILTIN(__nvvm_mul_rm_f, "fff", "") |
104 | BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "") |
105 | BUILTIN(__nvvm_mul_rp_f, "fff", "") |
106 | |
107 | BUILTIN(__nvvm_mul_rn_d, "ddd", "") |
108 | BUILTIN(__nvvm_mul_rz_d, "ddd", "") |
109 | BUILTIN(__nvvm_mul_rm_d, "ddd", "") |
110 | BUILTIN(__nvvm_mul_rp_d, "ddd", "") |
111 | |
112 | BUILTIN(__nvvm_mul24_i, "iii", "") |
113 | BUILTIN(__nvvm_mul24_ui, "UiUiUi", "") |
114 | |
115 | // Div |
116 | |
117 | BUILTIN(__nvvm_div_approx_ftz_f, "fff", "") |
118 | BUILTIN(__nvvm_div_approx_f, "fff", "") |
119 | |
120 | BUILTIN(__nvvm_div_rn_ftz_f, "fff", "") |
121 | BUILTIN(__nvvm_div_rn_f, "fff", "") |
122 | BUILTIN(__nvvm_div_rz_ftz_f, "fff", "") |
123 | BUILTIN(__nvvm_div_rz_f, "fff", "") |
124 | BUILTIN(__nvvm_div_rm_ftz_f, "fff", "") |
125 | BUILTIN(__nvvm_div_rm_f, "fff", "") |
126 | BUILTIN(__nvvm_div_rp_ftz_f, "fff", "") |
127 | BUILTIN(__nvvm_div_rp_f, "fff", "") |
128 | |
129 | BUILTIN(__nvvm_div_rn_d, "ddd", "") |
130 | BUILTIN(__nvvm_div_rz_d, "ddd", "") |
131 | BUILTIN(__nvvm_div_rm_d, "ddd", "") |
132 | BUILTIN(__nvvm_div_rp_d, "ddd", "") |
133 | |
// Arithmetic builtins, part 2. All entries are unconditional BUILTINs with
// empty ATTRS. Apart from the two sad entries (four type codes each), every
// TYPE string here is "ff" or "dd". Each family from Floor through Log2
// provides _ftz_f, _f and _d forms; Sin and Cos provide only the two float
// forms and have no *_d entry.
134 | // Sad |
135 | |
136 | BUILTIN(__nvvm_sad_i, "iiii", "") |
137 | BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "") |
138 | |
139 | // Floor, Ceil |
140 | |
141 | BUILTIN(__nvvm_floor_ftz_f, "ff", "") |
142 | BUILTIN(__nvvm_floor_f, "ff", "") |
143 | BUILTIN(__nvvm_floor_d, "dd", "") |
144 | |
145 | BUILTIN(__nvvm_ceil_ftz_f, "ff", "") |
146 | BUILTIN(__nvvm_ceil_f, "ff", "") |
147 | BUILTIN(__nvvm_ceil_d, "dd", "") |
148 | |
149 | // Abs |
150 | |
151 | BUILTIN(__nvvm_fabs_ftz_f, "ff", "") |
152 | BUILTIN(__nvvm_fabs_f, "ff", "") |
153 | BUILTIN(__nvvm_fabs_d, "dd", "") |
154 | |
155 | // Round |
156 | |
157 | BUILTIN(__nvvm_round_ftz_f, "ff", "") |
158 | BUILTIN(__nvvm_round_f, "ff", "") |
159 | BUILTIN(__nvvm_round_d, "dd", "") |
160 | |
161 | // Trunc |
162 | |
163 | BUILTIN(__nvvm_trunc_ftz_f, "ff", "") |
164 | BUILTIN(__nvvm_trunc_f, "ff", "") |
165 | BUILTIN(__nvvm_trunc_d, "dd", "") |
166 | |
167 | // Saturate |
168 | |
169 | BUILTIN(__nvvm_saturate_ftz_f, "ff", "") |
170 | BUILTIN(__nvvm_saturate_f, "ff", "") |
171 | BUILTIN(__nvvm_saturate_d, "dd", "") |
172 | |
173 | // Exp2, Log2 |
174 | |
175 | BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "") |
176 | BUILTIN(__nvvm_ex2_approx_f, "ff", "") |
177 | BUILTIN(__nvvm_ex2_approx_d, "dd", "") |
178 | |
179 | BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "") |
180 | BUILTIN(__nvvm_lg2_approx_f, "ff", "") |
181 | BUILTIN(__nvvm_lg2_approx_d, "dd", "") |
182 | |
183 | // Sin, Cos |
184 | |
185 | BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "") |
186 | BUILTIN(__nvvm_sin_approx_f, "ff", "") |
187 | |
188 | BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "") |
189 | BUILTIN(__nvvm_cos_approx_f, "ff", "") |
190 | |
// Arithmetic builtins, part 3. All entries are unconditional BUILTINs with
// empty ATTRS. Float families carry an _ftz and a plain form per rounding
// suffix (rn/rz/rm/rp); double families carry only the plain form.
// Exceptions visible below: __nvvm_rcp_approx_ftz_d is the only _ftz entry
// with a "dd" signature, and sqrt has _approx forms for float only.
191 | // Fma |
192 | |
193 | BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "") |
194 | BUILTIN(__nvvm_fma_rn_f, "ffff", "") |
195 | BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "") |
196 | BUILTIN(__nvvm_fma_rz_f, "ffff", "") |
197 | BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "") |
198 | BUILTIN(__nvvm_fma_rm_f, "ffff", "") |
199 | BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "") |
200 | BUILTIN(__nvvm_fma_rp_f, "ffff", "") |
201 | BUILTIN(__nvvm_fma_rn_d, "dddd", "") |
202 | BUILTIN(__nvvm_fma_rz_d, "dddd", "") |
203 | BUILTIN(__nvvm_fma_rm_d, "dddd", "") |
204 | BUILTIN(__nvvm_fma_rp_d, "dddd", "") |
205 | |
206 | // Rcp |
207 | |
208 | BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "") |
209 | BUILTIN(__nvvm_rcp_rn_f, "ff", "") |
210 | BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "") |
211 | BUILTIN(__nvvm_rcp_rz_f, "ff", "") |
212 | BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "") |
213 | BUILTIN(__nvvm_rcp_rm_f, "ff", "") |
214 | BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "") |
215 | BUILTIN(__nvvm_rcp_rp_f, "ff", "") |
216 | |
217 | BUILTIN(__nvvm_rcp_rn_d, "dd", "") |
218 | BUILTIN(__nvvm_rcp_rz_d, "dd", "") |
219 | BUILTIN(__nvvm_rcp_rm_d, "dd", "") |
220 | BUILTIN(__nvvm_rcp_rp_d, "dd", "") |
221 | BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "") |
222 | |
223 | // Sqrt |
224 | |
225 | BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "") |
226 | BUILTIN(__nvvm_sqrt_rn_f, "ff", "") |
227 | BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "") |
228 | BUILTIN(__nvvm_sqrt_rz_f, "ff", "") |
229 | BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "") |
230 | BUILTIN(__nvvm_sqrt_rm_f, "ff", "") |
231 | BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "") |
232 | BUILTIN(__nvvm_sqrt_rp_f, "ff", "") |
233 | BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "") |
234 | BUILTIN(__nvvm_sqrt_approx_f, "ff", "") |
235 | |
236 | BUILTIN(__nvvm_sqrt_rn_d, "dd", "") |
237 | BUILTIN(__nvvm_sqrt_rz_d, "dd", "") |
238 | BUILTIN(__nvvm_sqrt_rm_d, "dd", "") |
239 | BUILTIN(__nvvm_sqrt_rp_d, "dd", "") |
240 | |
241 | // Rsqrt |
242 | |
243 | BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "") |
244 | BUILTIN(__nvvm_rsqrt_approx_f, "ff", "") |
245 | BUILTIN(__nvvm_rsqrt_approx_d, "dd", "") |
246 | |
247 | // Add |
248 | |
249 | BUILTIN(__nvvm_add_rn_ftz_f, "fff", "") |
250 | BUILTIN(__nvvm_add_rn_f, "fff", "") |
251 | BUILTIN(__nvvm_add_rz_ftz_f, "fff", "") |
252 | BUILTIN(__nvvm_add_rz_f, "fff", "") |
253 | BUILTIN(__nvvm_add_rm_ftz_f, "fff", "") |
254 | BUILTIN(__nvvm_add_rm_f, "fff", "") |
255 | BUILTIN(__nvvm_add_rp_ftz_f, "fff", "") |
256 | BUILTIN(__nvvm_add_rp_f, "fff", "") |
257 | |
258 | BUILTIN(__nvvm_add_rn_d, "ddd", "") |
259 | BUILTIN(__nvvm_add_rz_d, "ddd", "") |
260 | BUILTIN(__nvvm_add_rm_d, "ddd", "") |
261 | BUILTIN(__nvvm_add_rp_d, "ddd", "") |
262 | |
263 | // Convert |
// Names follow <src>2<dst>_<rounding>[_ftz] and the TYPE string lists the
// destination code first, then the source code (e.g. d2f -> "fd",
// ll2d -> "dLLi"). _ftz variants are present only for d2f and the f2*
// conversions. All entries are unconditional BUILTINs with empty ATTRS.
264 | |
265 | BUILTIN(__nvvm_d2f_rn_ftz, "fd", "") |
266 | BUILTIN(__nvvm_d2f_rn, "fd", "") |
267 | BUILTIN(__nvvm_d2f_rz_ftz, "fd", "") |
268 | BUILTIN(__nvvm_d2f_rz, "fd", "") |
269 | BUILTIN(__nvvm_d2f_rm_ftz, "fd", "") |
270 | BUILTIN(__nvvm_d2f_rm, "fd", "") |
271 | BUILTIN(__nvvm_d2f_rp_ftz, "fd", "") |
272 | BUILTIN(__nvvm_d2f_rp, "fd", "") |
273 | |
274 | BUILTIN(__nvvm_d2i_rn, "id", "") |
275 | BUILTIN(__nvvm_d2i_rz, "id", "") |
276 | BUILTIN(__nvvm_d2i_rm, "id", "") |
277 | BUILTIN(__nvvm_d2i_rp, "id", "") |
278 | |
279 | BUILTIN(__nvvm_d2ui_rn, "Uid", "") |
280 | BUILTIN(__nvvm_d2ui_rz, "Uid", "") |
281 | BUILTIN(__nvvm_d2ui_rm, "Uid", "") |
282 | BUILTIN(__nvvm_d2ui_rp, "Uid", "") |
283 | |
284 | BUILTIN(__nvvm_i2d_rn, "di", "") |
285 | BUILTIN(__nvvm_i2d_rz, "di", "") |
286 | BUILTIN(__nvvm_i2d_rm, "di", "") |
287 | BUILTIN(__nvvm_i2d_rp, "di", "") |
288 | |
289 | BUILTIN(__nvvm_ui2d_rn, "dUi", "") |
290 | BUILTIN(__nvvm_ui2d_rz, "dUi", "") |
291 | BUILTIN(__nvvm_ui2d_rm, "dUi", "") |
292 | BUILTIN(__nvvm_ui2d_rp, "dUi", "") |
293 | |
294 | BUILTIN(__nvvm_f2i_rn_ftz, "if", "") |
295 | BUILTIN(__nvvm_f2i_rn, "if", "") |
296 | BUILTIN(__nvvm_f2i_rz_ftz, "if", "") |
297 | BUILTIN(__nvvm_f2i_rz, "if", "") |
298 | BUILTIN(__nvvm_f2i_rm_ftz, "if", "") |
299 | BUILTIN(__nvvm_f2i_rm, "if", "") |
300 | BUILTIN(__nvvm_f2i_rp_ftz, "if", "") |
301 | BUILTIN(__nvvm_f2i_rp, "if", "") |
302 | |
303 | BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "") |
304 | BUILTIN(__nvvm_f2ui_rn, "Uif", "") |
305 | BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "") |
306 | BUILTIN(__nvvm_f2ui_rz, "Uif", "") |
307 | BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "") |
308 | BUILTIN(__nvvm_f2ui_rm, "Uif", "") |
309 | BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "") |
310 | BUILTIN(__nvvm_f2ui_rp, "Uif", "") |
311 | |
312 | BUILTIN(__nvvm_i2f_rn, "fi", "") |
313 | BUILTIN(__nvvm_i2f_rz, "fi", "") |
314 | BUILTIN(__nvvm_i2f_rm, "fi", "") |
315 | BUILTIN(__nvvm_i2f_rp, "fi", "") |
316 | |
317 | BUILTIN(__nvvm_ui2f_rn, "fUi", "") |
318 | BUILTIN(__nvvm_ui2f_rz, "fUi", "") |
319 | BUILTIN(__nvvm_ui2f_rm, "fUi", "") |
320 | BUILTIN(__nvvm_ui2f_rp, "fUi", "") |
321 | |
// lohi_i2d takes two 'i' codes and yields 'd'; d2i_lo / d2i_hi each yield one
// 'i' from a 'd'. None of the three has a rounding suffix.
322 | BUILTIN(__nvvm_lohi_i2d, "dii", "") |
323 | |
324 | BUILTIN(__nvvm_d2i_lo, "id", "") |
325 | BUILTIN(__nvvm_d2i_hi, "id", "") |
326 | |
327 | BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "") |
328 | BUILTIN(__nvvm_f2ll_rn, "LLif", "") |
329 | BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "") |
330 | BUILTIN(__nvvm_f2ll_rz, "LLif", "") |
331 | BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "") |
332 | BUILTIN(__nvvm_f2ll_rm, "LLif", "") |
333 | BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "") |
334 | BUILTIN(__nvvm_f2ll_rp, "LLif", "") |
335 | |
336 | BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "") |
337 | BUILTIN(__nvvm_f2ull_rn, "ULLif", "") |
338 | BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "") |
339 | BUILTIN(__nvvm_f2ull_rz, "ULLif", "") |
340 | BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "") |
341 | BUILTIN(__nvvm_f2ull_rm, "ULLif", "") |
342 | BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "") |
343 | BUILTIN(__nvvm_f2ull_rp, "ULLif", "") |
344 | |
345 | BUILTIN(__nvvm_d2ll_rn, "LLid", "") |
346 | BUILTIN(__nvvm_d2ll_rz, "LLid", "") |
347 | BUILTIN(__nvvm_d2ll_rm, "LLid", "") |
348 | BUILTIN(__nvvm_d2ll_rp, "LLid", "") |
349 | |
350 | BUILTIN(__nvvm_d2ull_rn, "ULLid", "") |
351 | BUILTIN(__nvvm_d2ull_rz, "ULLid", "") |
352 | BUILTIN(__nvvm_d2ull_rm, "ULLid", "") |
353 | BUILTIN(__nvvm_d2ull_rp, "ULLid", "") |
354 | |
355 | BUILTIN(__nvvm_ll2f_rn, "fLLi", "") |
356 | BUILTIN(__nvvm_ll2f_rz, "fLLi", "") |
357 | BUILTIN(__nvvm_ll2f_rm, "fLLi", "") |
358 | BUILTIN(__nvvm_ll2f_rp, "fLLi", "") |
359 | |
360 | BUILTIN(__nvvm_ull2f_rn, "fULLi", "") |
361 | BUILTIN(__nvvm_ull2f_rz, "fULLi", "") |
362 | BUILTIN(__nvvm_ull2f_rm, "fULLi", "") |
363 | BUILTIN(__nvvm_ull2f_rp, "fULLi", "") |
364 | |
365 | BUILTIN(__nvvm_ll2d_rn, "dLLi", "") |
366 | BUILTIN(__nvvm_ll2d_rz, "dLLi", "") |
367 | BUILTIN(__nvvm_ll2d_rm, "dLLi", "") |
368 | BUILTIN(__nvvm_ll2d_rp, "dLLi", "") |
369 | |
370 | BUILTIN(__nvvm_ull2d_rn, "dULLi", "") |
371 | BUILTIN(__nvvm_ull2d_rz, "dULLi", "") |
372 | BUILTIN(__nvvm_ull2d_rm, "dULLi", "") |
373 | BUILTIN(__nvvm_ull2d_rp, "dULLi", "") |
374 | |
// The f2h result is typed with the 'Us' code; only the rn rounding suffix is
// provided for this conversion.
375 | BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "") |
376 | BUILTIN(__nvvm_f2h_rn, "Usf", "") |
377 | |
378 | // Bitcast |
// Same <src>2<dst> naming and destination-first TYPE order as the
// conversions, but without rounding or _ftz suffixes.
379 | |
380 | BUILTIN(__nvvm_bitcast_f2i, "if", "") |
381 | BUILTIN(__nvvm_bitcast_i2f, "fi", "") |
382 | |
383 | BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") |
384 | BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") |
385 | |
// Warp/block-level builtins. Entries gated on PTX60 via TARGET_BUILTIN:
// __nvvm_fns, __nvvm_bar_warp_sync, __nvvm_barrier_sync*, __nvvm_shfl_sync_*,
// __nvvm_vote_*_sync and __nvvm_match_*. Every other entry in this group
// (including __nvvm_bar_sync) is an unconditional BUILTIN.
386 | // FNS |
387 | TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60) |
388 | |
389 | // Sync |
390 | |
391 | BUILTIN(__syncthreads, "v", "") |
392 | BUILTIN(__nvvm_bar0_popc, "ii", "") |
393 | BUILTIN(__nvvm_bar0_and, "ii", "") |
394 | BUILTIN(__nvvm_bar0_or, "ii", "") |
395 | BUILTIN(__nvvm_bar_sync, "vi", "n") |
396 | TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60) |
397 | TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60) |
398 | TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60) |
399 | |
400 | // Shuffle |
// The _sync forms differ from the plain forms by one extra "Ui" code placed
// right after the result code ("iiii" -> "iUiiii", "ffii" -> "fUifii").
// NOTE(review): presumably that extra operand is the participating-lane
// mask - confirm against the PTX shfl.sync description.
401 | |
402 | BUILTIN(__nvvm_shfl_down_i32, "iiii", "") |
403 | BUILTIN(__nvvm_shfl_down_f32, "ffii", "") |
404 | BUILTIN(__nvvm_shfl_up_i32, "iiii", "") |
405 | BUILTIN(__nvvm_shfl_up_f32, "ffii", "") |
406 | BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "") |
407 | BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "") |
408 | BUILTIN(__nvvm_shfl_idx_i32, "iiii", "") |
409 | BUILTIN(__nvvm_shfl_idx_f32, "ffii", "") |
410 | |
411 | TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60) |
412 | TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60) |
413 | TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60) |
414 | TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60) |
415 | TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60) |
416 | TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60) |
417 | TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60) |
418 | TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60) |
419 | |
420 | // Vote |
// As with shuffle, each _sync form adds one "Ui" code after the result code
// ("bb" -> "bUib", "Uib" -> "UiUib").
421 | BUILTIN(__nvvm_vote_all, "bb", "") |
422 | BUILTIN(__nvvm_vote_any, "bb", "") |
423 | BUILTIN(__nvvm_vote_uni, "bb", "") |
424 | BUILTIN(__nvvm_vote_ballot, "Uib", "") |
425 | |
426 | TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60) |
427 | TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60) |
428 | TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60) |
429 | TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) |
430 | |
431 | // Match |
432 | TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60) |
433 | TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60) |
434 | // These return a pair {value, predicate}, which requires custom lowering. |
// The two entries below end their TYPE string with an "i*" pointer code.
// NOTE(review): presumably that pointer receives the predicate half of the
// pair - confirm against the lowering code.
435 | TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60) |
436 | TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60) |
437 | |
438 | // Membar |
439 | |
440 | BUILTIN(__nvvm_membar_cta, "v", "") |
441 | BUILTIN(__nvvm_membar_gl, "v", "") |
442 | BUILTIN(__nvvm_membar_sys, "v", "") |
443 | |
// Memory and image builtins. All entries are unconditional BUILTINs with
// empty ATTRS; none is gated on an SM or PTX feature string.
444 | // Memcpy, Memset |
445 | |
446 | BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","") |
447 | BUILTIN(__nvvm_memset, "vUc*Uczi","") |
448 | |
449 | // Image |
// read_image* entries yield a 4-element vector code (V4f or V4i); the
// write_image* entries yield 'v' and the get_image_* queries map "i" -> "i".
450 | |
451 | BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "") |
452 | BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "") |
453 | BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "") |
454 | BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "") |
455 | |
456 | BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "") |
457 | BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "") |
458 | BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "") |
459 | BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "") |
460 | |
461 | BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "") |
462 | BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "") |
463 | BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "") |
464 | BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "") |
465 | BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "") |
466 | BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "") |
467 | BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "") |
468 | BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "") |
469 | |
470 | // Atomic |
471 | // |
472 | // We need the atom intrinsics because |
473 | // - they are used in converging analysis |
474 | // - they are used in address space analysis and optimization |
475 | // So it does not hurt to expose them as builtins. |
476 | // |
// Layout of this section: for each operation and operand type, the unscoped
// __nvvm_atom_<op>_gen_<type> entry is an unconditional BUILTIN, followed by
// _cta_ and _sys_ variants with the same TYPE string that are TARGET_BUILTINs
// gated on SM_60. Two deviations from that pattern:
//   - __nvvm_atom_add_gen_d is itself gated on SM_60, like its scoped forms;
//   - the sub family has no _cta_/_sys_ variants at all.
// Every entry uses ATTRS "n", and the second type code is always a "D*"
// pointer to the operand type.
477 | BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n") |
478 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60) |
479 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60) |
480 | BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n") |
481 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60) |
482 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60) |
483 | BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n") |
484 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
485 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
486 | BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n") |
487 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60) |
488 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60) |
489 | TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60) |
490 | TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60) |
491 | TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60) |
492 | |
493 | BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n") |
494 | BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n") |
495 | BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n") |
496 | |
497 | BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n") |
498 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60) |
499 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60) |
500 | BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n") |
501 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60) |
502 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60) |
503 | BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n") |
504 | TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
505 | TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
506 | |
507 | BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n") |
508 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60) |
509 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60) |
510 | BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n") |
511 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60) |
512 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60) |
513 | BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n") |
514 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60) |
515 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60) |
516 | BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n") |
517 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60) |
518 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60) |
519 | BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n") |
520 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
521 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
522 | BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n") |
523 | TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) |
524 | TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) |
525 | |
526 | BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n") |
527 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60) |
528 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60) |
529 | BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n") |
530 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60) |
531 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60) |
532 | BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n") |
533 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60) |
534 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60) |
535 | BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n") |
536 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60) |
537 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60) |
538 | BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n") |
539 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
540 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
541 | BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n") |
542 | TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) |
543 | TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) |
544 | |
545 | BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") |
546 | TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) |
547 | TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) |
548 | BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") |
549 | TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) |
550 | TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) |
551 | |
552 | BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") |
553 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) |
554 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) |
555 | BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") |
556 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) |
557 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) |
558 | BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") |
559 | TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
560 | TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
561 | |
562 | BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") |
563 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) |
564 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) |
565 | BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") |
566 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) |
567 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) |
568 | BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") |
569 | TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
570 | TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
571 | |
572 | BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") |
573 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60) |
574 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) |
575 | BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") |
576 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) |
577 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) |
578 | BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") |
579 | TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
580 | TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) |
581 | |
// cas entries carry one more value code than the other operations (two
// operand codes after the pointer instead of one).
582 | BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") |
583 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) |
584 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) |
585 | BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") |
586 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) |
587 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) |
588 | BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") |
589 | TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) |
590 | TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) |
591 | |
592 | // Compiler Error Warn |
// Both entries share the TYPE string "vcC*4" and use ATTRS "n".
593 | BUILTIN(__nvvm_compiler_error, "vcC*4", "n") |
594 | BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") |
595 | |
596 | // __ldg. This is not implemented as a builtin by nvcc. |
// Each entry yields the element type and takes one "C*" pointer to that same
// type; the name suffix mirrors the type code (c, s, i, l, ll, their u*
// counterparts, f, d). All are unconditional BUILTINs with empty ATTRS.
597 | BUILTIN(__nvvm_ldg_c, "ccC*", "") |
598 | BUILTIN(__nvvm_ldg_s, "ssC*", "") |
599 | BUILTIN(__nvvm_ldg_i, "iiC*", "") |
600 | BUILTIN(__nvvm_ldg_l, "LiLiC*", "") |
601 | BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "") |
602 | |
603 | BUILTIN(__nvvm_ldg_uc, "UcUcC*", "") |
604 | BUILTIN(__nvvm_ldg_us, "UsUsC*", "") |
605 | BUILTIN(__nvvm_ldg_ui, "UiUiC*", "") |
606 | BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "") |
607 | BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "") |
608 | |
609 | BUILTIN(__nvvm_ldg_f, "ffC*", "") |
610 | BUILTIN(__nvvm_ldg_d, "ddC*", "") |
611 | |
// Vector forms use the E2/E4 prefix in the TYPE string. 2- and 4-element
// forms exist for c, s, i, f and the unsigned uc, us, ui; ll, ull and d have
// only the 2-element form; l and ul have no vector form.
612 | BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") |
613 | BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") |
614 | BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") |
615 | BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") |
616 | BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") |
617 | BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") |
618 | BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") |
619 | |
620 | BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") |
621 | BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "") |
622 | BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") |
623 | BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") |
624 | BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") |
625 | BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") |
626 | BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") |
627 | |
628 | BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "") |
629 | BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "") |
630 | BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "") |
631 | |
632 | // Builtins to support WMMA instructions on sm_70 |
// Every entry requires both an SM_70 target and a PTX version, combined with
// AND(): the m16n16k16 shape is gated on PTX60, while the m32n8k16 and
// m8n32k16 shapes are gated on PTX61.
//
// ld/st entries: the _f16 forms use 'i' pointer codes and the _f32 forms use
// 'f' pointer codes; ld_a and ld_b always use 'i'.
633 | TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) |
634 | TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) |
635 | TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) |
636 | TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) |
637 | TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60)) |
638 | TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60)) |
639 | |
640 | TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
641 | TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
642 | TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
643 | TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) |
644 | TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) |
645 | TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) |
646 | |
647 | TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
648 | TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
649 | TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) |
650 | TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) |
651 | TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61)) |
652 | TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61)) |
653 | |
// mma entries: in the _mma_<X><Y> suffix, X selects the code of the first
// pointer and Y the code of the fourth pointer (f32 -> 'f', f16 -> 'i'); the
// second and third pointers are always "iC*".
// NOTE(review): presumably X is the result fragment type and Y the
// accumulator fragment type - confirm against the WMMA lowering code.
654 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) |
655 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) |
656 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) |
657 | TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) |
658 | |
659 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) |
660 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) |
661 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) |
662 | TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) |
663 | |
664 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) |
665 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) |
666 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) |
667 | TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) |
668 | |
// Cleanup. BUILTIN and TARGET_BUILTIN are undefined here, so a consumer must
// define BUILTIN (and optionally TARGET_BUILTIN) again before every inclusion
// of this file.
669 | #undef BUILTIN |
670 | #undef TARGET_BUILTIN |
// Restore the helper macros saved with push_macro at the top of the file.
// The pops are not in reverse push order; each macro name is saved and
// restored independently, so only the per-name pairing matters.
671 | #pragma pop_macro("AND") |
672 | #pragma pop_macro("SM_60") |
673 | #pragma pop_macro("SM_70") |
674 | #pragma pop_macro("PTX60") |
675 | #pragma pop_macro("PTX61") |
676 | |