__clang_cuda_cmath.h source code [clang_source_code/lib/Headers/__clang_cuda

1	/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2	*
3	* Permission is hereby granted, free of charge, to any person obtaining a copy
4	* of this software and associated documentation files (the "Software"), to deal
5	* in the Software without restriction, including without limitation the rights
6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	* copies of the Software, and to permit persons to whom the Software is
8	* furnished to do so, subject to the following conditions:
9	*
10	* The above copyright notice and this permission notice shall be included in
11	* all copies or substantial portions of the Software.
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	* THE SOFTWARE.
20	*
21	*===-----------------------------------------------------------------------===
22	*/
23	#ifndef __CLANG_CUDA_CMATH_H__
24	#define __CLANG_CUDA_CMATH_H__
25	#ifndef __CUDA__
26	#error "This file is for CUDA compilation only."
27	#endif
28
29	#include <limits>
30
31	// CUDA lets us use various std math functions on the device side. This file
32	// works in concert with __clang_cuda_math_forward_declares.h to make this work.
33	//
34	// Specifically, the forward-declares header declares __device__ overloads for
35	// these functions in the global namespace, then pulls them into namespace std
36	// with 'using' statements. Then this file implements those functions, after
37	// their implementations have been pulled in.
38	//
39	// It's important that we declare the functions in the global namespace and pull
40	// them into namespace std with using statements, as opposed to simply declaring
41	// these functions in namespace std, because our device functions need to
42	// overload the standard library functions, which may be declared in the global
43	// namespace or in std, depending on the degree of conformance of the stdlib
44	// implementation. Declaring in the global namespace and pulling into namespace
45	// std covers all of the known knowns.
46
47	#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
48
49	__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
50	__DEVICE__ long abs(long __n) { return ::labs(__n); }
51	__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
52	__DEVICE__ double abs(double __x) { return ::fabs(__x); }
53	__DEVICE__ float acos(float __x) { return ::acosf(__x); }
54	__DEVICE__ float asin(float __x) { return ::asinf(__x); }
55	__DEVICE__ float atan(float __x) { return ::atanf(__x); }
56	__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
57	__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
58	__DEVICE__ float cos(float __x) { return ::cosf(__x); }
59	__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
60	__DEVICE__ float exp(float __x) { return ::expf(__x); }
61	__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
62	__DEVICE__ float floor(float __x) { return ::floorf(__x); }
63	__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
64	__DEVICE__ int fpclassify(float __x) {
65	return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
66	FP_ZERO, __x);
67	}
68	__DEVICE__ int fpclassify(double __x) {
69	return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
70	FP_ZERO, __x);
71	}
72	__DEVICE__ float frexp(float __arg, int *__exp) {
73	return ::frexpf(__arg, __exp);
74	}
75
76	// For inscrutable reasons, the CUDA headers define these functions for us on
77	// Windows.
78	#ifndef _MSC_VER
79	__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
80	__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
81	__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
82	// For inscrutable reasons, __finite(), the double-precision version of
83	// __finitef, does not exist when compiling for MacOS. __isfinited is available
84	// everywhere and is just as good.
85	__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
86	__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
87	__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
88	#endif
89
90	__DEVICE__ bool isgreater(float __x, float __y) {
91	return __builtin_isgreater(__x, __y);
92	}
93	__DEVICE__ bool isgreater(double __x, double __y) {
94	return __builtin_isgreater(__x, __y);
95	}
96	__DEVICE__ bool isgreaterequal(float __x, float __y) {
97	return __builtin_isgreaterequal(__x, __y);
98	}
99	__DEVICE__ bool isgreaterequal(double __x, double __y) {
100	return __builtin_isgreaterequal(__x, __y);
101	}
102	__DEVICE__ bool isless(float __x, float __y) {
103	return __builtin_isless(__x, __y);
104	}
105	__DEVICE__ bool isless(double __x, double __y) {
106	return __builtin_isless(__x, __y);
107	}
108	__DEVICE__ bool islessequal(float __x, float __y) {
109	return __builtin_islessequal(__x, __y);
110	}
111	__DEVICE__ bool islessequal(double __x, double __y) {
112	return __builtin_islessequal(__x, __y);
113	}
114	__DEVICE__ bool islessgreater(float __x, float __y) {
115	return __builtin_islessgreater(__x, __y);
116	}
117	__DEVICE__ bool islessgreater(double __x, double __y) {
118	return __builtin_islessgreater(__x, __y);
119	}
120	__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
121	__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
122	__DEVICE__ bool isunordered(float __x, float __y) {
123	return __builtin_isunordered(__x, __y);
124	}
125	__DEVICE__ bool isunordered(double __x, double __y) {
126	return __builtin_isunordered(__x, __y);
127	}
128	__DEVICE__ float ldexp(float __arg, int __exp) {
129	return ::ldexpf(__arg, __exp);
130	}
131	__DEVICE__ float log(float __x) { return ::logf(__x); }
132	__DEVICE__ float log10(float __x) { return ::log10f(__x); }
133	__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
134	__DEVICE__ float pow(float __base, float __exp) {
135	return ::powf(__base, __exp);
136	}
137	__DEVICE__ float pow(float __base, int __iexp) {
138	return ::powif(__base, __iexp);
139	}
140	__DEVICE__ double pow(double __base, int __iexp) {
141	return ::powi(__base, __iexp);
142	}
143	__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
144	__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
145	__DEVICE__ float sin(float __x) { return ::sinf(__x); }
146	__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
147	__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
148	__DEVICE__ float tan(float __x) { return ::tanf(__x); }
149	__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
150
151	// Notably missing above is nexttoward. We omit it because
152	// libdevice doesn't provide an implementation, and we don't want to be in the
153	// business of implementing tricky libm functions in this header.
154
155	// Now we've defined everything we promised we'd define in
156	// __clang_cuda_math_forward_declares.h. We need to do two additional things to
157	// fix up our math functions.
158	//
159	// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
160	// only sin(float) and sin(double), which means that e.g. sin(0) is
161	// ambiguous.
162	//
163	// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
164	// std. These are defined in the CUDA headers in the global namespace,
165	// independent of everything else we've done here.
166
167	// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
168	// we go ahead and unconditionally define functions that are only available when
169	// compiling for C++11 to match the behavior of the CUDA headers.
170	template<bool __B, class __T = void>
171	struct __clang_cuda_enable_if {};
172
173	template <class __T> struct __clang_cuda_enable_if<true, __T> {
174	typedef __T type;
175	};
176
177	// Defines an overload of __fn that accepts one integral argument, calls
178	// __fn((double)x), and returns __retty.
179	#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
180	template <typename __T> \
181	__DEVICE__ \
182	typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
183	__retty>::type \
184	__fn(__T __x) { \
185	return ::__fn((double)__x); \
186	}
187
188	// Defines an overload of __fn that accepts one two arithmetic arguments, calls
189	// __fn((double)x, (double)y), and returns a double.
190	//
191	// Note this is different from OVERLOAD_1, which generates an overload that
192	// accepts only integral arguments.
193	#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
194	template <typename __T1, typename __T2> \
195	__DEVICE__ typename __clang_cuda_enable_if< \
196	std::numeric_limits<__T1>::is_specialized && \
197	std::numeric_limits<__T2>::is_specialized, \
198	__retty>::type \
199	__fn(__T1 __x, __T2 __y) { \
200	return __fn((double)__x, (double)__y); \
201	}
202
203	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
204	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
205	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
206	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
207	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
208	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
209	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
210	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
211	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
212	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
213	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
214	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
215	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
216	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
217	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
218	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
219	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
220	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
221	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
222	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
223	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
224	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
225	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
226	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
227	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
228	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
229	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
230	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
231	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
232	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
233	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
234	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
235	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
236	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
237	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
238	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
239	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
240	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
241	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
242	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
243	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
244	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
245	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
246	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
247	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
248	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
249	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
250	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
251	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
252	__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
253	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
254	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
255	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
256	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
257	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
258	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
259	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
260	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
261	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
262	__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
263
264	#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
265	#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
266
267	// Overloads for functions that don't match the patterns expected by
268	// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
269	template <typename __T1, typename __T2, typename __T3>
270	__DEVICE__ typename __clang_cuda_enable_if<
271	std::numeric_limits<__T1>::is_specialized &&
272	std::numeric_limits<__T2>::is_specialized &&
273	std::numeric_limits<__T3>::is_specialized,
274	double>::type
275	fma(__T1 __x, __T2 __y, __T3 __z) {
276	return std::fma((double)__x, (double)__y, (double)__z);
277	}
278
279	template <typename __T>
280	__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
281	double>::type
282	frexp(__T __x, int *__exp) {
283	return std::frexp((double)__x, __exp);
284	}
285
286	template <typename __T>
287	__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
288	double>::type
289	ldexp(__T __x, int __exp) {
290	return std::ldexp((double)__x, __exp);
291	}
292
293	template <typename __T1, typename __T2>
294	__DEVICE__ typename __clang_cuda_enable_if<
295	std::numeric_limits<__T1>::is_specialized &&
296	std::numeric_limits<__T2>::is_specialized,
297	double>::type
298	remquo(__T1 __x, __T2 __y, int *__quo) {
299	return std::remquo((double)__x, (double)__y, __quo);
300	}
301
302	template <typename __T>
303	__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
304	double>::type
305	scalbln(__T __x, long __exp) {
306	return std::scalbln((double)__x, __exp);
307	}
308
309	template <typename __T>
310	__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
311	double>::type
312	scalbn(__T __x, int __exp) {
313	return std::scalbn((double)__x, __exp);
314	}
315
316	// We need to define these overloads in exactly the namespace our standard
317	// library uses (including the right inline namespace), otherwise they won't be
318	// picked up by other functions in the standard library (e.g. functions in
319	// <complex>). Thus the ugliness below.
320	#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
321	_LIBCPP_BEGIN_NAMESPACE_STD
322	#else
323	namespace std {
324	#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
325	_GLIBCXX_BEGIN_NAMESPACE_VERSION
326	#endif
327	#endif
328
329	// Pull the new overloads we defined above into namespace std.
330	using ::acos;
331	using ::acosh;
332	using ::asin;
333	using ::asinh;
334	using ::atan;
335	using ::atan2;
336	using ::atanh;
337	using ::cbrt;
338	using ::ceil;
339	using ::copysign;
340	using ::cos;
341	using ::cosh;
342	using ::erf;
343	using ::erfc;
344	using ::exp;
345	using ::exp2;
346	using ::expm1;
347	using ::fabs;
348	using ::fdim;
349	using ::floor;
350	using ::fma;
351	using ::fmax;
352	using ::fmin;
353	using ::fmod;
354	using ::fpclassify;
355	using ::frexp;
356	using ::hypot;
357	using ::ilogb;
358	using ::isfinite;
359	using ::isgreater;
360	using ::isgreaterequal;
361	using ::isless;
362	using ::islessequal;
363	using ::islessgreater;
364	using ::isnormal;
365	using ::isunordered;
366	using ::ldexp;
367	using ::lgamma;
368	using ::llrint;
369	using ::llround;
370	using ::log;
371	using ::log10;
372	using ::log1p;
373	using ::log2;
374	using ::logb;
375	using ::lrint;
376	using ::lround;
377	using ::nearbyint;
378	using ::nextafter;
379	using ::pow;
380	using ::remainder;
381	using ::remquo;
382	using ::rint;
383	using ::round;
384	using ::scalbln;
385	using ::scalbn;
386	using ::signbit;
387	using ::sin;
388	using ::sinh;
389	using ::sqrt;
390	using ::tan;
391	using ::tanh;
392	using ::tgamma;
393	using ::trunc;
394
395	// Well this is fun: We need to pull these symbols in for libc++, but we can't
396	// pull them in with libstdc++, because its ::isinf and ::isnan are different
397	// than its std::isinf and std::isnan.
398	#ifndef __GLIBCXX__
399	using ::isinf;
400	using ::isnan;
401	#endif
402
403	// Finally, pull the "foobarf" functions that CUDA defines in its headers into
404	// namespace std.
405	using ::acosf;
406	using ::acoshf;
407	using ::asinf;
408	using ::asinhf;
409	using ::atan2f;
410	using ::atanf;
411	using ::atanhf;
412	using ::cbrtf;
413	using ::ceilf;
414	using ::copysignf;
415	using ::cosf;
416	using ::coshf;
417	using ::erfcf;
418	using ::erff;
419	using ::exp2f;
420	using ::expf;
421	using ::expm1f;
422	using ::fabsf;
423	using ::fdimf;
424	using ::floorf;
425	using ::fmaf;
426	using ::fmaxf;
427	using ::fminf;
428	using ::fmodf;
429	using ::frexpf;
430	using ::hypotf;
431	using ::ilogbf;
432	using ::ldexpf;
433	using ::lgammaf;
434	using ::llrintf;
435	using ::llroundf;
436	using ::log10f;
437	using ::log1pf;
438	using ::log2f;
439	using ::logbf;
440	using ::logf;
441	using ::lrintf;
442	using ::lroundf;
443	using ::modff;
444	using ::nearbyintf;
445	using ::nextafterf;
446	using ::powf;
447	using ::remainderf;
448	using ::remquof;
449	using ::rintf;
450	using ::roundf;
451	using ::scalblnf;
452	using ::scalbnf;
453	using ::sinf;
454	using ::sinhf;
455	using ::sqrtf;
456	using ::tanf;
457	using ::tanhf;
458	using ::tgammaf;
459	using ::truncf;
460
461	#ifdef _LIBCPP_END_NAMESPACE_STD
462	_LIBCPP_END_NAMESPACE_STD
463	#else
464	#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
465	_GLIBCXX_END_NAMESPACE_VERSION
466	#endif
467	} // namespace std
468	#endif
469
470	#undef __DEVICE__
471
472	#endif
473

Clang Project