__clang_cuda_complex_builtins.h source code [clang_source_code/lib/Headers/__clang_cuda_complex

1	/*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
2	*
3	* Permission is hereby granted, free of charge, to any person obtaining a copy
4	* of this software and associated documentation files (the "Software"), to deal
5	* in the Software without restriction, including without limitation the rights
6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7	* copies of the Software, and to permit persons to whom the Software is
8	* furnished to do so, subject to the following conditions:
9	*
10	* The above copyright notice and this permission notice shall be included in
11	* all copies or substantial portions of the Software.
12	*
13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19	* THE SOFTWARE.
20	*
21	*===-----------------------------------------------------------------------===
22	*/
23
24	#ifndef __CLANG_CUDA_COMPLEX_BUILTINS
25	#define __CLANG_CUDA_COMPLEX_BUILTINS
26
27	// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
28	// libgcc functions that clang assumes are available when compiling c99 complex
29	// operations. (These implementations come from libc++, and have been modified
30	// to work with CUDA.)
31
// Device implementation of the libgcc helper that clang calls for C99
// double-precision complex multiplication.
//
// Computes (__a + i*__b) * (__c + i*__d) and returns it as a double _Complex.
// The plain product is returned unless BOTH components of it are NaN. In that
// case the operands are inspected: if an operand is infinite, or one of the
// four partial products overflowed to infinity, the operands are adjusted
// (infinities become +-1, NaNs become +-0) and the result is recomputed as
// infinity times the product of the adjusted operands.
extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
                                                      double __c, double __d) {
  double __ac = __a * __c;
  double __bd = __b * __d;
  double __ad = __a * __d;
  double __bc = __b * __c;
  double _Complex z;
  __real__(z) = __ac - __bd;
  __imag__(z) = __ad + __bc;
  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    // Double literals are used throughout so that every std::copysign call
    // resolves to the (double, double) overload without integer promotion.
    int __recalc = 0;
    // Left operand has an infinite component: reduce it to +-1 / +-0 and
    // replace NaN components of the right operand with signed zeros.
    if (std::isinf(__a) || std::isinf(__b)) {
      __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
      __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
      if (std::isnan(__c))
        __c = std::copysign(0.0, __c);
      if (std::isnan(__d))
        __d = std::copysign(0.0, __d);
      __recalc = 1;
    }
    // Same treatment with the roles of the two operands swapped.
    if (std::isinf(__c) || std::isinf(__d)) {
      __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
      __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
      if (std::isnan(__a))
        __a = std::copysign(0.0, __a);
      if (std::isnan(__b))
        __b = std::copysign(0.0, __b);
      __recalc = 1;
    }
    // No infinite operand, but a partial product overflowed to infinity:
    // only the NaN components are replaced with signed zeros.
    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
                      std::isinf(__ad) || std::isinf(__bc))) {
      if (std::isnan(__a))
        __a = std::copysign(0.0, __a);
      if (std::isnan(__b))
        __b = std::copysign(0.0, __b);
      if (std::isnan(__c))
        __c = std::copysign(0.0, __c);
      if (std::isnan(__d))
        __d = std::copysign(0.0, __d);
      __recalc = 1;
    }
    if (__recalc) {
      // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
      // a device overload (and isn't constexpr before C++11, naturally).
      // __builtin_huge_val() is the double-typed infinity builtin.
      __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
      __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
    }
  }
  return z;
}
82
// Device implementation of the libgcc helper that clang calls for C99
// single-precision complex multiplication.
//
// Computes (__a + i*__b) * (__c + i*__d) and returns it as a float _Complex.
// The plain product is returned unless BOTH components of it are NaN. In that
// case the operands are inspected: if an operand is infinite, or one of the
// four partial products overflowed to infinity, the operands are adjusted
// (infinities become +-1, NaNs become +-0) and the result is recomputed as
// infinity times the product of the adjusted operands.
extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
                                                     float __c, float __d) {
  float __ac = __a * __c;
  float __bd = __b * __d;
  float __ad = __a * __d;
  float __bc = __b * __c;
  float _Complex z;
  __real__(z) = __ac - __bd;
  __imag__(z) = __ad + __bc;
  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    // Float literals keep every std::copysign call on the (float, float)
    // overload; an integer argument would promote the whole call to double.
    int __recalc = 0;
    // Left operand has an infinite component: reduce it to +-1 / +-0 and
    // replace NaN components of the right operand with signed zeros.
    if (std::isinf(__a) || std::isinf(__b)) {
      __a = std::copysign(std::isinf(__a) ? 1.0f : 0.0f, __a);
      __b = std::copysign(std::isinf(__b) ? 1.0f : 0.0f, __b);
      if (std::isnan(__c))
        __c = std::copysign(0.0f, __c);
      if (std::isnan(__d))
        __d = std::copysign(0.0f, __d);
      __recalc = 1;
    }
    // Same treatment with the roles of the two operands swapped.
    if (std::isinf(__c) || std::isinf(__d)) {
      __c = std::copysign(std::isinf(__c) ? 1.0f : 0.0f, __c);
      __d = std::copysign(std::isinf(__d) ? 1.0f : 0.0f, __d);
      if (std::isnan(__a))
        __a = std::copysign(0.0f, __a);
      if (std::isnan(__b))
        __b = std::copysign(0.0f, __b);
      __recalc = 1;
    }
    // No infinite operand, but a partial product overflowed to infinity:
    // only the NaN components are replaced with signed zeros.
    if (!__recalc && (std::isinf(__ac) || std::isinf(__bd) ||
                      std::isinf(__ad) || std::isinf(__bc))) {
      if (std::isnan(__a))
        __a = std::copysign(0.0f, __a);
      if (std::isnan(__b))
        __b = std::copysign(0.0f, __b);
      if (std::isnan(__c))
        __c = std::copysign(0.0f, __c);
      if (std::isnan(__d))
        __d = std::copysign(0.0f, __d);
      __recalc = 1;
    }
    if (__recalc) {
      // __builtin_huge_valf() is the float-typed infinity builtin.
      __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
      __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
    }
  }
  return z;
}
131
// Device implementation of the libgcc helper that clang calls for C99
// double-precision complex division.
//
// Computes (__a + i*__b) / (__c + i*__d) and returns it as a double _Complex.
// The divisor is first scaled by a power of two taken from the exponent of
// its larger-magnitude component, and the quotient is scaled back afterwards.
// If BOTH components of the quotient are NaN, three special cases are
// handled: a zero denominator with a non-NaN numerator component, an infinite
// numerator with a finite divisor, and a finite numerator with an infinite
// divisor.
extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
                                                      double __c, double __d) {
  int __ilogbw = 0;
  // std::fmax comes from <cmath>, like the other math calls in this function,
  // so neither std::max (<algorithm>) nor the global ::max overloads declared
  // by the CUDA headers are needed here.
  double __logbw = std::logb(std::fmax(std::abs(__c), std::abs(__d)));
  if (std::isfinite(__logbw)) {
    // Scale the divisor by 2^-__ilogbw; the quotient is scaled by the same
    // factor below to compensate.
    __ilogbw = (int)__logbw;
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }
  double __denom = __c * __c + __d * __d;
  double _Complex z;
  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    if ((__denom == 0.0) && (!std::isnan(__a) || !std::isnan(__b))) {
      // Zero denominator: infinity carrying the sign of __c, times each
      // numerator component. __builtin_huge_val() is the double-typed builtin.
      __real__(z) = std::copysign(__builtin_huge_val(), __c) * __a;
      __imag__(z) = std::copysign(__builtin_huge_val(), __c) * __b;
    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
               std::isfinite(__d)) {
      // Infinite numerator, finite divisor: reduce the numerator to +-1 / +-0
      // and multiply the resulting combination by infinity.
      __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
      __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
      __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
      __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
    } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
               std::isfinite(__b)) {
      // Finite numerator, infinite divisor (__logbw is +infinity): reduce the
      // divisor to +-1 / +-0 and multiply the resulting combination by zero.
      __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
      __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
      __real__(z) = 0.0 * (__a * __c + __b * __d);
      __imag__(z) = 0.0 * (__b * __c - __a * __d);
    }
  }
  return z;
}
168
// Device implementation of the libgcc helper that clang calls for C99
// single-precision complex division.
//
// Computes (__a + i*__b) / (__c + i*__d) and returns it as a float _Complex.
// The divisor is first scaled by a power of two taken from the exponent of
// its larger-magnitude component, and the quotient is scaled back afterwards.
// If BOTH components of the quotient are NaN, three special cases are
// handled: a zero denominator with a non-NaN numerator component, an infinite
// numerator with a finite divisor, and a finite numerator with an infinite
// divisor.
extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
                                                     float __c, float __d) {
  int __ilogbw = 0;
  // std::fmax comes from <cmath>, like the other math calls in this function,
  // so neither std::max (<algorithm>) nor the global ::max overloads declared
  // by the CUDA headers are needed here.
  float __logbw = std::logb(std::fmax(std::abs(__c), std::abs(__d)));
  if (std::isfinite(__logbw)) {
    // Scale the divisor by 2^-__ilogbw; the quotient is scaled by the same
    // factor below to compensate.
    __ilogbw = (int)__logbw;
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }
  float __denom = __c * __c + __d * __d;
  float _Complex z;
  __real__(z) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(z) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
  if (std::isnan(__real__(z)) && std::isnan(__imag__(z))) {
    // Float literals below keep std::copysign on its (float, float) overload;
    // an integer argument would promote the whole call to double.
    if ((__denom == 0.0f) && (!std::isnan(__a) || !std::isnan(__b))) {
      // Zero denominator: infinity carrying the sign of __c, times each
      // numerator component.
      __real__(z) = std::copysign(__builtin_huge_valf(), __c) * __a;
      __imag__(z) = std::copysign(__builtin_huge_valf(), __c) * __b;
    } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
               std::isfinite(__d)) {
      // Infinite numerator, finite divisor: reduce the numerator to +-1 / +-0
      // and multiply the resulting combination by infinity.
      __a = std::copysign(std::isinf(__a) ? 1.0f : 0.0f, __a);
      __b = std::copysign(std::isinf(__b) ? 1.0f : 0.0f, __b);
      __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
      __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
    } else if (std::isinf(__logbw) && __logbw > 0.0f && std::isfinite(__a) &&
               std::isfinite(__b)) {
      // Finite numerator, infinite divisor (__logbw is +infinity): reduce the
      // divisor to +-1 / +-0 and multiply the resulting combination by zero.
      __c = std::copysign(std::isinf(__c) ? 1.0f : 0.0f, __c);
      __d = std::copysign(std::isinf(__d) ? 1.0f : 0.0f, __d);
      __real__(z) = 0.0f * (__a * __c + __b * __d);
      __imag__(z) = 0.0f * (__b * __c - __a * __d);
    }
  }
  return z;
}
202
203	#endif // __CLANG_CUDA_COMPLEX_BUILTINS
204

Clang Project