1 | /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== |
2 | * |
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | * of this software and associated documentation files (the "Software"), to deal |
5 | * in the Software without restriction, including without limitation the rights |
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 | * copies of the Software, and to permit persons to whom the Software is |
8 | * furnished to do so, subject to the following conditions: |
9 | * |
10 | * The above copyright notice and this permission notice shall be included in |
11 | * all copies or substantial portions of the Software. |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
19 | * THE SOFTWARE. |
20 | * |
21 | *===-----------------------------------------------------------------------=== |
22 | */ |
23 | |
24 | #ifndef __CLANG_CUDA_COMPLEX_BUILTINS |
25 | #define __CLANG_CUDA_COMPLEX_BUILTINS |
26 | |
// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are
// libgcc functions that clang assumes are available when compiling C99 complex
// operations. (These implementations come from libc++, and have been modified
// to work with CUDA.)
31 | |
// Device-side double-precision complex multiply:
//   returns (__a + i*__b) * (__c + i*__d).
//
// The plain product is formed first. Only if BOTH components of that product
// are NaN are the operands examined and, where an infinity is involved,
// rewritten so that the product can be recomputed as a scaled infinity.
extern "C" inline __device__ double _Complex __muldc3(double __a, double __b,
                                                      double __c, double __d) {
  const double __prod_ac = __a * __c;
  const double __prod_bd = __b * __d;
  const double __prod_ad = __a * __d;
  const double __prod_bc = __b * __c;

  double _Complex __result;
  __real__(__result) = __prod_ac - __prod_bd;
  __imag__(__result) = __prod_ad + __prod_bc;

  // Fast path: at least one component is not NaN, nothing to repair.
  if (!std::isnan(__real__(__result)) || !std::isnan(__imag__(__result)))
    return __result;

  bool __redo = false;

  // Left operand has an infinite component: collapse it to (+-1 or +-0) per
  // component and turn NaNs in the right operand into signed zeros.
  if (std::isinf(__a) || std::isinf(__b)) {
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __redo = true;
  }

  // Same treatment with the roles of the two operands swapped.
  if (std::isinf(__c) || std::isinf(__d)) {
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    __redo = true;
  }

  // Neither operand held an infinity, but one of the partial products became
  // infinite: replace any NaN input by a zero of the same sign.
  if (!__redo && (std::isinf(__prod_ac) || std::isinf(__prod_bd) ||
                  std::isinf(__prod_ad) || std::isinf(__prod_bc))) {
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __redo = true;
  }

  if (__redo) {
    // std::numeric_limits<double>::infinity() is avoided here: it has no
    // device overload (and is not constexpr before C++11), so the compiler
    // builtin supplies the infinity instead.
    const double __inf = __builtin_huge_valf();
    __real__(__result) = __inf * (__a * __c - __b * __d);
    __imag__(__result) = __inf * (__a * __d + __b * __c);
  }
  return __result;
}
82 | |
// Device-side single-precision complex multiply:
//   returns (__a + i*__b) * (__c + i*__d).
//
// The plain product is formed first. Only if BOTH components of that product
// are NaN are the operands examined and, where an infinity is involved,
// rewritten so that the product can be recomputed as a scaled infinity.
extern "C" inline __device__ float _Complex __mulsc3(float __a, float __b,
                                                     float __c, float __d) {
  const float __prod_ac = __a * __c;
  const float __prod_bd = __b * __d;
  const float __prod_ad = __a * __d;
  const float __prod_bc = __b * __c;

  float _Complex __result;
  __real__(__result) = __prod_ac - __prod_bd;
  __imag__(__result) = __prod_ad + __prod_bc;

  // Fast path: at least one component is not NaN, nothing to repair.
  if (!std::isnan(__real__(__result)) || !std::isnan(__imag__(__result)))
    return __result;

  bool __redo = false;

  // Left operand has an infinite component: collapse it to (+-1 or +-0) per
  // component and turn NaNs in the right operand into signed zeros.
  if (std::isinf(__a) || std::isinf(__b)) {
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __redo = true;
  }

  // Same treatment with the roles of the two operands swapped.
  if (std::isinf(__c) || std::isinf(__d)) {
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    __redo = true;
  }

  // Neither operand held an infinity, but one of the partial products became
  // infinite: replace any NaN input by a zero of the same sign.
  if (!__redo && (std::isinf(__prod_ac) || std::isinf(__prod_bd) ||
                  std::isinf(__prod_ad) || std::isinf(__prod_bc))) {
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __redo = true;
  }

  if (__redo) {
    // Infinity comes from the compiler builtin rather than a library call.
    const float __inf = __builtin_huge_valf();
    __real__(__result) = __inf * (__a * __c - __b * __d);
    __imag__(__result) = __inf * (__a * __d + __b * __c);
  }
  return __result;
}
131 | |
// Device-side double-precision complex divide:
//   returns (__a + i*__b) / (__c + i*__d).
//
// The divisor is first rescaled by a power of two taken from the exponent of
// its larger-magnitude component; the quotient is scaled back afterwards.
// If both components of the quotient are NaN, three special cases are tried
// in order: zero divisor, infinite dividend, infinite divisor.
extern "C" inline __device__ double _Complex __divdc3(double __a, double __b,
                                                      double __c, double __d) {
  // std::max is deliberately not used: it lives in <algorithm>, which we do
  // not want to pull into every compile. The CUDA headers provide
  // ::max(float, float) and ::max(double, double), which is all we need.
  const double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));

  int __ilogbw = 0;
  if (std::isfinite(__logbw)) {
    __ilogbw = static_cast<int>(__logbw);
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }

  const double __denom = __c * __c + __d * __d;

  double _Complex __quot;
  __real__(__quot) =
      std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(__quot) =
      std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);

  // Fast path: at least one component is not NaN, nothing to repair.
  if (!std::isnan(__real__(__quot)) || !std::isnan(__imag__(__quot)))
    return __quot;

  // Case 1: zero denominator and the dividend is not NaN in both components.
  // The result is the dividend scaled by an infinity carrying the sign of __c.
  if ((__denom == 0.0) && !(std::isnan(__a) && std::isnan(__b))) {
    const double __signed_inf = std::copysign(__builtin_huge_valf(), __c);
    __real__(__quot) = __signed_inf * __a;
    __imag__(__quot) = __signed_inf * __b;
    return __quot;
  }

  // Case 2: infinite dividend, finite divisor. Reduce the dividend to
  // (+-1 or +-0) per component and scale the numerator by infinity.
  if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
      std::isfinite(__d)) {
    const double __inf = __builtin_huge_valf();
    __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
    __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
    __real__(__quot) = __inf * (__a * __c + __b * __d);
    __imag__(__quot) = __inf * (__b * __c - __a * __d);
    return __quot;
  }

  // Case 3: infinite divisor (logb returned +infinity), finite dividend.
  // Reduce the divisor to (+-1 or +-0) per component and scale by zero.
  if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
      std::isfinite(__b)) {
    __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
    __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
    __real__(__quot) = 0.0 * (__a * __c + __b * __d);
    __imag__(__quot) = 0.0 * (__b * __c - __a * __d);
  }
  return __quot;
}
168 | |
// Device-side single-precision complex divide:
//   returns (__a + i*__b) / (__c + i*__d).
//
// The divisor is first rescaled by a power of two taken from the exponent of
// its larger-magnitude component; the quotient is scaled back afterwards.
// If both components of the quotient are NaN, three special cases are tried
// in order: zero divisor, infinite dividend, infinite divisor.
extern "C" inline __device__ float _Complex __divsc3(float __a, float __b,
                                                     float __c, float __d) {
  // Unqualified max: this relies on a global max overload being in scope
  // rather than std::max.
  const float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));

  int __ilogbw = 0;
  if (std::isfinite(__logbw)) {
    __ilogbw = static_cast<int>(__logbw);
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }

  const float __denom = __c * __c + __d * __d;

  float _Complex __quot;
  __real__(__quot) =
      std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(__quot) =
      std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);

  // Fast path: at least one component is not NaN, nothing to repair.
  if (!std::isnan(__real__(__quot)) || !std::isnan(__imag__(__quot)))
    return __quot;

  // Case 1: zero denominator and the dividend is not NaN in both components.
  // The result is the dividend scaled by an infinity carrying the sign of __c.
  if ((__denom == 0) && !(std::isnan(__a) && std::isnan(__b))) {
    const float __signed_inf = std::copysign(__builtin_huge_valf(), __c);
    __real__(__quot) = __signed_inf * __a;
    __imag__(__quot) = __signed_inf * __b;
    return __quot;
  }

  // Case 2: infinite dividend, finite divisor. Reduce the dividend to
  // (+-1 or +-0) per component and scale the numerator by infinity.
  if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
      std::isfinite(__d)) {
    const float __inf = __builtin_huge_valf();
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    __real__(__quot) = __inf * (__a * __c + __b * __d);
    __imag__(__quot) = __inf * (__b * __c - __a * __d);
    return __quot;
  }

  // Case 3: infinite divisor (logb returned +infinity), finite dividend.
  // Reduce the divisor to (+-1 or +-0) per component and scale by zero.
  if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
      std::isfinite(__b)) {
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    __real__(__quot) = 0 * (__a * __c + __b * __d);
    __imag__(__quot) = 0 * (__b * __c - __a * __d);
  }
  return __quot;
}
202 | |
203 | #endif // __CLANG_CUDA_COMPLEX_BUILTINS |
204 | |