1 | // Tests CUDA compilation pipeline construction in Driver. |
2 | // REQUIRES: clang-driver |
3 | // REQUIRES: x86-registered-target |
4 | // REQUIRES: nvptx-registered-target |
5 | |
6 | // Simple compilation case. Compile device-side to PTX assembly and make sure |
7 | // we use it on the host side. |
8 | // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \ |
9 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
10 | // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ |
11 | // RUN: -check-prefix NOLINK %s |
12 | |
13 | // Typical compilation + link case. |
14 | // RUN: %clang -### -target x86_64-linux-gnu %s 2>&1 \ |
15 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
16 | // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ |
17 | // RUN: -check-prefix LINK %s |
18 | |
19 | // Verify that --cuda-host-only disables device-side compilation, but doesn't |
20 | // disable host-side compilation/linking. |
21 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only %s 2>&1 \ |
22 | // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ |
23 | // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s |
24 | |
25 | // Verify that --cuda-device-only disables host-side compilation and linking. |
26 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only %s 2>&1 \ |
27 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
28 | // RUN: -check-prefix NOHOST -check-prefix NOLINK %s |
29 | |
30 | // Check that the last of --cuda-compile-host-device, --cuda-host-only, and |
31 | // --cuda-device-only wins. |
32 | |
33 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
34 | // RUN: --cuda-host-only %s 2>&1 \ |
35 | // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ |
36 | // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s |
37 | |
38 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \ |
39 | // RUN: --cuda-host-only %s 2>&1 \ |
40 | // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \ |
41 | // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s |
42 | |
43 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \ |
44 | // RUN: --cuda-device-only %s 2>&1 \ |
45 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
46 | // RUN: -check-prefix NOHOST -check-prefix NOLINK %s |
47 | |
48 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \ |
49 | // RUN: --cuda-device-only %s 2>&1 \ |
50 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
51 | // RUN: -check-prefix NOHOST -check-prefix NOLINK %s |
52 | |
53 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \ |
54 | // RUN: --cuda-compile-host-device %s 2>&1 \ |
55 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
56 | // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ |
57 | // RUN: -check-prefix LINK %s |
58 | |
59 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
60 | // RUN: --cuda-compile-host-device %s 2>&1 \ |
61 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
62 | // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \ |
63 | // RUN: -check-prefix LINK %s |
64 | |
65 | // Verify that the --cuda-gpu-arch option passes the correct GPU architecture |
66 | // to device compilation. |
67 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \ |
68 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
69 | // RUN: -check-prefix DEVICE-SM30 -check-prefix HOST \ |
70 | // RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s |
71 | |
72 | // Verify that there is one device-side compilation per --cuda-gpu-arch |
73 | // argument and that all results are included on the host side. |
74 | // RUN: %clang -### -target x86_64-linux-gnu \ |
75 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \ |
76 | // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \ |
77 | // RUN: -check-prefixes DEVICE-SM30,DEVICE2-SM35 \ |
78 | // RUN: -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \ |
79 | // RUN: -check-prefixes HOST,HOST-NOSAVE,NOLINK %s |
80 | |
81 | // Verify that device-side results are passed to the correct tool when |
82 | // -save-temps is used. |
83 | // RUN: %clang -### -target x86_64-linux-gnu -save-temps -c %s 2>&1 \ |
84 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \ |
85 | // RUN: -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s |
86 | |
87 | // Verify that device-side results are passed to the correct tool when |
88 | // -fno-integrated-as is used. |
89 | // RUN: %clang -### -target x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \ |
90 | // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \ |
91 | // RUN: -check-prefix HOST -check-prefix HOST-NOSAVE \ |
92 | // RUN: -check-prefix HOST-AS -check-prefix NOLINK %s |
93 | |
94 | // Verify that --[no-]cuda-gpu-arch arguments are handled correctly. |
95 | // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X |
96 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
97 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
98 | // RUN: --no-cuda-gpu-arch=sm_35 \ |
99 | // RUN: -c %s 2>&1 \ |
100 | // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s |
101 | |
102 | // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X |
103 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
104 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
105 | // RUN: --no-cuda-gpu-arch=sm_35 \ |
106 | // RUN: -c %s 2>&1 \ |
107 | // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s |
108 | |
109 | // c) If --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X, |
110 | // we default to sm_20 -- same as if no --cuda-gpu-arch were passed. |
111 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
112 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
113 | // RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ |
114 | // RUN: -c %s 2>&1 \ |
115 | // RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s |
116 | |
117 | // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X |
118 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
119 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
120 | // RUN: --no-cuda-gpu-arch=sm_50 \ |
121 | // RUN: -c %s 2>&1 \ |
122 | // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s |
123 | |
124 | // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X |
125 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
126 | // RUN: --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \ |
127 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
128 | // RUN: -c %s 2>&1 \ |
129 | // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s |
130 | |
131 | // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X |
132 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
133 | // RUN: --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \ |
134 | // RUN: --no-cuda-gpu-arch=all \ |
135 | // RUN: --cuda-gpu-arch=sm_35 \ |
136 | // RUN: -c %s 2>&1 \ |
137 | // RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s |
138 | |
139 | // g) There's no --cuda-gpu-arch=all; passing it must be diagnosed as an error. |
140 | // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \ |
141 | // RUN: --cuda-gpu-arch=all \ |
142 | // RUN: -c %s 2>&1 \ |
143 | // RUN: | FileCheck -check-prefix ARCHALLERROR %s |
144 | |
145 | |
146 | // Verify that --[no-]cuda-include-ptx arguments are handled correctly. |
147 | // a) By default, PTX is included for all GPUs. |
148 | // RUN: %clang -### -target x86_64-linux-gnu \ |
149 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
150 | // RUN: -c %s 2>&1 \ |
151 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s |
152 | |
153 | // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs |
154 | // RUN: %clang -### -target x86_64-linux-gnu \ |
155 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
156 | // RUN: --no-cuda-include-ptx=all \ |
157 | // RUN: -c %s 2>&1 \ |
158 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s |
159 | |
160 | // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only. |
161 | // RUN: %clang -### -target x86_64-linux-gnu \ |
162 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
163 | // RUN: --no-cuda-include-ptx=sm_35 \ |
164 | // RUN: -c %s 2>&1 \ |
165 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s |
166 | // RUN: %clang -### -target x86_64-linux-gnu \ |
167 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
168 | // RUN: --no-cuda-include-ptx=sm_30 \ |
169 | // RUN: -c %s 2>&1 \ |
170 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s |
171 | |
172 | // d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all |
173 | // RUN: %clang -### -target x86_64-linux-gnu \ |
174 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
175 | // RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \ |
176 | // RUN: -c %s 2>&1 \ |
177 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s |
178 | |
179 | // e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX |
180 | // RUN: %clang -### -target x86_64-linux-gnu \ |
181 | // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \ |
182 | // RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \ |
183 | // RUN: -c %s 2>&1 \ |
184 | // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s |
185 | |
186 | |
187 | // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20" |
188 | // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20" |
189 | // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30" |
190 | // NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30" |
191 | // ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35" |
192 | // NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35" |
193 | // ARCHALLERROR: error: Unsupported CUDA gpu architecture: all |
194 | |
195 | // Match device-side preprocessor and compiler phases with -save-temps. |
196 | // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
197 | // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" |
198 | // DEVICE-SAVE-SAME: "-fcuda-is-device" |
199 | // DEVICE-SAVE-SAME: "-x" "cuda" |
200 | |
201 | // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
202 | // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" |
203 | // DEVICE-SAVE-SAME: "-fcuda-is-device" |
204 | // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output" |
205 | |
206 | // Match the job that produces PTX assembly. |
207 | // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
208 | // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" |
209 | // DEVICE-SAME: "-fcuda-is-device" |
210 | // DEVICE-SM30-SAME: "-target-cpu" "sm_30" |
211 | // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]" |
212 | // DEVICE-NOSAVE-SAME: "-x" "cuda" |
213 | // DEVICE-SAVE-SAME: "-x" "ir" |
214 | |
215 | // Match the call to ptxas (which assembles PTX to SASS). |
216 | // DEVICE:ptxas |
217 | // DEVICE-SM30-DAG: "--gpu-name" "sm_30" |
218 | // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]" |
219 | // DEVICE-DAG: "[[PTXFILE]]" |
220 | |
221 | // Match another device-side compilation. |
222 | // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
223 | // DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" |
224 | // DEVICE2-SAME: "-fcuda-is-device" |
225 | // DEVICE2-SM35-SAME: "-target-cpu" "sm_35" |
226 | // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]" |
227 | // DEVICE2-SAME: "-x" "cuda" |
228 | |
229 | // Match another call to ptxas. |
230 | // DEVICE2: ptxas |
231 | // DEVICE2-SM35-DAG: "--gpu-name" "sm_35" |
232 | // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]" |
233 | // DEVICE2-DAG: "[[PTXFILE2]]" |
234 | |
235 | // Match no device-side compilation. |
236 | // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda" |
237 | // NODEVICE-NOT: "-fcuda-is-device" |
238 | |
239 | // INCLUDES-DEVICE:fatbinary |
240 | // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" |
241 | // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" |
242 | // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" |
243 | // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]" |
244 | // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]" |
245 | |
246 | // Match host-side preprocessor job with -save-temps. |
247 | // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu" |
248 | // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda" |
249 | // HOST-SAVE-NOT: "-fcuda-is-device" |
250 | // HOST-SAVE-SAME: "-x" "cuda" |
251 | |
252 | // Match host-side compilation. |
253 | // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu" |
254 | // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda" |
255 | // HOST-NOT: "-fcuda-is-device" |
256 | // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]" |
257 | // HOST-NOSAVE-SAME: "-x" "cuda" |
258 | // HOST-SAVE-SAME: "-x" "cuda-cpp-output" |
259 | // Only the combined fatbinary is included: no other GPU binary may precede it. |
260 | // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" |
261 | // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]" |
262 | // Only the combined fatbinary is included: no other GPU binary may follow it. |
263 | // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary" |
264 | |
265 | // Match external assembler that uses compilation output. |
266 | // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]" |
267 | |
268 | // Match no GPU code inclusion. |
269 | // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary" |
270 | |
271 | // Match no host compilation. |
272 | // NOHOST-NOT: "-cc1" "-triple" |
273 | // NOHOST-NOT: "-x" "cuda" |
274 | |
275 | // Match linker. |
276 | // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}" |
277 | // LINK-SAME: "[[HOSTOUTPUT]]" |
278 | |
279 | // Match no linker. |
280 | // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}" |
281 | |
282 | // FATBIN-COMMON:fatbinary |
283 | // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]" |
284 | // FATBIN-COMMON: "--image=profile=sm_30,file= |
285 | // PTX-SM30: "--image=profile=compute_30,file= |
286 | // NOPTX-SM30-NOT: "--image=profile=compute_30,file= |
287 | // FATBIN-COMMON: "--image=profile=sm_35,file= |
288 | // PTX-SM35: "--image=profile=compute_35,file= |
289 | // NOPTX-SM35-NOT: "--image=profile=compute_35,file= |
290 | |