1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | #include "Cuda.h" |
10 | #include "CommonArgs.h" |
11 | #include "InputInfo.h" |
12 | #include "clang/Basic/Cuda.h" |
13 | #include "clang/Config/config.h" |
14 | #include "clang/Driver/Compilation.h" |
15 | #include "clang/Driver/Distro.h" |
16 | #include "clang/Driver/Driver.h" |
17 | #include "clang/Driver/DriverDiagnostic.h" |
18 | #include "clang/Driver/Options.h" |
19 | #include "llvm/Option/ArgList.h" |
20 | #include "llvm/Support/FileSystem.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/Support/Process.h" |
23 | #include "llvm/Support/Program.h" |
24 | #include "llvm/Support/VirtualFileSystem.h" |
25 | #include <system_error> |
26 | |
27 | using namespace clang::driver; |
28 | using namespace clang::driver::toolchains; |
29 | using namespace clang::driver::tools; |
30 | using namespace clang; |
31 | using namespace llvm::opt; |
32 | |
33 | |
34 | |
35 | static CudaVersion ParseCudaVersionFile(llvm::StringRef V) { |
36 | if (!V.startswith("CUDA Version ")) |
37 | return CudaVersion::UNKNOWN; |
38 | V = V.substr(strlen("CUDA Version ")); |
39 | int Major = -1, Minor = -1; |
40 | auto First = V.split('.'); |
41 | auto Second = First.second.split('.'); |
42 | if (First.first.getAsInteger(10, Major) || |
43 | Second.first.getAsInteger(10, Minor)) |
44 | return CudaVersion::UNKNOWN; |
45 | |
46 | if (Major == 7 && Minor == 0) { |
47 | |
48 | |
49 | return CudaVersion::CUDA_70; |
50 | } |
51 | if (Major == 7 && Minor == 5) |
52 | return CudaVersion::CUDA_75; |
53 | if (Major == 8 && Minor == 0) |
54 | return CudaVersion::CUDA_80; |
55 | if (Major == 9 && Minor == 0) |
56 | return CudaVersion::CUDA_90; |
57 | if (Major == 9 && Minor == 1) |
58 | return CudaVersion::CUDA_91; |
59 | if (Major == 9 && Minor == 2) |
60 | return CudaVersion::CUDA_92; |
61 | if (Major == 10 && Minor == 0) |
62 | return CudaVersion::CUDA_100; |
63 | if (Major == 10 && Minor == 1) |
64 | return CudaVersion::CUDA_101; |
65 | return CudaVersion::UNKNOWN; |
66 | } |
67 | |
68 | CudaInstallationDetector::CudaInstallationDetector( |
69 | const Driver &D, const llvm::Triple &HostTriple, |
70 | const llvm::opt::ArgList &Args) |
71 | : D(D) { |
72 | struct Candidate { |
73 | std::string Path; |
74 | bool StrictChecking; |
75 | |
76 | Candidate(std::string Path, bool StrictChecking = false) |
77 | : Path(Path), StrictChecking(StrictChecking) {} |
78 | }; |
79 | SmallVector<Candidate, 4> Candidates; |
80 | |
81 | |
82 | std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"}; |
83 | |
84 | if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { |
85 | Candidates.emplace_back( |
86 | Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str()); |
87 | } else if (HostTriple.isOSWindows()) { |
88 | for (const char *Ver : Versions) |
89 | Candidates.emplace_back( |
90 | D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + |
91 | Ver); |
92 | } else { |
93 | if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) { |
94 | |
95 | |
96 | |
97 | |
98 | |
99 | |
100 | |
101 | |
102 | if (llvm::ErrorOr<std::string> ptxas = |
103 | llvm::sys::findProgramByName("ptxas")) { |
104 | SmallString<256> ptxasAbsolutePath; |
105 | llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath); |
106 | |
107 | StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath); |
108 | if (llvm::sys::path::filename(ptxasDir) == "bin") |
109 | Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir), |
110 | ); |
111 | } |
112 | } |
113 | |
114 | Candidates.emplace_back(D.SysRoot + "/usr/local/cuda"); |
115 | for (const char *Ver : Versions) |
116 | Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver); |
117 | |
118 | if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu()) |
119 | |
120 | |
121 | Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); |
122 | } |
123 | |
124 | bool NoCudaLib = Args.hasArg(options::OPT_nocudalib); |
125 | |
126 | for (const auto &Candidate : Candidates) { |
127 | InstallPath = Candidate.Path; |
128 | if (InstallPath.empty() || !D.getVFS().exists(InstallPath)) |
129 | continue; |
130 | |
131 | BinPath = InstallPath + "/bin"; |
132 | IncludePath = InstallPath + "/include"; |
133 | LibDevicePath = InstallPath + "/nvvm/libdevice"; |
134 | |
135 | auto &FS = D.getVFS(); |
136 | if (!(FS.exists(IncludePath) && FS.exists(BinPath))) |
137 | continue; |
138 | bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking); |
139 | if (CheckLibDevice && !FS.exists(LibDevicePath)) |
140 | continue; |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 | |
148 | if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64")) |
149 | LibPath = InstallPath + "/lib64"; |
150 | else if (FS.exists(InstallPath + "/lib")) |
151 | LibPath = InstallPath + "/lib"; |
152 | else |
153 | continue; |
154 | |
155 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
156 | FS.getBufferForFile(InstallPath + "/version.txt"); |
157 | if (!VersionFile) { |
158 | |
159 | |
160 | Version = CudaVersion::CUDA_70; |
161 | } else { |
162 | Version = ParseCudaVersionFile((*VersionFile)->getBuffer()); |
163 | } |
164 | |
165 | if (Version >= CudaVersion::CUDA_90) { |
166 | |
167 | std::string FilePath = LibDevicePath + "/libdevice.10.bc"; |
168 | if (FS.exists(FilePath)) { |
169 | for (const char *GpuArchName : |
170 | {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53", |
171 | "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) { |
172 | const CudaArch GpuArch = StringToCudaArch(GpuArchName); |
173 | if (Version >= MinVersionForCudaArch(GpuArch) && |
174 | Version <= MaxVersionForCudaArch(GpuArch)) |
175 | LibDeviceMap[GpuArchName] = FilePath; |
176 | } |
177 | } |
178 | } else { |
179 | std::error_code EC; |
180 | for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE; |
181 | !EC && LI != LE; LI = LI.increment(EC)) { |
182 | StringRef FilePath = LI->path(); |
183 | StringRef FileName = llvm::sys::path::filename(FilePath); |
184 | |
185 | |
186 | const StringRef LibDeviceName = "libdevice."; |
187 | if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc"))) |
188 | continue; |
189 | StringRef GpuArch = FileName.slice( |
190 | LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); |
191 | LibDeviceMap[GpuArch] = FilePath.str(); |
192 | |
193 | |
194 | |
195 | if (GpuArch == "compute_20") { |
196 | LibDeviceMap["sm_20"] = FilePath; |
197 | LibDeviceMap["sm_21"] = FilePath; |
198 | LibDeviceMap["sm_32"] = FilePath; |
199 | } else if (GpuArch == "compute_30") { |
200 | LibDeviceMap["sm_30"] = FilePath; |
201 | if (Version < CudaVersion::CUDA_80) { |
202 | LibDeviceMap["sm_50"] = FilePath; |
203 | LibDeviceMap["sm_52"] = FilePath; |
204 | LibDeviceMap["sm_53"] = FilePath; |
205 | } |
206 | LibDeviceMap["sm_60"] = FilePath; |
207 | LibDeviceMap["sm_61"] = FilePath; |
208 | LibDeviceMap["sm_62"] = FilePath; |
209 | } else if (GpuArch == "compute_35") { |
210 | LibDeviceMap["sm_35"] = FilePath; |
211 | LibDeviceMap["sm_37"] = FilePath; |
212 | } else if (GpuArch == "compute_50") { |
213 | if (Version >= CudaVersion::CUDA_80) { |
214 | LibDeviceMap["sm_50"] = FilePath; |
215 | LibDeviceMap["sm_52"] = FilePath; |
216 | LibDeviceMap["sm_53"] = FilePath; |
217 | } |
218 | } |
219 | } |
220 | } |
221 | |
222 | |
223 | |
224 | if (LibDeviceMap.empty() && !NoCudaLib) |
225 | continue; |
226 | |
227 | IsValid = true; |
228 | break; |
229 | } |
230 | } |
231 | |
232 | void CudaInstallationDetector::AddCudaIncludeArgs( |
233 | const ArgList &DriverArgs, ArgStringList &CC1Args) const { |
234 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { |
235 | |
236 | |
237 | SmallString<128> P(D.ResourceDir); |
238 | llvm::sys::path::append(P, "include"); |
239 | llvm::sys::path::append(P, "cuda_wrappers"); |
240 | CC1Args.push_back("-internal-isystem"); |
241 | CC1Args.push_back(DriverArgs.MakeArgString(P)); |
242 | } |
243 | |
244 | if (DriverArgs.hasArg(options::OPT_nocudainc)) |
245 | return; |
246 | |
247 | if (!isValid()) { |
248 | D.Diag(diag::err_drv_no_cuda_installation); |
249 | return; |
250 | } |
251 | |
252 | CC1Args.push_back("-internal-isystem"); |
253 | CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); |
254 | CC1Args.push_back("-include"); |
255 | CC1Args.push_back("__clang_cuda_runtime_wrapper.h"); |
256 | } |
257 | |
258 | void CudaInstallationDetector::CheckCudaVersionSupportsArch( |
259 | CudaArch Arch) const { |
260 | if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN || |
261 | ArchsWithBadVersion.count(Arch) > 0) |
262 | return; |
263 | |
264 | auto MinVersion = MinVersionForCudaArch(Arch); |
265 | auto MaxVersion = MaxVersionForCudaArch(Arch); |
266 | if (Version < MinVersion || Version > MaxVersion) { |
267 | ArchsWithBadVersion.insert(Arch); |
268 | D.Diag(diag::err_drv_cuda_version_unsupported) |
269 | << CudaArchToString(Arch) << CudaVersionToString(MinVersion) |
270 | << CudaVersionToString(MaxVersion) << InstallPath |
271 | << CudaVersionToString(Version); |
272 | } |
273 | } |
274 | |
275 | void CudaInstallationDetector::print(raw_ostream &OS) const { |
276 | if (isValid()) |
277 | OS << "Found CUDA installation: " << InstallPath << ", version " |
278 | << CudaVersionToString(Version) << "\n"; |
279 | } |
280 | |
namespace {

/// Level of debug information requested for device-side code, as computed by
/// mustEmitDebugInfo() from the driver's -g/-O arguments.
enum DeviceDebugInfoLevel {
  /// No debug information at all.
  DisableDebugInfo = 0,
  /// Line/directive information only.
  DebugDirectivesOnly = 1,
  /// Full debug information; the kind chosen for the host is left as is.
  EmitSameDebugInfoAsHost = 2,
};

} // anonymous namespace
294 | |
295 | |
296 | |
297 | |
298 | |
299 | |
300 | |
301 | |
302 | |
303 | static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) { |
304 | const Arg *A = Args.getLastArg(options::OPT_O_Group); |
305 | bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) || |
306 | Args.hasFlag(options::OPT_cuda_noopt_device_debug, |
307 | options::OPT_no_cuda_noopt_device_debug, |
308 | ); |
309 | if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) { |
310 | const Option &Opt = A->getOption(); |
311 | if (Opt.matches(options::OPT_gN_Group)) { |
312 | if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0)) |
313 | return DisableDebugInfo; |
314 | if (Opt.matches(options::OPT_gline_directives_only)) |
315 | return DebugDirectivesOnly; |
316 | } |
317 | return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly; |
318 | } |
319 | return DisableDebugInfo; |
320 | } |
321 | |
322 | void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, |
323 | const InputInfo &Output, |
324 | const InputInfoList &Inputs, |
325 | const ArgList &Args, |
326 | const char *LinkingOutput) const { |
327 | const auto &TC = |
328 | static_cast<const toolchains::CudaToolChain &>(getToolChain()); |
329 | (0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 329, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform"); |
330 | |
331 | StringRef GPUArchName; |
332 | |
333 | |
334 | |
335 | if (JA.isDeviceOffloading(Action::OFK_OpenMP)) { |
336 | GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); |
337 | (0) . __assert_fail ("!GPUArchName.empty() && \"Must have an architecture passed in.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 337, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GPUArchName.empty() && "Must have an architecture passed in."); |
338 | } else |
339 | GPUArchName = JA.getOffloadingArch(); |
340 | |
341 | |
342 | CudaArch gpu_arch = StringToCudaArch(GPUArchName); |
343 | (0) . __assert_fail ("gpu_arch != CudaArch..UNKNOWN && \"Device action expected to have an architecture.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 344, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(gpu_arch != CudaArch::UNKNOWN && |
344 | (0) . __assert_fail ("gpu_arch != CudaArch..UNKNOWN && \"Device action expected to have an architecture.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 344, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device action expected to have an architecture."); |
345 | |
346 | |
347 | if (!Args.hasArg(options::OPT_no_cuda_version_check)) { |
348 | TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); |
349 | } |
350 | |
351 | ArgStringList CmdArgs; |
352 | CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); |
353 | DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args); |
354 | if (DIKind == EmitSameDebugInfoAsHost) { |
355 | |
356 | |
357 | CmdArgs.push_back("-g"); |
358 | CmdArgs.push_back("--dont-merge-basicblocks"); |
359 | CmdArgs.push_back("--return-at-end"); |
360 | } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { |
361 | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | |
368 | StringRef OOpt = "3"; |
369 | if (A->getOption().matches(options::OPT_O4) || |
370 | A->getOption().matches(options::OPT_Ofast)) |
371 | OOpt = "3"; |
372 | else if (A->getOption().matches(options::OPT_O0)) |
373 | OOpt = "0"; |
374 | else if (A->getOption().matches(options::OPT_O)) { |
375 | |
376 | OOpt = llvm::StringSwitch<const char *>(A->getValue()) |
377 | .Case("1", "1") |
378 | .Case("2", "2") |
379 | .Case("3", "3") |
380 | .Case("s", "2") |
381 | .Case("z", "2") |
382 | .Default("2"); |
383 | } |
384 | CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); |
385 | } else { |
386 | |
387 | |
388 | CmdArgs.push_back("-O0"); |
389 | } |
390 | if (DIKind == DebugDirectivesOnly) |
391 | CmdArgs.push_back("-lineinfo"); |
392 | |
393 | |
394 | if (Args.hasArg(options::OPT_v)) |
395 | CmdArgs.push_back("-v"); |
396 | |
397 | CmdArgs.push_back("--gpu-name"); |
398 | CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch))); |
399 | CmdArgs.push_back("--output-file"); |
400 | CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output))); |
401 | for (const auto& II : Inputs) |
402 | CmdArgs.push_back(Args.MakeArgString(II.getFilename())); |
403 | |
404 | for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) |
405 | CmdArgs.push_back(Args.MakeArgString(A)); |
406 | |
407 | bool Relocatable = false; |
408 | if (JA.isOffloading(Action::OFK_OpenMP)) |
409 | |
410 | Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target, |
411 | options::OPT_fnoopenmp_relocatable_target, |
412 | ); |
413 | else if (JA.isOffloading(Action::OFK_Cuda)) |
414 | Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, |
415 | options::OPT_fno_gpu_rdc, ); |
416 | |
417 | if (Relocatable) |
418 | CmdArgs.push_back("-c"); |
419 | |
420 | const char *Exec; |
421 | if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ)) |
422 | Exec = A->getValue(); |
423 | else |
424 | Exec = Args.MakeArgString(TC.GetProgramPath("ptxas")); |
425 | C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); |
426 | } |
427 | |
428 | static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { |
429 | bool includePTX = true; |
430 | for (Arg *A : Args) { |
431 | if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || |
432 | A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) |
433 | continue; |
434 | A->claim(); |
435 | const StringRef ArchStr = A->getValue(); |
436 | if (ArchStr == "all" || ArchStr == gpu_arch) { |
437 | includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); |
438 | continue; |
439 | } |
440 | } |
441 | return includePTX; |
442 | } |
443 | |
444 | |
445 | |
446 | |
447 | void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
448 | const InputInfo &Output, |
449 | const InputInfoList &Inputs, |
450 | const ArgList &Args, |
451 | const char *LinkingOutput) const { |
452 | const auto &TC = |
453 | static_cast<const toolchains::CudaToolChain &>(getToolChain()); |
454 | (0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 454, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform"); |
455 | |
456 | ArgStringList CmdArgs; |
457 | CmdArgs.push_back("--cuda"); |
458 | CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32"); |
459 | CmdArgs.push_back(Args.MakeArgString("--create")); |
460 | CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); |
461 | if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) |
462 | CmdArgs.push_back("-g"); |
463 | |
464 | for (const auto& II : Inputs) { |
465 | auto *A = II.getAction(); |
466 | (0) . __assert_fail ("A->getInputs().size() == 1 && \"Device offload action is expected to have a single input\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 467, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(A->getInputs().size() == 1 && |
467 | (0) . __assert_fail ("A->getInputs().size() == 1 && \"Device offload action is expected to have a single input\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 467, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device offload action is expected to have a single input"); |
468 | const char *gpu_arch_str = A->getOffloadingArch(); |
469 | (0) . __assert_fail ("gpu_arch_str && \"Device action expected to have associated a GPU architecture!\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 470, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(gpu_arch_str && |
470 | (0) . __assert_fail ("gpu_arch_str && \"Device action expected to have associated a GPU architecture!\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 470, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device action expected to have associated a GPU architecture!"); |
471 | CudaArch gpu_arch = StringToCudaArch(gpu_arch_str); |
472 | |
473 | if (II.getType() == types::TY_PP_Asm && |
474 | !shouldIncludePTX(Args, gpu_arch_str)) |
475 | continue; |
476 | |
477 | |
478 | const char *Arch = |
479 | (II.getType() == types::TY_PP_Asm) |
480 | ? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch)) |
481 | : gpu_arch_str; |
482 | CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") + |
483 | Arch + ",file=" + II.getFilename())); |
484 | } |
485 | |
486 | for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) |
487 | CmdArgs.push_back(Args.MakeArgString(A)); |
488 | |
489 | const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary")); |
490 | C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); |
491 | } |
492 | |
493 | void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, |
494 | const InputInfo &Output, |
495 | const InputInfoList &Inputs, |
496 | const ArgList &Args, |
497 | const char *LinkingOutput) const { |
498 | const auto &TC = |
499 | static_cast<const toolchains::CudaToolChain &>(getToolChain()); |
500 | (0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 500, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform"); |
501 | |
502 | ArgStringList CmdArgs; |
503 | |
504 | |
505 | |
506 | (0) . __assert_fail ("!JA.isHostOffloading(Action..OFK_OpenMP) && \"CUDA toolchain not expected for an OpenMP host device.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 507, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!JA.isHostOffloading(Action::OFK_OpenMP) && |
507 | (0) . __assert_fail ("!JA.isHostOffloading(Action..OFK_OpenMP) && \"CUDA toolchain not expected for an OpenMP host device.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 507, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "CUDA toolchain not expected for an OpenMP host device."); |
508 | |
509 | if (Output.isFilename()) { |
510 | CmdArgs.push_back("-o"); |
511 | CmdArgs.push_back(Output.getFilename()); |
512 | } else |
513 | (0) . __assert_fail ("Output.isNothing() && \"Invalid output.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 513, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(Output.isNothing() && "Invalid output."); |
514 | if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) |
515 | CmdArgs.push_back("-g"); |
516 | |
517 | if (Args.hasArg(options::OPT_v)) |
518 | CmdArgs.push_back("-v"); |
519 | |
520 | StringRef GPUArch = |
521 | Args.getLastArgValue(options::OPT_march_EQ); |
522 | (0) . __assert_fail ("!GPUArch.empty() && \"At least one GPU Arch required for ptxas.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 522, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas."); |
523 | |
524 | CmdArgs.push_back("-arch"); |
525 | CmdArgs.push_back(Args.MakeArgString(GPUArch)); |
526 | |
527 | |
528 | |
529 | if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ)) |
530 | CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue())); |
531 | |
532 | |
533 | addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); |
534 | |
535 | |
536 | SmallString<256> DefaultLibPath = |
537 | llvm::sys::path::parent_path(TC.getDriver().Dir); |
538 | llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX); |
539 | CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); |
540 | |
541 | |
542 | CmdArgs.push_back("-lomptarget-nvptx"); |
543 | |
544 | for (const auto &II : Inputs) { |
545 | if (II.getType() == types::TY_LLVM_IR || |
546 | II.getType() == types::TY_LTO_IR || |
547 | II.getType() == types::TY_LTO_BC || |
548 | II.getType() == types::TY_LLVM_BC) { |
549 | C.getDriver().Diag(diag::err_drv_no_linker_llvm_support) |
550 | << getToolChain().getTripleString(); |
551 | continue; |
552 | } |
553 | |
554 | |
555 | |
556 | if (!II.isFilename()) |
557 | continue; |
558 | |
559 | const char *CubinF = C.addTempFile( |
560 | C.getArgs().MakeArgString(getToolChain().getInputFilename(II))); |
561 | |
562 | CmdArgs.push_back(CubinF); |
563 | } |
564 | |
565 | AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); |
566 | |
567 | const char *Exec = |
568 | Args.MakeArgString(getToolChain().GetProgramPath("nvlink")); |
569 | C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); |
570 | } |
571 | |
572 | |
573 | |
574 | |
575 | |
576 | CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, |
577 | const ToolChain &HostTC, const ArgList &Args, |
578 | const Action::OffloadKind OK) |
579 | : ToolChain(D, Triple, Args), HostTC(HostTC), |
580 | CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) { |
581 | if (CudaInstallation.isValid()) |
582 | getProgramPaths().push_back(CudaInstallation.getBinPath()); |
583 | |
584 | |
585 | getProgramPaths().push_back(getDriver().Dir); |
586 | } |
587 | |
588 | std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { |
589 | |
590 | |
591 | |
592 | if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object)) |
593 | return ToolChain::getInputFilename(Input); |
594 | |
595 | |
596 | |
597 | SmallString<256> Filename(ToolChain::getInputFilename(Input)); |
598 | llvm::sys::path::replace_extension(Filename, "cubin"); |
599 | return Filename.str(); |
600 | } |
601 | |
602 | void CudaToolChain::addClangTargetOptions( |
603 | const llvm::opt::ArgList &DriverArgs, |
604 | llvm::opt::ArgStringList &CC1Args, |
605 | Action::OffloadKind DeviceOffloadingKind) const { |
606 | HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); |
607 | |
608 | StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); |
609 | (0) . __assert_fail ("!GpuArch.empty() && \"Must have an explicit GPU arch.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 609, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GpuArch.empty() && "Must have an explicit GPU arch."); |
610 | (0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP || DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert((DeviceOffloadingKind == Action::OFK_OpenMP || |
611 | (0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP || DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> DeviceOffloadingKind == Action::OFK_Cuda) && |
612 | (0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP || DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); |
613 | |
614 | if (DeviceOffloadingKind == Action::OFK_Cuda) { |
615 | CC1Args.push_back("-fcuda-is-device"); |
616 | |
617 | if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, |
618 | options::OPT_fno_cuda_flush_denormals_to_zero, false)) |
619 | CC1Args.push_back("-fcuda-flush-denormals-to-zero"); |
620 | |
621 | if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, |
622 | options::OPT_fno_cuda_approx_transcendentals, false)) |
623 | CC1Args.push_back("-fcuda-approx-transcendentals"); |
624 | |
625 | if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, |
626 | false)) |
627 | CC1Args.push_back("-fgpu-rdc"); |
628 | } |
629 | |
630 | if (DriverArgs.hasArg(options::OPT_nocudalib)) |
631 | return; |
632 | |
633 | std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); |
634 | |
635 | if (LibDeviceFile.empty()) { |
636 | if (DeviceOffloadingKind == Action::OFK_OpenMP && |
637 | DriverArgs.hasArg(options::OPT_S)) |
638 | return; |
639 | |
640 | getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; |
641 | return; |
642 | } |
643 | |
644 | CC1Args.push_back("-mlink-builtin-bitcode"); |
645 | CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); |
646 | |
647 | |
648 | |
649 | |
650 | const char *PtxFeature = "+ptx42"; |
651 | |
652 | |
653 | |
654 | if (CudaInstallation.version() >= CudaVersion::CUDA_91) { |
655 | |
656 | PtxFeature = "+ptx61"; |
657 | } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) { |
658 | |
659 | PtxFeature = "+ptx60"; |
660 | } |
661 | CC1Args.append({"-target-feature", PtxFeature}); |
662 | if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, |
663 | options::OPT_fno_cuda_short_ptr, false)) |
664 | CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); |
665 | |
666 | if (CudaInstallation.version() >= CudaVersion::UNKNOWN) |
667 | CC1Args.push_back(DriverArgs.MakeArgString( |
668 | Twine("-target-sdk-version=") + |
669 | CudaVersionToString(CudaInstallation.version()))); |
670 | |
671 | if (DeviceOffloadingKind == Action::OFK_OpenMP) { |
672 | SmallVector<StringRef, 8> LibraryPaths; |
673 | if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ)) |
674 | LibraryPaths.push_back(A->getValue()); |
675 | |
676 | |
677 | llvm::Optional<std::string> LibPath = |
678 | llvm::sys::Process::GetEnv("LIBRARY_PATH"); |
679 | if (LibPath) { |
680 | SmallVector<StringRef, 8> Frags; |
681 | const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; |
682 | llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr); |
683 | for (StringRef Path : Frags) |
684 | LibraryPaths.emplace_back(Path.trim()); |
685 | } |
686 | |
687 | |
688 | SmallString<256> DefaultLibPath = |
689 | llvm::sys::path::parent_path(getDriver().Dir); |
690 | llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX); |
691 | LibraryPaths.emplace_back(DefaultLibPath.c_str()); |
692 | |
693 | std::string LibOmpTargetName = |
694 | "libomptarget-nvptx-" + GpuArch.str() + ".bc"; |
695 | bool FoundBCLibrary = false; |
696 | for (StringRef LibraryPath : LibraryPaths) { |
697 | SmallString<128> LibOmpTargetFile(LibraryPath); |
698 | llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName); |
699 | if (llvm::sys::fs::exists(LibOmpTargetFile)) { |
700 | CC1Args.push_back("-mlink-builtin-bitcode"); |
701 | CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile)); |
702 | FoundBCLibrary = true; |
703 | break; |
704 | } |
705 | } |
706 | if (!FoundBCLibrary) |
707 | getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime) |
708 | << LibOmpTargetName; |
709 | } |
710 | } |
711 | |
712 | bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const { |
713 | const Option &O = A->getOption(); |
714 | return (O.matches(options::OPT_gN_Group) && |
715 | !O.matches(options::OPT_gmodules)) || |
716 | O.matches(options::OPT_g_Flag) || |
717 | O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) || |
718 | O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) || |
719 | O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) || |
720 | O.matches(options::OPT_gdwarf_5) || |
721 | O.matches(options::OPT_gcolumn_info); |
722 | } |
723 | |
724 | void CudaToolChain::adjustDebugInfoKind( |
725 | codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const { |
726 | switch (mustEmitDebugInfo(Args)) { |
727 | case DisableDebugInfo: |
728 | DebugInfoKind = codegenoptions::NoDebugInfo; |
729 | break; |
730 | case DebugDirectivesOnly: |
731 | DebugInfoKind = codegenoptions::DebugDirectivesOnly; |
732 | break; |
733 | case EmitSameDebugInfoAsHost: |
734 | |
735 | break; |
736 | } |
737 | } |
738 | |
739 | void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, |
740 | ArgStringList &CC1Args) const { |
741 | |
742 | if (!DriverArgs.hasArg(options::OPT_nocudainc) && |
743 | !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) { |
744 | StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ); |
745 | (0) . __assert_fail ("!Arch.empty() && \"Must have an explicit GPU arch.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 745, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!Arch.empty() && "Must have an explicit GPU arch."); |
746 | CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch)); |
747 | } |
748 | CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); |
749 | } |
750 | |
751 | llvm::opt::DerivedArgList * |
752 | CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
753 | StringRef BoundArch, |
754 | Action::OffloadKind DeviceOffloadKind) const { |
755 | DerivedArgList *DAL = |
756 | HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
757 | if (!DAL) |
758 | DAL = new DerivedArgList(Args.getBaseArgs()); |
759 | |
760 | const OptTable &Opts = getDriver().getOpts(); |
761 | |
762 | |
763 | |
764 | |
765 | if (DeviceOffloadKind == Action::OFK_OpenMP) { |
766 | for (Arg *A : Args) { |
767 | bool IsDuplicate = false; |
768 | for (Arg *DALArg : *DAL) { |
769 | if (A == DALArg) { |
770 | IsDuplicate = true; |
771 | break; |
772 | } |
773 | } |
774 | if (!IsDuplicate) |
775 | DAL->append(A); |
776 | } |
777 | |
778 | StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ); |
779 | if (Arch.empty()) |
780 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), |
781 | CLANG_OPENMP_NVPTX_DEFAULT_ARCH); |
782 | |
783 | return DAL; |
784 | } |
785 | |
786 | for (Arg *A : Args) { |
787 | if (A->getOption().matches(options::OPT_Xarch__)) { |
788 | |
789 | if (BoundArch.empty() || A->getValue(0) != BoundArch) |
790 | continue; |
791 | |
792 | unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); |
793 | unsigned Prev = Index; |
794 | std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index)); |
795 | |
796 | |
797 | |
798 | |
799 | |
800 | |
801 | |
802 | |
803 | |
804 | if (!XarchArg || Index > Prev + 1) { |
805 | getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args) |
806 | << A->getAsString(Args); |
807 | continue; |
808 | } else if (XarchArg->getOption().hasFlag(options::DriverOption)) { |
809 | getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver) |
810 | << A->getAsString(Args); |
811 | continue; |
812 | } |
813 | XarchArg->setBaseArg(A); |
814 | A = XarchArg.release(); |
815 | DAL->AddSynthesizedArg(A); |
816 | } |
817 | DAL->append(A); |
818 | } |
819 | |
820 | if (!BoundArch.empty()) { |
821 | DAL->eraseArg(options::OPT_march_EQ); |
822 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch); |
823 | } |
824 | return DAL; |
825 | } |
826 | |
827 | Tool *CudaToolChain::buildAssembler() const { |
828 | return new tools::NVPTX::Assembler(*this); |
829 | } |
830 | |
831 | Tool *CudaToolChain::buildLinker() const { |
832 | if (OK == Action::OFK_OpenMP) |
833 | return new tools::NVPTX::OpenMPLinker(*this); |
834 | return new tools::NVPTX::Linker(*this); |
835 | } |
836 | |
837 | void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
838 | HostTC.addClangWarningOptions(CC1Args); |
839 | } |
840 | |
841 | ToolChain::CXXStdlibType |
842 | CudaToolChain::GetCXXStdlibType(const ArgList &Args) const { |
843 | return HostTC.GetCXXStdlibType(Args); |
844 | } |
845 | |
846 | void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
847 | ArgStringList &CC1Args) const { |
848 | HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
849 | } |
850 | |
851 | void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, |
852 | ArgStringList &CC1Args) const { |
853 | HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); |
854 | } |
855 | |
856 | void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, |
857 | ArgStringList &CC1Args) const { |
858 | HostTC.AddIAMCUIncludeArgs(Args, CC1Args); |
859 | } |
860 | |
861 | SanitizerMask CudaToolChain::getSupportedSanitizers() const { |
862 | |
863 | |
864 | |
865 | |
866 | |
867 | |
868 | |
869 | |
870 | |
871 | return HostTC.getSupportedSanitizers(); |
872 | } |
873 | |
874 | VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, |
875 | const ArgList &Args) const { |
876 | return HostTC.computeMSVCVersion(D, Args); |
877 | } |
878 | |