Cuda.cpp source code [clang_source_code/lib/Driver/ToolChains/Cuda.cpp]

1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "Cuda.h"
10	#include "CommonArgs.h"
11	#include "InputInfo.h"
12	#include "clang/Basic/Cuda.h"
13	#include "clang/Config/config.h"
14	#include "clang/Driver/Compilation.h"
15	#include "clang/Driver/Distro.h"
16	#include "clang/Driver/Driver.h"
17	#include "clang/Driver/DriverDiagnostic.h"
18	#include "clang/Driver/Options.h"
19	#include "llvm/Option/ArgList.h"
20	#include "llvm/Support/FileSystem.h"
21	#include "llvm/Support/Path.h"
22	#include "llvm/Support/Process.h"
23	#include "llvm/Support/Program.h"
24	#include "llvm/Support/VirtualFileSystem.h"
25	#include <system_error>
26
27	using namespace clang::driver;
28	using namespace clang::driver::toolchains;
29	using namespace clang::driver::tools;
30	using namespace clang;
31	using namespace llvm::opt;
32
33	// Parses the contents of version.txt in an CUDA installation. It should
34	// contain one line of the from e.g. "CUDA Version 7.5.2".
35	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
36	if (!V.startswith("CUDA Version "))
37	return CudaVersion::UNKNOWN;
38	V = V.substr(strlen("CUDA Version "));
39	int Major = -1, Minor = -1;
40	auto First = V.split('.');
41	auto Second = First.second.split('.');
42	if (First.first.getAsInteger(10, Major) \|\|
43	Second.first.getAsInteger(10, Minor))
44	return CudaVersion::UNKNOWN;
45
46	if (Major == 7 && Minor == 0) {
47	// This doesn't appear to ever happen -- version.txt doesn't exist in the
48	// CUDA 7 installs I've seen. But no harm in checking.
49	return CudaVersion::CUDA_70;
50	}
51	if (Major == 7 && Minor == 5)
52	return CudaVersion::CUDA_75;
53	if (Major == 8 && Minor == 0)
54	return CudaVersion::CUDA_80;
55	if (Major == 9 && Minor == 0)
56	return CudaVersion::CUDA_90;
57	if (Major == 9 && Minor == 1)
58	return CudaVersion::CUDA_91;
59	if (Major == 9 && Minor == 2)
60	return CudaVersion::CUDA_92;
61	if (Major == 10 && Minor == 0)
62	return CudaVersion::CUDA_100;
63	if (Major == 10 && Minor == 1)
64	return CudaVersion::CUDA_101;
65	return CudaVersion::UNKNOWN;
66	}
67
68	CudaInstallationDetector::CudaInstallationDetector(
69	const Driver &D, const llvm::Triple &HostTriple,
70	const llvm::opt::ArgList &Args)
71	: D(D) {
72	struct Candidate {
73	std::string Path;
74	bool StrictChecking;
75
76	Candidate(std::string Path, bool StrictChecking = false)
77	: Path(Path), StrictChecking(StrictChecking) {}
78	};
79	SmallVector<Candidate, 4> Candidates;
80
81	// In decreasing order so we prefer newer versions to older versions.
82	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
83
84	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
85	Candidates.emplace_back(
86	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
87	} else if (HostTriple.isOSWindows()) {
88	for (const char *Ver : Versions)
89	Candidates.emplace_back(
90	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
91	Ver);
92	} else {
93	if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
94	// Try to find ptxas binary. If the executable is located in a directory
95	// called 'bin/', its parent directory might be a good guess for a valid
96	// CUDA installation.
97	// However, some distributions might installs 'ptxas' to /usr/bin. In that
98	// case the candidate would be '/usr' which passes the following checks
99	// because '/usr/include' exists as well. To avoid this case, we always
100	// check for the directory potentially containing files for libdevice,
101	// even if the user passes -nocudalib.
102	if (llvm::ErrorOr<std::string> ptxas =
103	llvm::sys::findProgramByName("ptxas")) {
104	SmallString<256> ptxasAbsolutePath;
105	llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
106
107	StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
108	if (llvm::sys::path::filename(ptxasDir) == "bin")
109	Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
110	/StrictChecking=/true);
111	}
112	}
113
114	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
115	for (const char *Ver : Versions)
116	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
117
118	if (Distro(D.getVFS()).IsDebian() \|\| Distro(D.getVFS()).IsUbuntu())
119	// Special case for Debian to have nvidia-cuda-toolkit work
120	// out of the box. More info on http://bugs.debian.org/882505
121	Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
122	}
123
124	bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
125
126	for (const auto &Candidate : Candidates) {
127	InstallPath = Candidate.Path;
128	if (InstallPath.empty() \|\| !D.getVFS().exists(InstallPath))
129	continue;
130
131	BinPath = InstallPath + "/bin";
132	IncludePath = InstallPath + "/include";
133	LibDevicePath = InstallPath + "/nvvm/libdevice";
134
135	auto &FS = D.getVFS();
136	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
137	continue;
138	bool CheckLibDevice = (!NoCudaLib \|\| Candidate.StrictChecking);
139	if (CheckLibDevice && !FS.exists(LibDevicePath))
140	continue;
141
142	// On Linux, we have both lib and lib64 directories, and we need to choose
143	// based on our triple. On MacOS, we have only a lib directory.
144	//
145	// It's sufficient for our purposes to be flexible: If both lib and lib64
146	// exist, we choose whichever one matches our triple. Otherwise, if only
147	// lib exists, we use it.
148	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
149	LibPath = InstallPath + "/lib64";
150	else if (FS.exists(InstallPath + "/lib"))
151	LibPath = InstallPath + "/lib";
152	else
153	continue;
154
155	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
156	FS.getBufferForFile(InstallPath + "/version.txt");
157	if (!VersionFile) {
158	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
159	// version.txt isn't present.
160	Version = CudaVersion::CUDA_70;
161	} else {
162	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
163	}
164
165	if (Version >= CudaVersion::CUDA_90) {
166	// CUDA-9+ uses single libdevice file for all GPU variants.
167	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
168	if (FS.exists(FilePath)) {
169	for (const char *GpuArchName :
170	{"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
171	"sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
172	const CudaArch GpuArch = StringToCudaArch(GpuArchName);
173	if (Version >= MinVersionForCudaArch(GpuArch) &&
174	Version <= MaxVersionForCudaArch(GpuArch))
175	LibDeviceMap[GpuArchName] = FilePath;
176	}
177	}
178	} else {
179	std::error_code EC;
180	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
181	!EC && LI != LE; LI = LI.increment(EC)) {
182	StringRef FilePath = LI->path();
183	StringRef FileName = llvm::sys::path::filename(FilePath);
184	// Process all bitcode filenames that look like
185	// libdevice.compute_XX.YY.bc
186	const StringRef LibDeviceName = "libdevice.";
187	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
188	continue;
189	StringRef GpuArch = FileName.slice(
190	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
191	LibDeviceMap[GpuArch] = FilePath.str();
192	// Insert map entries for specific devices with this compute
193	// capability. NVCC's choice of the libdevice library version is
194	// rather peculiar and depends on the CUDA version.
195	if (GpuArch == "compute_20") {
196	LibDeviceMap["sm_20"] = FilePath;
197	LibDeviceMap["sm_21"] = FilePath;
198	LibDeviceMap["sm_32"] = FilePath;
199	} else if (GpuArch == "compute_30") {
200	LibDeviceMap["sm_30"] = FilePath;
201	if (Version < CudaVersion::CUDA_80) {
202	LibDeviceMap["sm_50"] = FilePath;
203	LibDeviceMap["sm_52"] = FilePath;
204	LibDeviceMap["sm_53"] = FilePath;
205	}
206	LibDeviceMap["sm_60"] = FilePath;
207	LibDeviceMap["sm_61"] = FilePath;
208	LibDeviceMap["sm_62"] = FilePath;
209	} else if (GpuArch == "compute_35") {
210	LibDeviceMap["sm_35"] = FilePath;
211	LibDeviceMap["sm_37"] = FilePath;
212	} else if (GpuArch == "compute_50") {
213	if (Version >= CudaVersion::CUDA_80) {
214	LibDeviceMap["sm_50"] = FilePath;
215	LibDeviceMap["sm_52"] = FilePath;
216	LibDeviceMap["sm_53"] = FilePath;
217	}
218	}
219	}
220	}
221
222	// Check that we have found at least one libdevice that we can link in if
223	// -nocudalib hasn't been specified.
224	if (LibDeviceMap.empty() && !NoCudaLib)
225	continue;
226
227	IsValid = true;
228	break;
229	}
230	}
231
232	void CudaInstallationDetector::AddCudaIncludeArgs(
233	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
234	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
235	// Add cuda_wrappers/* to our system include path. This lets us wrap
236	// standard library headers.
237	SmallString<128> P(D.ResourceDir);
238	llvm::sys::path::append(P, "include");
239	llvm::sys::path::append(P, "cuda_wrappers");
240	CC1Args.push_back("-internal-isystem");
241	CC1Args.push_back(DriverArgs.MakeArgString(P));
242	}
243
244	if (DriverArgs.hasArg(options::OPT_nocudainc))
245	return;
246
247	if (!isValid()) {
248	D.Diag(diag::err_drv_no_cuda_installation);
249	return;
250	}
251
252	CC1Args.push_back("-internal-isystem");
253	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
254	CC1Args.push_back("-include");
255	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
256	}
257
258	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
259	CudaArch Arch) const {
260	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
261	ArchsWithBadVersion.count(Arch) > 0)
262	return;
263
264	auto MinVersion = MinVersionForCudaArch(Arch);
265	auto MaxVersion = MaxVersionForCudaArch(Arch);
266	if (Version < MinVersion \|\| Version > MaxVersion) {
267	ArchsWithBadVersion.insert(Arch);
268	D.Diag(diag::err_drv_cuda_version_unsupported)
269	<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
270	<< CudaVersionToString(MaxVersion) << InstallPath
271	<< CudaVersionToString(Version);
272	}
273	}
274
275	void CudaInstallationDetector::print(raw_ostream &OS) const {
276	if (isValid())
277	OS << "Found CUDA installation: " << InstallPath << ", version "
278	<< CudaVersionToString(Version) << "\n";
279	}
280
namespace {
/// Debug info level for the NVPTX devices. The level emitted for the device
/// may need to differ from the one used for the host, so it is tracked with
/// its own type. Debug info emission for the devices is either disabled
/// completely, limited to debug directives, or kept the same as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as for the host.
  EmitSameDebugInfoAsHost,
};
} // anonymous namespace
294
295	/// Define debug info level for the NVPTX devices. If the debug info for both
296	/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
297	/// only debug directives are requested for the both host and device
298	/// (-gline-directvies-only), or the debug info only for the device is disabled
299	/// (optimization is on and --cuda-noopt-device-debug was not specified), the
300	/// debug directves only must be emitted for the device. Otherwise, use the same
301	/// debug info level just like for the host (with the limitations of only
302	/// supported DWARF2 standard).
303	static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
304	const Arg *A = Args.getLastArg(options::OPT_O_Group);
305	bool IsDebugEnabled = !A \|\| A->getOption().matches(options::OPT_O0) \|\|
306	Args.hasFlag(options::OPT_cuda_noopt_device_debug,
307	options::OPT_no_cuda_noopt_device_debug,
308	/Default=/false);
309	if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
310	const Option &Opt = A->getOption();
311	if (Opt.matches(options::OPT_gN_Group)) {
312	if (Opt.matches(options::OPT_g0) \|\| Opt.matches(options::OPT_ggdb0))
313	return DisableDebugInfo;
314	if (Opt.matches(options::OPT_gline_directives_only))
315	return DebugDirectivesOnly;
316	}
317	return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
318	}
319	return DisableDebugInfo;
320	}
321
322	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
323	const InputInfo &Output,
324	const InputInfoList &Inputs,
325	const ArgList &Args,
326	const char *LinkingOutput) const {
327	const auto &TC =
328	static_cast<const toolchains::CudaToolChain &>(getToolChain());
329	(0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 329, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform");
330
331	StringRef GPUArchName;
332	// If this is an OpenMP action we need to extract the device architecture
333	// from the -march=arch option. This option may come from -Xopenmp-target
334	// flag or the default value.
335	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
336	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
337	(0) . __assert_fail ("!GPUArchName.empty() && \"Must have an architecture passed in.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 337, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GPUArchName.empty() && "Must have an architecture passed in.");
338	} else
339	GPUArchName = JA.getOffloadingArch();
340
341	// Obtain architecture from the action.
342	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
343	(0) . __assert_fail ("gpu_arch != CudaArch..UNKNOWN && \"Device action expected to have an architecture.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 344, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(gpu_arch != CudaArch::UNKNOWN &&
344	(0) . __assert_fail ("gpu_arch != CudaArch..UNKNOWN && \"Device action expected to have an architecture.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 344, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device action expected to have an architecture.");
345
346	// Check that our installation's ptxas supports gpu_arch.
347	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
348	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
349	}
350
351	ArgStringList CmdArgs;
352	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
353	DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
354	if (DIKind == EmitSameDebugInfoAsHost) {
355	// ptxas does not accept -g option if optimization is enabled, so
356	// we ignore the compiler's -O* options if we want debug info.
357	CmdArgs.push_back("-g");
358	CmdArgs.push_back("--dont-merge-basicblocks");
359	CmdArgs.push_back("--return-at-end");
360	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
361	// Map the -O we received to -O{0,1,2,3}.
362	//
363	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
364	// default, so it may correspond more closely to the spirit of clang -O2.
365
366	// -O3 seems like the least-bad option when -Osomething is specified to
367	// clang but it isn't handled below.
368	StringRef OOpt = "3";
369	if (A->getOption().matches(options::OPT_O4) \|\|
370	A->getOption().matches(options::OPT_Ofast))
371	OOpt = "3";
372	else if (A->getOption().matches(options::OPT_O0))
373	OOpt = "0";
374	else if (A->getOption().matches(options::OPT_O)) {
375	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
376	OOpt = llvm::StringSwitch<const char *>(A->getValue())
377	.Case("1", "1")
378	.Case("2", "2")
379	.Case("3", "3")
380	.Case("s", "2")
381	.Case("z", "2")
382	.Default("2");
383	}
384	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
385	} else {
386	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
387	// to no optimizations, but ptxas's default is -O3.
388	CmdArgs.push_back("-O0");
389	}
390	if (DIKind == DebugDirectivesOnly)
391	CmdArgs.push_back("-lineinfo");
392
393	// Pass -v to ptxas if it was passed to the driver.
394	if (Args.hasArg(options::OPT_v))
395	CmdArgs.push_back("-v");
396
397	CmdArgs.push_back("--gpu-name");
398	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
399	CmdArgs.push_back("--output-file");
400	CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
401	for (const auto& II : Inputs)
402	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
403
404	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
405	CmdArgs.push_back(Args.MakeArgString(A));
406
407	bool Relocatable = false;
408	if (JA.isOffloading(Action::OFK_OpenMP))
409	// In OpenMP we need to generate relocatable code.
410	Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
411	options::OPT_fnoopenmp_relocatable_target,
412	/Default=/true);
413	else if (JA.isOffloading(Action::OFK_Cuda))
414	Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
415	options::OPT_fno_gpu_rdc, /Default=/false);
416
417	if (Relocatable)
418	CmdArgs.push_back("-c");
419
420	const char *Exec;
421	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
422	Exec = A->getValue();
423	else
424	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
425	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
426	}
427
428	static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
429	bool includePTX = true;
430	for (Arg *A : Args) {
431	if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) \|\|
432	A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
433	continue;
434	A->claim();
435	const StringRef ArchStr = A->getValue();
436	if (ArchStr == "all" \|\| ArchStr == gpu_arch) {
437	includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
438	continue;
439	}
440	}
441	return includePTX;
442	}
443
444	// All inputs to this linker must be from CudaDeviceActions, as we need to look
445	// at the Inputs' Actions in order to figure out which GPU architecture they
446	// correspond to.
447	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
448	const InputInfo &Output,
449	const InputInfoList &Inputs,
450	const ArgList &Args,
451	const char *LinkingOutput) const {
452	const auto &TC =
453	static_cast<const toolchains::CudaToolChain &>(getToolChain());
454	(0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 454, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform");
455
456	ArgStringList CmdArgs;
457	CmdArgs.push_back("--cuda");
458	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
459	CmdArgs.push_back(Args.MakeArgString("--create"));
460	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
461	if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
462	CmdArgs.push_back("-g");
463
464	for (const auto& II : Inputs) {
465	auto *A = II.getAction();
466	(0) . __assert_fail ("A->getInputs().size() == 1 && \"Device offload action is expected to have a single input\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 467, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(A->getInputs().size() == 1 &&
467	(0) . __assert_fail ("A->getInputs().size() == 1 && \"Device offload action is expected to have a single input\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 467, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device offload action is expected to have a single input");
468	const char *gpu_arch_str = A->getOffloadingArch();
469	(0) . __assert_fail ("gpu_arch_str && \"Device action expected to have associated a GPU architecture!\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 470, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(gpu_arch_str &&
470	(0) . __assert_fail ("gpu_arch_str && \"Device action expected to have associated a GPU architecture!\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 470, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Device action expected to have associated a GPU architecture!");
471	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
472
473	if (II.getType() == types::TY_PP_Asm &&
474	!shouldIncludePTX(Args, gpu_arch_str))
475	continue;
476	// We need to pass an Arch of the form "sm_XX" for cubin files and
477	// "compute_XX" for ptx.
478	const char *Arch =
479	(II.getType() == types::TY_PP_Asm)
480	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
481	: gpu_arch_str;
482	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
483	Arch + ",file=" + II.getFilename()));
484	}
485
486	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
487	CmdArgs.push_back(Args.MakeArgString(A));
488
489	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
490	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
491	}
492
493	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
494	const InputInfo &Output,
495	const InputInfoList &Inputs,
496	const ArgList &Args,
497	const char *LinkingOutput) const {
498	const auto &TC =
499	static_cast<const toolchains::CudaToolChain &>(getToolChain());
500	(0) . __assert_fail ("TC.getTriple().isNVPTX() && \"Wrong platform\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 500, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(TC.getTriple().isNVPTX() && "Wrong platform");
501
502	ArgStringList CmdArgs;
503
504	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
505	// host binary by the host linker.
506	(0) . __assert_fail ("!JA.isHostOffloading(Action..OFK_OpenMP) && \"CUDA toolchain not expected for an OpenMP host device.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 507, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
507	(0) . __assert_fail ("!JA.isHostOffloading(Action..OFK_OpenMP) && \"CUDA toolchain not expected for an OpenMP host device.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 507, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "CUDA toolchain not expected for an OpenMP host device.");
508
509	if (Output.isFilename()) {
510	CmdArgs.push_back("-o");
511	CmdArgs.push_back(Output.getFilename());
512	} else
513	(0) . __assert_fail ("Output.isNothing() && \"Invalid output.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 513, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(Output.isNothing() && "Invalid output.");
514	if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
515	CmdArgs.push_back("-g");
516
517	if (Args.hasArg(options::OPT_v))
518	CmdArgs.push_back("-v");
519
520	StringRef GPUArch =
521	Args.getLastArgValue(options::OPT_march_EQ);
522	(0) . __assert_fail ("!GPUArch.empty() && \"At least one GPU Arch required for ptxas.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 522, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
523
524	CmdArgs.push_back("-arch");
525	CmdArgs.push_back(Args.MakeArgString(GPUArch));
526
527	// Assume that the directory specified with --libomptarget_nvptx_path
528	// contains the static library libomptarget-nvptx.a.
529	if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
530	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
531
532	// Add paths specified in LIBRARY_PATH environment variable as -L options.
533	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
534
535	// Add paths for the default clang library path.
536	SmallString<256> DefaultLibPath =
537	llvm::sys::path::parent_path(TC.getDriver().Dir);
538	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
539	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
540
541	// Add linking against library implementing OpenMP calls on NVPTX target.
542	CmdArgs.push_back("-lomptarget-nvptx");
543
544	for (const auto &II : Inputs) {
545	if (II.getType() == types::TY_LLVM_IR \|\|
546	II.getType() == types::TY_LTO_IR \|\|
547	II.getType() == types::TY_LTO_BC \|\|
548	II.getType() == types::TY_LLVM_BC) {
549	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
550	<< getToolChain().getTripleString();
551	continue;
552	}
553
554	// Currently, we only pass the input files to the linker, we do not pass
555	// any libraries that may be valid only for the host.
556	if (!II.isFilename())
557	continue;
558
559	const char *CubinF = C.addTempFile(
560	C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
561
562	CmdArgs.push_back(CubinF);
563	}
564
565	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
566
567	const char *Exec =
568	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
569	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
570	}
571
572	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
573	/// which isn't properly a linker but nonetheless performs the step of stitching
574	/// together object files from the assembler into a single blob.
575
576	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
577	const ToolChain &HostTC, const ArgList &Args,
578	const Action::OffloadKind OK)
579	: ToolChain(D, Triple, Args), HostTC(HostTC),
580	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
581	if (CudaInstallation.isValid())
582	getProgramPaths().push_back(CudaInstallation.getBinPath());
583	// Lookup binaries into the driver directory, this is used to
584	// discover the clang-offload-bundler executable.
585	getProgramPaths().push_back(getDriver().Dir);
586	}
587
588	std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
589	// Only object files are changed, for example assembly files keep their .s
590	// extensions. CUDA also continues to use .o as they don't use nvlink but
591	// fatbinary.
592	if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
593	return ToolChain::getInputFilename(Input);
594
595	// Replace extension for object files with cubin because nvlink relies on
596	// these particular file names.
597	SmallString<256> Filename(ToolChain::getInputFilename(Input));
598	llvm::sys::path::replace_extension(Filename, "cubin");
599	return Filename.str();
600	}
601
602	void CudaToolChain::addClangTargetOptions(
603	const llvm::opt::ArgList &DriverArgs,
604	llvm::opt::ArgStringList &CC1Args,
605	Action::OffloadKind DeviceOffloadingKind) const {
606	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
607
608	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
609	(0) . __assert_fail ("!GpuArch.empty() && \"Must have an explicit GPU arch.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 609, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
610	(0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP \|\| DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
611	(0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP \|\| DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> DeviceOffloadingKind == Action::OFK_Cuda) &&
612	(0) . __assert_fail ("(DeviceOffloadingKind == Action..OFK_OpenMP \|\| DeviceOffloadingKind == Action..OFK_Cuda) && \"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 612, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
613
614	if (DeviceOffloadingKind == Action::OFK_Cuda) {
615	CC1Args.push_back("-fcuda-is-device");
616
617	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
618	options::OPT_fno_cuda_flush_denormals_to_zero, false))
619	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
620
621	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
622	options::OPT_fno_cuda_approx_transcendentals, false))
623	CC1Args.push_back("-fcuda-approx-transcendentals");
624
625	if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
626	false))
627	CC1Args.push_back("-fgpu-rdc");
628	}
629
630	if (DriverArgs.hasArg(options::OPT_nocudalib))
631	return;
632
633	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
634
635	if (LibDeviceFile.empty()) {
636	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
637	DriverArgs.hasArg(options::OPT_S))
638	return;
639
640	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
641	return;
642	}
643
644	CC1Args.push_back("-mlink-builtin-bitcode");
645	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
646
647	// Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
648	// defaults to. Use PTX4.2 by default, which is the PTX version that came with
649	// CUDA-7.0.
650	const char *PtxFeature = "+ptx42";
651	// TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However that
652	// requires fair amount of work on LLVM side. We'll keep using PTX 6.1 until
653	// all prerequisites are in place.
654	if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
655	// CUDA-9.1 uses new instructions that are only available in PTX6.1+
656	PtxFeature = "+ptx61";
657	} else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
658	// CUDA-9.0 uses new instructions that are only available in PTX6.0+
659	PtxFeature = "+ptx60";
660	}
661	CC1Args.append({"-target-feature", PtxFeature});
662	if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
663	options::OPT_fno_cuda_short_ptr, false))
664	CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
665
666	if (CudaInstallation.version() >= CudaVersion::UNKNOWN)
667	CC1Args.push_back(DriverArgs.MakeArgString(
668	Twine("-target-sdk-version=") +
669	CudaVersionToString(CudaInstallation.version())));
670
671	if (DeviceOffloadingKind == Action::OFK_OpenMP) {
672	SmallVector<StringRef, 8> LibraryPaths;
673	if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
674	LibraryPaths.push_back(A->getValue());
675
676	// Add user defined library paths from LIBRARY_PATH.
677	llvm::Optional<std::string> LibPath =
678	llvm::sys::Process::GetEnv("LIBRARY_PATH");
679	if (LibPath) {
680	SmallVector<StringRef, 8> Frags;
681	const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
682	llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
683	for (StringRef Path : Frags)
684	LibraryPaths.emplace_back(Path.trim());
685	}
686
687	// Add path to lib / lib64 folder.
688	SmallString<256> DefaultLibPath =
689	llvm::sys::path::parent_path(getDriver().Dir);
690	llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
691	LibraryPaths.emplace_back(DefaultLibPath.c_str());
692
693	std::string LibOmpTargetName =
694	"libomptarget-nvptx-" + GpuArch.str() + ".bc";
695	bool FoundBCLibrary = false;
696	for (StringRef LibraryPath : LibraryPaths) {
697	SmallString<128> LibOmpTargetFile(LibraryPath);
698	llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
699	if (llvm::sys::fs::exists(LibOmpTargetFile)) {
700	CC1Args.push_back("-mlink-builtin-bitcode");
701	CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
702	FoundBCLibrary = true;
703	break;
704	}
705	}
706	if (!FoundBCLibrary)
707	getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
708	<< LibOmpTargetName;
709	}
710	}
711
712	bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
713	const Option &O = A->getOption();
714	return (O.matches(options::OPT_gN_Group) &&
715	!O.matches(options::OPT_gmodules)) \|\|
716	O.matches(options::OPT_g_Flag) \|\|
717	O.matches(options::OPT_ggdbN_Group) \|\| O.matches(options::OPT_ggdb) \|\|
718	O.matches(options::OPT_gdwarf) \|\| O.matches(options::OPT_gdwarf_2) \|\|
719	O.matches(options::OPT_gdwarf_3) \|\| O.matches(options::OPT_gdwarf_4) \|\|
720	O.matches(options::OPT_gdwarf_5) \|\|
721	O.matches(options::OPT_gcolumn_info);
722	}
723
724	void CudaToolChain::adjustDebugInfoKind(
725	codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
726	switch (mustEmitDebugInfo(Args)) {
727	case DisableDebugInfo:
728	DebugInfoKind = codegenoptions::NoDebugInfo;
729	break;
730	case DebugDirectivesOnly:
731	DebugInfoKind = codegenoptions::DebugDirectivesOnly;
732	break;
733	case EmitSameDebugInfoAsHost:
734	// Use same debug info level as the host.
735	break;
736	}
737	}
738
739	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
740	ArgStringList &CC1Args) const {
741	// Check our CUDA version if we're going to include the CUDA headers.
742	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
743	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
744	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
745	(0) . __assert_fail ("!Arch.empty() && \"Must have an explicit GPU arch.\"", "/home/seafit/code_projects/clang_source/clang/lib/Driver/ToolChains/Cuda.cpp", 745, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!Arch.empty() && "Must have an explicit GPU arch.");
746	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
747	}
748	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
749	}
750
751	llvm::opt::DerivedArgList *
752	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
753	StringRef BoundArch,
754	Action::OffloadKind DeviceOffloadKind) const {
755	DerivedArgList *DAL =
756	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
757	if (!DAL)
758	DAL = new DerivedArgList(Args.getBaseArgs());
759
760	const OptTable &Opts = getDriver().getOpts();
761
762	// For OpenMP device offloading, append derived arguments. Make sure
763	// flags are not duplicated.
764	// Also append the compute capability.
765	if (DeviceOffloadKind == Action::OFK_OpenMP) {
766	for (Arg *A : Args) {
767	bool IsDuplicate = false;
768	for (Arg DALArg : DAL) {
769	if (A == DALArg) {
770	IsDuplicate = true;
771	break;
772	}
773	}
774	if (!IsDuplicate)
775	DAL->append(A);
776	}
777
778	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
779	if (Arch.empty())
780	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
781	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
782
783	return DAL;
784	}
785
786	for (Arg *A : Args) {
787	if (A->getOption().matches(options::OPT_Xarch__)) {
788	// Skip this argument unless the architecture matches BoundArch
789	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
790	continue;
791
792	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
793	unsigned Prev = Index;
794	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
795
796	// If the argument parsing failed or more than one argument was
797	// consumed, the -Xarch_ argument's parameter tried to consume
798	// extra arguments. Emit an error and ignore.
799	//
800	// We also want to disallow any options which would alter the
801	// driver behavior; that isn't going to work in our model. We
802	// use isDriverOption() as an approximation, although things
803	// like -O4 are going to slip through.
804	if (!XarchArg \|\| Index > Prev + 1) {
805	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
806	<< A->getAsString(Args);
807	continue;
808	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
809	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
810	<< A->getAsString(Args);
811	continue;
812	}
813	XarchArg->setBaseArg(A);
814	A = XarchArg.release();
815	DAL->AddSynthesizedArg(A);
816	}
817	DAL->append(A);
818	}
819
820	if (!BoundArch.empty()) {
821	DAL->eraseArg(options::OPT_march_EQ);
822	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
823	}
824	return DAL;
825	}
826
827	Tool *CudaToolChain::buildAssembler() const {
828	return new tools::NVPTX::Assembler(*this);
829	}
830
831	Tool *CudaToolChain::buildLinker() const {
832	if (OK == Action::OFK_OpenMP)
833	return new tools::NVPTX::OpenMPLinker(*this);
834	return new tools::NVPTX::Linker(*this);
835	}
836
837	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
838	HostTC.addClangWarningOptions(CC1Args);
839	}
840
841	ToolChain::CXXStdlibType
842	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
843	return HostTC.GetCXXStdlibType(Args);
844	}
845
846	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
847	ArgStringList &CC1Args) const {
848	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
849	}
850
851	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
852	ArgStringList &CC1Args) const {
853	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
854	}
855
856	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
857	ArgStringList &CC1Args) const {
858	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
859	}
860
861	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
862	// The CudaToolChain only supports sanitizers in the sense that it allows
863	// sanitizer arguments on the command line if they are supported by the host
864	// toolchain. The CudaToolChain will actually ignore any command line
865	// arguments for any of these "supported" sanitizers. That means that no
866	// sanitization of device code is actually supported at this time.
867	//
868	// This behavior is necessary because the host and device toolchains
869	// invocations often share the command line, so the device toolchain must
870	// tolerate flags meant only for the host toolchain.
871	return HostTC.getSupportedSanitizers();
872	}
873
874	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
875	const ArgList &Args) const {
876	return HostTC.computeMSVCVersion(D, Args);
877	}
878

clang::driver::CudaInstallationDetector::AddCudaIncludeArgs

clang::driver::CudaInstallationDetector::CheckCudaVersionSupportsArch

clang::driver::CudaInstallationDetector::print

Clang Project