1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "CGOpenMPRuntimeNVPTX.h" |
15 | #include "CodeGenFunction.h" |
16 | #include "clang/AST/DeclOpenMP.h" |
17 | #include "clang/AST/StmtOpenMP.h" |
18 | #include "clang/AST/StmtVisitor.h" |
19 | #include "clang/Basic/Cuda.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | |
22 | using namespace clang; |
23 | using namespace CodeGen; |
24 | |
25 | namespace { |
26 | enum OpenMPRTLFunctionNVPTX { |
27 | |
28 | |
29 | OMPRTL_NVPTX__kmpc_kernel_init, |
30 | |
31 | OMPRTL_NVPTX__kmpc_kernel_deinit, |
32 | |
33 | |
34 | OMPRTL_NVPTX__kmpc_spmd_kernel_init, |
35 | |
36 | OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, |
37 | |
38 | |
39 | |
40 | OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, |
41 | |
42 | |
43 | OMPRTL_NVPTX__kmpc_kernel_parallel, |
44 | |
45 | OMPRTL_NVPTX__kmpc_kernel_end_parallel, |
46 | |
47 | |
48 | OMPRTL_NVPTX__kmpc_serialized_parallel, |
49 | |
50 | |
51 | OMPRTL_NVPTX__kmpc_end_serialized_parallel, |
52 | |
53 | |
54 | OMPRTL_NVPTX__kmpc_shuffle_int32, |
55 | |
56 | |
57 | OMPRTL_NVPTX__kmpc_shuffle_int64, |
58 | |
59 | |
60 | |
61 | |
62 | |
63 | OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, |
64 | |
65 | |
66 | |
67 | |
68 | |
69 | |
70 | |
71 | |
72 | |
73 | |
74 | |
75 | OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, |
76 | |
77 | OMPRTL_NVPTX__kmpc_end_reduce_nowait, |
78 | |
79 | OMPRTL_NVPTX__kmpc_data_sharing_init_stack, |
80 | |
81 | OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, |
82 | |
83 | |
84 | OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, |
85 | |
86 | OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, |
87 | |
88 | |
89 | OMPRTL_NVPTX__kmpc_begin_sharing_variables, |
90 | |
91 | OMPRTL_NVPTX__kmpc_end_sharing_variables, |
92 | |
93 | OMPRTL_NVPTX__kmpc_get_shared_variables, |
94 | |
95 | |
96 | OMPRTL_NVPTX__kmpc_parallel_level, |
97 | |
98 | OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, |
99 | |
100 | |
101 | OMPRTL_NVPTX__kmpc_get_team_static_memory, |
102 | |
103 | |
104 | OMPRTL_NVPTX__kmpc_restore_team_static_memory, |
105 | |
106 | OMPRTL__kmpc_barrier, |
107 | |
108 | |
109 | OMPRTL__kmpc_barrier_simple_spmd, |
110 | }; |
111 | |
112 | |
113 | class NVPTXActionTy final : public PrePostActionTy { |
114 | llvm::FunctionCallee EnterCallee = nullptr; |
115 | ArrayRef<llvm::Value *> EnterArgs; |
116 | llvm::FunctionCallee ExitCallee = nullptr; |
117 | ArrayRef<llvm::Value *> ExitArgs; |
118 | bool Conditional = false; |
119 | llvm::BasicBlock *ContBlock = nullptr; |
120 | |
121 | public: |
122 | NVPTXActionTy(llvm::FunctionCallee EnterCallee, |
123 | ArrayRef<llvm::Value *> EnterArgs, |
124 | llvm::FunctionCallee ExitCallee, |
125 | ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) |
126 | : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), |
127 | ExitArgs(ExitArgs), Conditional(Conditional) {} |
128 | void Enter(CodeGenFunction &CGF) override { |
129 | llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); |
130 | if (Conditional) { |
131 | llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); |
132 | auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
133 | ContBlock = CGF.createBasicBlock("omp_if.end"); |
134 | |
135 | CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); |
136 | CGF.EmitBlock(ThenBlock); |
137 | } |
138 | } |
139 | void Done(CodeGenFunction &CGF) { |
140 | |
141 | CGF.EmitBranch(ContBlock); |
142 | CGF.EmitBlock(ContBlock, true); |
143 | } |
144 | void Exit(CodeGenFunction &CGF) override { |
145 | CGF.EmitRuntimeCall(ExitCallee, ExitArgs); |
146 | } |
147 | }; |
148 | |
149 | |
150 | |
151 | |
152 | |
153 | class ExecutionRuntimeModesRAII { |
154 | private: |
155 | CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode = |
156 | CGOpenMPRuntimeNVPTX::EM_Unknown; |
157 | CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode; |
158 | bool SavedRuntimeMode = false; |
159 | bool *RuntimeMode = nullptr; |
160 | |
161 | public: |
162 | |
163 | ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode) |
164 | : ExecMode(ExecMode) { |
165 | SavedExecMode = ExecMode; |
166 | ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD; |
167 | } |
168 | |
169 | ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode, |
170 | bool &RuntimeMode, bool FullRuntimeMode) |
171 | : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { |
172 | SavedExecMode = ExecMode; |
173 | SavedRuntimeMode = RuntimeMode; |
174 | ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD; |
175 | RuntimeMode = FullRuntimeMode; |
176 | } |
177 | ~ExecutionRuntimeModesRAII() { |
178 | ExecMode = SavedExecMode; |
179 | if (RuntimeMode) |
180 | *RuntimeMode = SavedRuntimeMode; |
181 | } |
182 | }; |
183 | |
184 | |
185 | |
186 | |
187 | |
188 | enum MachineConfiguration : unsigned { |
189 | WarpSize = 32, |
190 | |
191 | |
192 | LaneIDBits = 5, |
193 | LaneIDMask = WarpSize - 1, |
194 | |
195 | |
196 | GlobalMemoryAlignment = 128, |
197 | |
198 | |
199 | SharedMemorySize = 128, |
200 | }; |
201 | |
202 | static const ValueDecl *getPrivateItem(const Expr *RefExpr) { |
203 | RefExpr = RefExpr->IgnoreParens(); |
204 | if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) { |
205 | const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); |
206 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
207 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
208 | RefExpr = Base; |
209 | } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) { |
210 | const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); |
211 | while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) |
212 | Base = TempOASE->getBase()->IgnoreParenImpCasts(); |
213 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
214 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
215 | RefExpr = Base; |
216 | } |
217 | RefExpr = RefExpr->IgnoreParenImpCasts(); |
218 | if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr)) |
219 | return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()); |
220 | const auto *ME = cast<MemberExpr>(RefExpr); |
221 | return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); |
222 | } |
223 | |
224 | typedef std::pair<CharUnits , const ValueDecl *> VarsDataTy; |
225 | static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { |
226 | return P1.first > P2.first; |
227 | } |
228 | |
229 | static RecordDecl *buildRecordForGlobalizedVars( |
230 | ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, |
231 | ArrayRef<const ValueDecl *> EscapedDeclsForTeams, |
232 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
233 | &MappedDeclsFields, int BufSize) { |
234 | if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) |
235 | return nullptr; |
236 | SmallVector<VarsDataTy, 4> GlobalizedVars; |
237 | for (const ValueDecl *D : EscapedDecls) |
238 | GlobalizedVars.emplace_back( |
239 | CharUnits::fromQuantity(std::max( |
240 | C.getDeclAlign(D).getQuantity(), |
241 | static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))), |
242 | D); |
243 | for (const ValueDecl *D : EscapedDeclsForTeams) |
244 | GlobalizedVars.emplace_back(C.getDeclAlign(D), D); |
245 | std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), |
246 | stable_sort_comparator); |
247 | |
248 | |
249 | |
250 | |
251 | |
252 | RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); |
253 | GlobalizedRD->startDefinition(); |
254 | llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped( |
255 | EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end()); |
256 | for (const auto &Pair : GlobalizedVars) { |
257 | const ValueDecl *VD = Pair.second; |
258 | QualType Type = VD->getType(); |
259 | if (Type->isLValueReferenceType()) |
260 | Type = C.getPointerType(Type.getNonReferenceType()); |
261 | else |
262 | Type = Type.getNonReferenceType(); |
263 | SourceLocation Loc = VD->getLocation(); |
264 | FieldDecl *Field; |
265 | if (SingleEscaped.count(VD)) { |
266 | Field = FieldDecl::Create( |
267 | C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, |
268 | C.getTrivialTypeSourceInfo(Type, SourceLocation()), |
269 | , , |
270 | ICIS_NoInit); |
271 | Field->setAccess(AS_public); |
272 | if (VD->hasAttrs()) { |
273 | for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), |
274 | E(VD->getAttrs().end()); |
275 | I != E; ++I) |
276 | Field->addAttr(*I); |
277 | } |
278 | } else { |
279 | llvm::APInt ArraySize(32, BufSize); |
280 | Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); |
281 | Field = FieldDecl::Create( |
282 | C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, |
283 | C.getTrivialTypeSourceInfo(Type, SourceLocation()), |
284 | , , |
285 | ICIS_NoInit); |
286 | Field->setAccess(AS_public); |
287 | llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(), |
288 | static_cast<CharUnits::QuantityType>( |
289 | GlobalMemoryAlignment))); |
290 | Field->addAttr(AlignedAttr::CreateImplicit( |
291 | C, AlignedAttr::GNU_aligned, , |
292 | IntegerLiteral::Create(C, Align, |
293 | C.getIntTypeForBitwidth(32, ), |
294 | SourceLocation()))); |
295 | } |
296 | GlobalizedRD->addDecl(Field); |
297 | MappedDeclsFields.try_emplace(VD, Field); |
298 | } |
299 | GlobalizedRD->completeDefinition(); |
300 | return GlobalizedRD; |
301 | } |
302 | |
303 | |
304 | class CheckVarsEscapingDeclContext final |
305 | : public ConstStmtVisitor<CheckVarsEscapingDeclContext> { |
306 | CodeGenFunction &CGF; |
307 | llvm::SetVector<const ValueDecl *> EscapedDecls; |
308 | llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls; |
309 | llvm::SmallPtrSet<const Decl *, 4> EscapedParameters; |
310 | RecordDecl *GlobalizedRD = nullptr; |
311 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; |
312 | bool AllEscaped = false; |
313 | bool IsForCombinedParallelRegion = false; |
314 | |
315 | void markAsEscaped(const ValueDecl *VD) { |
316 | |
317 | if (!isa<VarDecl>(VD) || |
318 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) |
319 | return; |
320 | VD = cast<ValueDecl>(VD->getCanonicalDecl()); |
321 | |
322 | if (auto *CSI = CGF.CapturedStmtInfo) { |
323 | if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { |
324 | |
325 | |
326 | if (!IsForCombinedParallelRegion) { |
327 | if (!FD->hasAttrs()) |
328 | return; |
329 | const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); |
330 | if (!Attr) |
331 | return; |
332 | if (((Attr->getCaptureKind() != OMPC_map) && |
333 | !isOpenMPPrivate( |
334 | static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) || |
335 | ((Attr->getCaptureKind() == OMPC_map) && |
336 | !FD->getType()->isAnyPointerType())) |
337 | return; |
338 | } |
339 | if (!FD->getType()->isReferenceType()) { |
340 | (0) . __assert_fail ("!VD->getType()->isVariablyModifiedType() && \"Parameter captured by value with variably modified type\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 341, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!VD->getType()->isVariablyModifiedType() && |
341 | (0) . __assert_fail ("!VD->getType()->isVariablyModifiedType() && \"Parameter captured by value with variably modified type\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 341, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Parameter captured by value with variably modified type"); |
342 | EscapedParameters.insert(VD); |
343 | } else if (!IsForCombinedParallelRegion) { |
344 | return; |
345 | } |
346 | } |
347 | } |
348 | if ((!CGF.CapturedStmtInfo || |
349 | (IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) && |
350 | VD->getType()->isReferenceType()) |
351 | |
352 | return; |
353 | if (VD->getType()->isVariablyModifiedType()) |
354 | EscapedVariableLengthDecls.insert(VD); |
355 | else |
356 | EscapedDecls.insert(VD); |
357 | } |
358 | |
359 | void VisitValueDecl(const ValueDecl *VD) { |
360 | if (VD->getType()->isLValueReferenceType()) |
361 | markAsEscaped(VD); |
362 | if (const auto *VarD = dyn_cast<VarDecl>(VD)) { |
363 | if (!isa<ParmVarDecl>(VarD) && VarD->hasInit()) { |
364 | const bool SavedAllEscaped = AllEscaped; |
365 | AllEscaped = VD->getType()->isLValueReferenceType(); |
366 | Visit(VarD->getInit()); |
367 | AllEscaped = SavedAllEscaped; |
368 | } |
369 | } |
370 | } |
371 | void VisitOpenMPCapturedStmt(const CapturedStmt *S, |
372 | ArrayRef<OMPClause *> Clauses, |
373 | bool IsCombinedParallelRegion) { |
374 | if (!S) |
375 | return; |
376 | for (const CapturedStmt::Capture &C : S->captures()) { |
377 | if (C.capturesVariable() && !C.capturesVariableByCopy()) { |
378 | const ValueDecl *VD = C.getCapturedVar(); |
379 | bool SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion; |
380 | if (IsCombinedParallelRegion) { |
381 | |
382 | |
383 | |
384 | IsForCombinedParallelRegion = false; |
385 | for (const OMPClause *C : Clauses) { |
386 | if (!isOpenMPPrivate(C->getClauseKind()) || |
387 | C->getClauseKind() == OMPC_reduction || |
388 | C->getClauseKind() == OMPC_linear || |
389 | C->getClauseKind() == OMPC_private) |
390 | continue; |
391 | ArrayRef<const Expr *> Vars; |
392 | if (const auto *PC = dyn_cast<OMPFirstprivateClause>(C)) |
393 | Vars = PC->getVarRefs(); |
394 | else if (const auto *PC = dyn_cast<OMPLastprivateClause>(C)) |
395 | Vars = PC->getVarRefs(); |
396 | else |
397 | llvm_unreachable("Unexpected clause."); |
398 | for (const auto *E : Vars) { |
399 | const Decl *D = |
400 | cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl(); |
401 | if (D == VD->getCanonicalDecl()) { |
402 | IsForCombinedParallelRegion = true; |
403 | break; |
404 | } |
405 | } |
406 | if (IsForCombinedParallelRegion) |
407 | break; |
408 | } |
409 | } |
410 | markAsEscaped(VD); |
411 | if (isa<OMPCapturedExprDecl>(VD)) |
412 | VisitValueDecl(VD); |
413 | IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion; |
414 | } |
415 | } |
416 | } |
417 | |
418 | void buildRecordForGlobalizedVars(bool IsInTTDRegion) { |
419 | (0) . __assert_fail ("!GlobalizedRD && \"Record for globalized variables is built already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 420, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!GlobalizedRD && |
420 | (0) . __assert_fail ("!GlobalizedRD && \"Record for globalized variables is built already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 420, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Record for globalized variables is built already."); |
421 | ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams; |
422 | if (IsInTTDRegion) |
423 | EscapedDeclsForTeams = EscapedDecls.getArrayRef(); |
424 | else |
425 | EscapedDeclsForParallel = EscapedDecls.getArrayRef(); |
426 | GlobalizedRD = ::buildRecordForGlobalizedVars( |
427 | CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams, |
428 | MappedDeclsFields, WarpSize); |
429 | } |
430 | |
431 | public: |
432 | CheckVarsEscapingDeclContext(CodeGenFunction &CGF, |
433 | ArrayRef<const ValueDecl *> TeamsReductions) |
434 | : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) { |
435 | } |
436 | virtual ~CheckVarsEscapingDeclContext() = default; |
437 | void VisitDeclStmt(const DeclStmt *S) { |
438 | if (!S) |
439 | return; |
440 | for (const Decl *D : S->decls()) |
441 | if (const auto *VD = dyn_cast_or_null<ValueDecl>(D)) |
442 | VisitValueDecl(VD); |
443 | } |
444 | void VisitOMPExecutableDirective(const OMPExecutableDirective *D) { |
445 | if (!D) |
446 | return; |
447 | if (!D->hasAssociatedStmt()) |
448 | return; |
449 | if (const auto *S = |
450 | dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) { |
451 | |
452 | |
453 | llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; |
454 | getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind()); |
455 | if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) { |
456 | VisitStmt(S->getCapturedStmt()); |
457 | return; |
458 | } |
459 | VisitOpenMPCapturedStmt( |
460 | S, D->clauses(), |
461 | CaptureRegions.back() == OMPD_parallel && |
462 | isOpenMPDistributeDirective(D->getDirectiveKind())); |
463 | } |
464 | } |
465 | void VisitCapturedStmt(const CapturedStmt *S) { |
466 | if (!S) |
467 | return; |
468 | for (const CapturedStmt::Capture &C : S->captures()) { |
469 | if (C.capturesVariable() && !C.capturesVariableByCopy()) { |
470 | const ValueDecl *VD = C.getCapturedVar(); |
471 | markAsEscaped(VD); |
472 | if (isa<OMPCapturedExprDecl>(VD)) |
473 | VisitValueDecl(VD); |
474 | } |
475 | } |
476 | } |
477 | void VisitLambdaExpr(const LambdaExpr *E) { |
478 | if (!E) |
479 | return; |
480 | for (const LambdaCapture &C : E->captures()) { |
481 | if (C.capturesVariable()) { |
482 | if (C.getCaptureKind() == LCK_ByRef) { |
483 | const ValueDecl *VD = C.getCapturedVar(); |
484 | markAsEscaped(VD); |
485 | if (E->isInitCapture(&C) || isa<OMPCapturedExprDecl>(VD)) |
486 | VisitValueDecl(VD); |
487 | } |
488 | } |
489 | } |
490 | } |
491 | void VisitBlockExpr(const BlockExpr *E) { |
492 | if (!E) |
493 | return; |
494 | for (const BlockDecl::Capture &C : E->getBlockDecl()->captures()) { |
495 | if (C.isByRef()) { |
496 | const VarDecl *VD = C.getVariable(); |
497 | markAsEscaped(VD); |
498 | if (isa<OMPCapturedExprDecl>(VD) || VD->isInitCapture()) |
499 | VisitValueDecl(VD); |
500 | } |
501 | } |
502 | } |
503 | void VisitCallExpr(const CallExpr *E) { |
504 | if (!E) |
505 | return; |
506 | for (const Expr *Arg : E->arguments()) { |
507 | if (!Arg) |
508 | continue; |
509 | if (Arg->isLValue()) { |
510 | const bool SavedAllEscaped = AllEscaped; |
511 | AllEscaped = true; |
512 | Visit(Arg); |
513 | AllEscaped = SavedAllEscaped; |
514 | } else { |
515 | Visit(Arg); |
516 | } |
517 | } |
518 | Visit(E->getCallee()); |
519 | } |
520 | void VisitDeclRefExpr(const DeclRefExpr *E) { |
521 | if (!E) |
522 | return; |
523 | const ValueDecl *VD = E->getDecl(); |
524 | if (AllEscaped) |
525 | markAsEscaped(VD); |
526 | if (isa<OMPCapturedExprDecl>(VD)) |
527 | VisitValueDecl(VD); |
528 | else if (const auto *VarD = dyn_cast<VarDecl>(VD)) |
529 | if (VarD->isInitCapture()) |
530 | VisitValueDecl(VD); |
531 | } |
532 | void VisitUnaryOperator(const UnaryOperator *E) { |
533 | if (!E) |
534 | return; |
535 | if (E->getOpcode() == UO_AddrOf) { |
536 | const bool SavedAllEscaped = AllEscaped; |
537 | AllEscaped = true; |
538 | Visit(E->getSubExpr()); |
539 | AllEscaped = SavedAllEscaped; |
540 | } else { |
541 | Visit(E->getSubExpr()); |
542 | } |
543 | } |
544 | void VisitImplicitCastExpr(const ImplicitCastExpr *E) { |
545 | if (!E) |
546 | return; |
547 | if (E->getCastKind() == CK_ArrayToPointerDecay) { |
548 | const bool SavedAllEscaped = AllEscaped; |
549 | AllEscaped = true; |
550 | Visit(E->getSubExpr()); |
551 | AllEscaped = SavedAllEscaped; |
552 | } else { |
553 | Visit(E->getSubExpr()); |
554 | } |
555 | } |
556 | void VisitExpr(const Expr *E) { |
557 | if (!E) |
558 | return; |
559 | bool SavedAllEscaped = AllEscaped; |
560 | if (!E->isLValue()) |
561 | AllEscaped = false; |
562 | for (const Stmt *Child : E->children()) |
563 | if (Child) |
564 | Visit(Child); |
565 | AllEscaped = SavedAllEscaped; |
566 | } |
567 | void VisitStmt(const Stmt *S) { |
568 | if (!S) |
569 | return; |
570 | for (const Stmt *Child : S->children()) |
571 | if (Child) |
572 | Visit(Child); |
573 | } |
574 | |
575 | |
576 | |
577 | const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) { |
578 | if (!GlobalizedRD) |
579 | buildRecordForGlobalizedVars(IsInTTDRegion); |
580 | return GlobalizedRD; |
581 | } |
582 | |
583 | |
584 | const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const { |
585 | (0) . __assert_fail ("GlobalizedRD && \"Record for globalized variables must be generated already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 586, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(GlobalizedRD && |
586 | (0) . __assert_fail ("GlobalizedRD && \"Record for globalized variables must be generated already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 586, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Record for globalized variables must be generated already."); |
587 | auto I = MappedDeclsFields.find(VD); |
588 | if (I == MappedDeclsFields.end()) |
589 | return nullptr; |
590 | return I->getSecond(); |
591 | } |
592 | |
593 | |
594 | ArrayRef<const ValueDecl *> getEscapedDecls() const { |
595 | return EscapedDecls.getArrayRef(); |
596 | } |
597 | |
598 | |
599 | |
600 | const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const { |
601 | return EscapedParameters; |
602 | } |
603 | |
604 | |
605 | |
606 | ArrayRef<const ValueDecl *> getEscapedVariableLengthDecls() const { |
607 | return EscapedVariableLengthDecls.getArrayRef(); |
608 | } |
609 | }; |
610 | } |
611 | |
612 | |
613 | static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { |
614 | return CGF.EmitRuntimeCall( |
615 | llvm::Intrinsic::getDeclaration( |
616 | &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), |
617 | "nvptx_warp_size"); |
618 | } |
619 | |
620 | |
621 | static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { |
622 | return CGF.EmitRuntimeCall( |
623 | llvm::Intrinsic::getDeclaration( |
624 | &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), |
625 | "nvptx_tid"); |
626 | } |
627 | |
628 | |
629 | |
630 | |
631 | static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { |
632 | CGBuilderTy &Bld = CGF.Builder; |
633 | return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id"); |
634 | } |
635 | |
636 | |
637 | |
638 | |
639 | static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { |
640 | CGBuilderTy &Bld = CGF.Builder; |
641 | return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask), |
642 | "nvptx_lane_id"); |
643 | } |
644 | |
645 | |
646 | static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { |
647 | return CGF.EmitRuntimeCall( |
648 | llvm::Intrinsic::getDeclaration( |
649 | &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), |
650 | "nvptx_num_threads"); |
651 | } |
652 | |
653 | |
654 | |
655 | |
656 | |
657 | |
658 | static llvm::Value *getThreadLimit(CodeGenFunction &CGF, |
659 | bool IsInSPMDExecutionMode = false) { |
660 | CGBuilderTy &Bld = CGF.Builder; |
661 | return IsInSPMDExecutionMode |
662 | ? getNVPTXNumThreads(CGF) |
663 | : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), |
664 | "thread_limit"); |
665 | } |
666 | |
667 | |
668 | |
669 | |
670 | |
671 | |
672 | |
673 | |
674 | static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { |
675 | CGBuilderTy &Bld = CGF.Builder; |
676 | llvm::Value *NumThreads = getNVPTXNumThreads(CGF); |
677 | |
678 | |
679 | llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); |
680 | |
681 | return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)), |
682 | Bld.CreateNot(Mask), "master_tid"); |
683 | } |
684 | |
685 | CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( |
686 | CodeGenModule &CGM, SourceLocation Loc) |
687 | : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()), |
688 | Loc(Loc) { |
689 | createWorkerFunction(CGM); |
690 | } |
691 | |
692 | void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( |
693 | CodeGenModule &CGM) { |
694 | |
695 | |
696 | WorkerFn = llvm::Function::Create( |
697 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
698 | , &CGM.getModule()); |
699 | CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI); |
700 | WorkerFn->setDoesNotRecurse(); |
701 | } |
702 | |
703 | CGOpenMPRuntimeNVPTX::ExecutionMode |
704 | CGOpenMPRuntimeNVPTX::getExecutionMode() const { |
705 | return CurrentExecutionMode; |
706 | } |
707 | |
708 | static CGOpenMPRuntimeNVPTX::DataSharingMode |
709 | getDataSharingMode(CodeGenModule &CGM) { |
710 | return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeNVPTX::CUDA |
711 | : CGOpenMPRuntimeNVPTX::Generic; |
712 | } |
713 | |
714 | |
715 | |
716 | static bool isTrivial(ASTContext &Ctx, const Expr * E) { |
717 | |
718 | |
719 | return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || |
720 | !E->hasNonTrivialCall(Ctx)) && |
721 | !E->HasSideEffects(Ctx, ); |
722 | } |
723 | |
724 | |
725 | |
726 | static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { |
727 | if (const auto *C = dyn_cast<CompoundStmt>(Body)) { |
728 | const Stmt *Child = nullptr; |
729 | for (const Stmt *S : C->body()) { |
730 | if (const auto *E = dyn_cast<Expr>(S)) { |
731 | if (isTrivial(Ctx, E)) |
732 | continue; |
733 | } |
734 | |
735 | if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || |
736 | isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) |
737 | continue; |
738 | |
739 | if (const auto *DS = dyn_cast<DeclStmt>(S)) { |
740 | if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { |
741 | if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || |
742 | isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || |
743 | isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || |
744 | isa<UsingDirectiveDecl>(D) || |
745 | isa<OMPDeclareReductionDecl>(D) || |
746 | isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) |
747 | return true; |
748 | const auto *VD = dyn_cast<VarDecl>(D); |
749 | if (!VD) |
750 | return false; |
751 | return VD->isConstexpr() || |
752 | ((VD->getType().isTrivialType(Ctx) || |
753 | VD->getType()->isReferenceType()) && |
754 | (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); |
755 | })) |
756 | continue; |
757 | } |
758 | |
759 | if (Child) |
760 | return Body; |
761 | Child = S; |
762 | } |
763 | if (Child) |
764 | return Child; |
765 | } |
766 | return Body; |
767 | } |
768 | |
769 | |
770 | |
771 | |
772 | static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, |
773 | const OMPExecutableDirective &D) { |
774 | if (D.hasClausesOfKind<OMPNumThreadsClause>()) |
775 | return true; |
776 | for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { |
777 | OpenMPDirectiveKind NameModifier = C->getNameModifier(); |
778 | if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown) |
779 | continue; |
780 | const Expr *Cond = C->getCondition(); |
781 | bool Result; |
782 | if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result) |
783 | return true; |
784 | } |
785 | return false; |
786 | } |
787 | |
788 | |
789 | static bool hasNestedSPMDDirective(ASTContext &Ctx, |
790 | const OMPExecutableDirective &D) { |
791 | const auto *CS = D.getInnermostCapturedStmt(); |
792 | const auto *Body = |
793 | CS->getCapturedStmt()->IgnoreContainers(); |
794 | const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); |
795 | |
796 | if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
797 | OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); |
798 | switch (D.getDirectiveKind()) { |
799 | case OMPD_target: |
800 | if (isOpenMPParallelDirective(DKind) && |
801 | !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) |
802 | return true; |
803 | if (DKind == OMPD_teams) { |
804 | Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( |
805 | ); |
806 | if (!Body) |
807 | return false; |
808 | ChildStmt = getSingleCompoundChild(Ctx, Body); |
809 | if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
810 | DKind = NND->getDirectiveKind(); |
811 | if (isOpenMPParallelDirective(DKind) && |
812 | !hasParallelIfNumThreadsClause(Ctx, *NND)) |
813 | return true; |
814 | } |
815 | } |
816 | return false; |
817 | case OMPD_target_teams: |
818 | return isOpenMPParallelDirective(DKind) && |
819 | !hasParallelIfNumThreadsClause(Ctx, *NestedDir); |
820 | case OMPD_target_simd: |
821 | case OMPD_target_parallel: |
822 | case OMPD_target_parallel_for: |
823 | case OMPD_target_parallel_for_simd: |
824 | case OMPD_target_teams_distribute: |
825 | case OMPD_target_teams_distribute_simd: |
826 | case OMPD_target_teams_distribute_parallel_for: |
827 | case OMPD_target_teams_distribute_parallel_for_simd: |
828 | case OMPD_parallel: |
829 | case OMPD_for: |
830 | case OMPD_parallel_for: |
831 | case OMPD_parallel_sections: |
832 | case OMPD_for_simd: |
833 | case OMPD_parallel_for_simd: |
834 | case OMPD_cancel: |
835 | case OMPD_cancellation_point: |
836 | case OMPD_ordered: |
837 | case OMPD_threadprivate: |
838 | case OMPD_allocate: |
839 | case OMPD_task: |
840 | case OMPD_simd: |
841 | case OMPD_sections: |
842 | case OMPD_section: |
843 | case OMPD_single: |
844 | case OMPD_master: |
845 | case OMPD_critical: |
846 | case OMPD_taskyield: |
847 | case OMPD_barrier: |
848 | case OMPD_taskwait: |
849 | case OMPD_taskgroup: |
850 | case OMPD_atomic: |
851 | case OMPD_flush: |
852 | case OMPD_teams: |
853 | case OMPD_target_data: |
854 | case OMPD_target_exit_data: |
855 | case OMPD_target_enter_data: |
856 | case OMPD_distribute: |
857 | case OMPD_distribute_simd: |
858 | case OMPD_distribute_parallel_for: |
859 | case OMPD_distribute_parallel_for_simd: |
860 | case OMPD_teams_distribute: |
861 | case OMPD_teams_distribute_simd: |
862 | case OMPD_teams_distribute_parallel_for: |
863 | case OMPD_teams_distribute_parallel_for_simd: |
864 | case OMPD_target_update: |
865 | case OMPD_declare_simd: |
866 | case OMPD_declare_target: |
867 | case OMPD_end_declare_target: |
868 | case OMPD_declare_reduction: |
869 | case OMPD_declare_mapper: |
870 | case OMPD_taskloop: |
871 | case OMPD_taskloop_simd: |
872 | case OMPD_requires: |
873 | case OMPD_unknown: |
874 | llvm_unreachable("Unexpected directive."); |
875 | } |
876 | } |
877 | |
878 | return false; |
879 | } |
880 | |
881 | static bool supportsSPMDExecutionMode(ASTContext &Ctx, |
882 | const OMPExecutableDirective &D) { |
883 | OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); |
884 | switch (DirectiveKind) { |
885 | case OMPD_target: |
886 | case OMPD_target_teams: |
887 | return hasNestedSPMDDirective(Ctx, D); |
888 | case OMPD_target_parallel: |
889 | case OMPD_target_parallel_for: |
890 | case OMPD_target_parallel_for_simd: |
891 | case OMPD_target_teams_distribute_parallel_for: |
892 | case OMPD_target_teams_distribute_parallel_for_simd: |
893 | return !hasParallelIfNumThreadsClause(Ctx, D); |
894 | case OMPD_target_simd: |
895 | case OMPD_target_teams_distribute: |
896 | case OMPD_target_teams_distribute_simd: |
897 | return false; |
898 | case OMPD_parallel: |
899 | case OMPD_for: |
900 | case OMPD_parallel_for: |
901 | case OMPD_parallel_sections: |
902 | case OMPD_for_simd: |
903 | case OMPD_parallel_for_simd: |
904 | case OMPD_cancel: |
905 | case OMPD_cancellation_point: |
906 | case OMPD_ordered: |
907 | case OMPD_threadprivate: |
908 | case OMPD_allocate: |
909 | case OMPD_task: |
910 | case OMPD_simd: |
911 | case OMPD_sections: |
912 | case OMPD_section: |
913 | case OMPD_single: |
914 | case OMPD_master: |
915 | case OMPD_critical: |
916 | case OMPD_taskyield: |
917 | case OMPD_barrier: |
918 | case OMPD_taskwait: |
919 | case OMPD_taskgroup: |
920 | case OMPD_atomic: |
921 | case OMPD_flush: |
922 | case OMPD_teams: |
923 | case OMPD_target_data: |
924 | case OMPD_target_exit_data: |
925 | case OMPD_target_enter_data: |
926 | case OMPD_distribute: |
927 | case OMPD_distribute_simd: |
928 | case OMPD_distribute_parallel_for: |
929 | case OMPD_distribute_parallel_for_simd: |
930 | case OMPD_teams_distribute: |
931 | case OMPD_teams_distribute_simd: |
932 | case OMPD_teams_distribute_parallel_for: |
933 | case OMPD_teams_distribute_parallel_for_simd: |
934 | case OMPD_target_update: |
935 | case OMPD_declare_simd: |
936 | case OMPD_declare_target: |
937 | case OMPD_end_declare_target: |
938 | case OMPD_declare_reduction: |
939 | case OMPD_declare_mapper: |
940 | case OMPD_taskloop: |
941 | case OMPD_taskloop_simd: |
942 | case OMPD_requires: |
943 | case OMPD_unknown: |
944 | break; |
945 | } |
946 | llvm_unreachable( |
947 | "Unknown programming model for OpenMP directive on NVPTX target."); |
948 | } |
949 | |
950 | |
951 | |
952 | static bool hasStaticScheduling(const OMPExecutableDirective &D) { |
953 | (0) . __assert_fail ("isOpenMPWorksharingDirective(D.getDirectiveKind()) && isOpenMPLoopDirective(D.getDirectiveKind()) && \"Expected loop-based directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 955, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) && |
954 | (0) . __assert_fail ("isOpenMPWorksharingDirective(D.getDirectiveKind()) && isOpenMPLoopDirective(D.getDirectiveKind()) && \"Expected loop-based directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 955, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> isOpenMPLoopDirective(D.getDirectiveKind()) && |
955 | (0) . __assert_fail ("isOpenMPWorksharingDirective(D.getDirectiveKind()) && isOpenMPLoopDirective(D.getDirectiveKind()) && \"Expected loop-based directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 955, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Expected loop-based directive."); |
956 | return !D.hasClausesOfKind<OMPOrderedClause>() && |
957 | (!D.hasClausesOfKind<OMPScheduleClause>() || |
958 | llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(), |
959 | [](const OMPScheduleClause *C) { |
960 | return C->getScheduleKind() == OMPC_SCHEDULE_static; |
961 | })); |
962 | } |
963 | |
964 | |
965 | static bool hasNestedLightweightDirective(ASTContext &Ctx, |
966 | const OMPExecutableDirective &D) { |
967 | (0) . __assert_fail ("supportsSPMDExecutionMode(Ctx, D) && \"Expected SPMD mode directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 967, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive."); |
968 | const auto *CS = D.getInnermostCapturedStmt(); |
969 | const auto *Body = |
970 | CS->getCapturedStmt()->IgnoreContainers(); |
971 | const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); |
972 | |
973 | if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
974 | OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); |
975 | switch (D.getDirectiveKind()) { |
976 | case OMPD_target: |
977 | if (isOpenMPParallelDirective(DKind) && |
978 | isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && |
979 | hasStaticScheduling(*NestedDir)) |
980 | return true; |
981 | if (DKind == OMPD_parallel) { |
982 | Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( |
983 | ); |
984 | if (!Body) |
985 | return false; |
986 | ChildStmt = getSingleCompoundChild(Ctx, Body); |
987 | if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
988 | DKind = NND->getDirectiveKind(); |
989 | if (isOpenMPWorksharingDirective(DKind) && |
990 | isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) |
991 | return true; |
992 | } |
993 | } else if (DKind == OMPD_teams) { |
994 | Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( |
995 | ); |
996 | if (!Body) |
997 | return false; |
998 | ChildStmt = getSingleCompoundChild(Ctx, Body); |
999 | if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
1000 | DKind = NND->getDirectiveKind(); |
1001 | if (isOpenMPParallelDirective(DKind) && |
1002 | isOpenMPWorksharingDirective(DKind) && |
1003 | isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) |
1004 | return true; |
1005 | if (DKind == OMPD_parallel) { |
1006 | Body = NND->getInnermostCapturedStmt()->IgnoreContainers( |
1007 | ); |
1008 | if (!Body) |
1009 | return false; |
1010 | ChildStmt = getSingleCompoundChild(Ctx, Body); |
1011 | if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
1012 | DKind = NND->getDirectiveKind(); |
1013 | if (isOpenMPWorksharingDirective(DKind) && |
1014 | isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) |
1015 | return true; |
1016 | } |
1017 | } |
1018 | } |
1019 | } |
1020 | return false; |
1021 | case OMPD_target_teams: |
1022 | if (isOpenMPParallelDirective(DKind) && |
1023 | isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && |
1024 | hasStaticScheduling(*NestedDir)) |
1025 | return true; |
1026 | if (DKind == OMPD_parallel) { |
1027 | Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( |
1028 | ); |
1029 | if (!Body) |
1030 | return false; |
1031 | ChildStmt = getSingleCompoundChild(Ctx, Body); |
1032 | if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { |
1033 | DKind = NND->getDirectiveKind(); |
1034 | if (isOpenMPWorksharingDirective(DKind) && |
1035 | isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) |
1036 | return true; |
1037 | } |
1038 | } |
1039 | return false; |
1040 | case OMPD_target_parallel: |
1041 | return isOpenMPWorksharingDirective(DKind) && |
1042 | isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); |
1043 | case OMPD_target_teams_distribute: |
1044 | case OMPD_target_simd: |
1045 | case OMPD_target_parallel_for: |
1046 | case OMPD_target_parallel_for_simd: |
1047 | case OMPD_target_teams_distribute_simd: |
1048 | case OMPD_target_teams_distribute_parallel_for: |
1049 | case OMPD_target_teams_distribute_parallel_for_simd: |
1050 | case OMPD_parallel: |
1051 | case OMPD_for: |
1052 | case OMPD_parallel_for: |
1053 | case OMPD_parallel_sections: |
1054 | case OMPD_for_simd: |
1055 | case OMPD_parallel_for_simd: |
1056 | case OMPD_cancel: |
1057 | case OMPD_cancellation_point: |
1058 | case OMPD_ordered: |
1059 | case OMPD_threadprivate: |
1060 | case OMPD_allocate: |
1061 | case OMPD_task: |
1062 | case OMPD_simd: |
1063 | case OMPD_sections: |
1064 | case OMPD_section: |
1065 | case OMPD_single: |
1066 | case OMPD_master: |
1067 | case OMPD_critical: |
1068 | case OMPD_taskyield: |
1069 | case OMPD_barrier: |
1070 | case OMPD_taskwait: |
1071 | case OMPD_taskgroup: |
1072 | case OMPD_atomic: |
1073 | case OMPD_flush: |
1074 | case OMPD_teams: |
1075 | case OMPD_target_data: |
1076 | case OMPD_target_exit_data: |
1077 | case OMPD_target_enter_data: |
1078 | case OMPD_distribute: |
1079 | case OMPD_distribute_simd: |
1080 | case OMPD_distribute_parallel_for: |
1081 | case OMPD_distribute_parallel_for_simd: |
1082 | case OMPD_teams_distribute: |
1083 | case OMPD_teams_distribute_simd: |
1084 | case OMPD_teams_distribute_parallel_for: |
1085 | case OMPD_teams_distribute_parallel_for_simd: |
1086 | case OMPD_target_update: |
1087 | case OMPD_declare_simd: |
1088 | case OMPD_declare_target: |
1089 | case OMPD_end_declare_target: |
1090 | case OMPD_declare_reduction: |
1091 | case OMPD_declare_mapper: |
1092 | case OMPD_taskloop: |
1093 | case OMPD_taskloop_simd: |
1094 | case OMPD_requires: |
1095 | case OMPD_unknown: |
1096 | llvm_unreachable("Unexpected directive."); |
1097 | } |
1098 | } |
1099 | |
1100 | return false; |
1101 | } |
1102 | |
1103 | |
1104 | |
1105 | static bool supportsLightweightRuntime(ASTContext &Ctx, |
1106 | const OMPExecutableDirective &D) { |
1107 | if (!supportsSPMDExecutionMode(Ctx, D)) |
1108 | return false; |
1109 | OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); |
1110 | switch (DirectiveKind) { |
1111 | case OMPD_target: |
1112 | case OMPD_target_teams: |
1113 | case OMPD_target_parallel: |
1114 | return hasNestedLightweightDirective(Ctx, D); |
1115 | case OMPD_target_parallel_for: |
1116 | case OMPD_target_parallel_for_simd: |
1117 | case OMPD_target_teams_distribute_parallel_for: |
1118 | case OMPD_target_teams_distribute_parallel_for_simd: |
1119 | |
1120 | return hasStaticScheduling(D); |
1121 | case OMPD_target_simd: |
1122 | case OMPD_target_teams_distribute: |
1123 | case OMPD_target_teams_distribute_simd: |
1124 | return false; |
1125 | case OMPD_parallel: |
1126 | case OMPD_for: |
1127 | case OMPD_parallel_for: |
1128 | case OMPD_parallel_sections: |
1129 | case OMPD_for_simd: |
1130 | case OMPD_parallel_for_simd: |
1131 | case OMPD_cancel: |
1132 | case OMPD_cancellation_point: |
1133 | case OMPD_ordered: |
1134 | case OMPD_threadprivate: |
1135 | case OMPD_allocate: |
1136 | case OMPD_task: |
1137 | case OMPD_simd: |
1138 | case OMPD_sections: |
1139 | case OMPD_section: |
1140 | case OMPD_single: |
1141 | case OMPD_master: |
1142 | case OMPD_critical: |
1143 | case OMPD_taskyield: |
1144 | case OMPD_barrier: |
1145 | case OMPD_taskwait: |
1146 | case OMPD_taskgroup: |
1147 | case OMPD_atomic: |
1148 | case OMPD_flush: |
1149 | case OMPD_teams: |
1150 | case OMPD_target_data: |
1151 | case OMPD_target_exit_data: |
1152 | case OMPD_target_enter_data: |
1153 | case OMPD_distribute: |
1154 | case OMPD_distribute_simd: |
1155 | case OMPD_distribute_parallel_for: |
1156 | case OMPD_distribute_parallel_for_simd: |
1157 | case OMPD_teams_distribute: |
1158 | case OMPD_teams_distribute_simd: |
1159 | case OMPD_teams_distribute_parallel_for: |
1160 | case OMPD_teams_distribute_parallel_for_simd: |
1161 | case OMPD_target_update: |
1162 | case OMPD_declare_simd: |
1163 | case OMPD_declare_target: |
1164 | case OMPD_end_declare_target: |
1165 | case OMPD_declare_reduction: |
1166 | case OMPD_declare_mapper: |
1167 | case OMPD_taskloop: |
1168 | case OMPD_taskloop_simd: |
1169 | case OMPD_requires: |
1170 | case OMPD_unknown: |
1171 | break; |
1172 | } |
1173 | llvm_unreachable( |
1174 | "Unknown programming model for OpenMP directive on NVPTX target."); |
1175 | } |
1176 | |
1177 | void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, |
1178 | StringRef ParentName, |
1179 | llvm::Function *&OutlinedFn, |
1180 | llvm::Constant *&OutlinedFnID, |
1181 | bool IsOffloadEntry, |
1182 | const RegionCodeGenTy &CodeGen) { |
1183 | ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); |
1184 | EntryFunctionState EST; |
1185 | WorkerFunctionState WST(CGM, D.getBeginLoc()); |
1186 | Work.clear(); |
1187 | WrapperFunctionsMap.clear(); |
1188 | |
1189 | |
1190 | class NVPTXPrePostActionTy : public PrePostActionTy { |
1191 | CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; |
1192 | CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; |
1193 | |
1194 | public: |
1195 | NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, |
1196 | CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) |
1197 | : EST(EST), WST(WST) {} |
1198 | void Enter(CodeGenFunction &CGF) override { |
1199 | auto &RT = |
1200 | static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); |
1201 | RT.emitNonSPMDEntryHeader(CGF, EST, WST); |
1202 | |
1203 | RT.setLocThreadIdInsertPt(CGF, ); |
1204 | } |
1205 | void Exit(CodeGenFunction &CGF) override { |
1206 | auto &RT = |
1207 | static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); |
1208 | RT.clearLocThreadIdInsertPt(CGF); |
1209 | RT.emitNonSPMDEntryFooter(CGF, EST); |
1210 | } |
1211 | } Action(EST, WST); |
1212 | CodeGen.setAction(Action); |
1213 | IsInTTDRegion = true; |
1214 | |
1215 | GlobalizedRecords.emplace_back(); |
1216 | if (!KernelStaticGlobalized) { |
1217 | KernelStaticGlobalized = new llvm::GlobalVariable( |
1218 | CGM.getModule(), CGM.VoidPtrTy, , |
1219 | llvm::GlobalValue::InternalLinkage, |
1220 | llvm::ConstantPointerNull::get(CGM.VoidPtrTy), |
1221 | "_openmp_kernel_static_glob_rd$ptr", , |
1222 | llvm::GlobalValue::NotThreadLocal, |
1223 | CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); |
1224 | } |
1225 | emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, |
1226 | IsOffloadEntry, CodeGen); |
1227 | IsInTTDRegion = false; |
1228 | |
1229 | |
1230 | |
1231 | WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker")); |
1232 | |
1233 | |
1234 | emitWorkerFunction(WST); |
1235 | } |
1236 | |
1237 | |
1238 | void CGOpenMPRuntimeNVPTX::(CodeGenFunction &CGF, |
1239 | EntryFunctionState &EST, |
1240 | WorkerFunctionState &WST) { |
1241 | CGBuilderTy &Bld = CGF.Builder; |
1242 | |
1243 | llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); |
1244 | llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); |
1245 | llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); |
1246 | EST.ExitBB = CGF.createBasicBlock(".exit"); |
1247 | |
1248 | llvm::Value *IsWorker = |
1249 | Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); |
1250 | Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); |
1251 | |
1252 | CGF.EmitBlock(WorkerBB); |
1253 | emitCall(CGF, WST.Loc, WST.WorkerFn); |
1254 | CGF.EmitBranch(EST.ExitBB); |
1255 | |
1256 | CGF.EmitBlock(MasterCheckBB); |
1257 | llvm::Value *IsMaster = |
1258 | Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); |
1259 | Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); |
1260 | |
1261 | CGF.EmitBlock(MasterBB); |
1262 | IsInTargetMasterThreadRegion = true; |
1263 | |
1264 | |
1265 | |
1266 | |
1267 | llvm::Value *Args[] = {getThreadLimit(CGF), |
1268 | Bld.getInt16()}; |
1269 | CGF.EmitRuntimeCall( |
1270 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); |
1271 | |
1272 | |
1273 | CGF.EmitRuntimeCall( |
1274 | createNVPTXRuntimeFunction( |
1275 | OMPRTL_NVPTX__kmpc_data_sharing_init_stack)); |
1276 | |
1277 | emitGenericVarsProlog(CGF, WST.Loc); |
1278 | } |
1279 | |
1280 | void CGOpenMPRuntimeNVPTX::(CodeGenFunction &CGF, |
1281 | EntryFunctionState &EST) { |
1282 | IsInTargetMasterThreadRegion = false; |
1283 | if (!CGF.HaveInsertPoint()) |
1284 | return; |
1285 | |
1286 | emitGenericVarsEpilog(CGF); |
1287 | |
1288 | if (!EST.ExitBB) |
1289 | EST.ExitBB = CGF.createBasicBlock(".exit"); |
1290 | |
1291 | llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier"); |
1292 | CGF.EmitBranch(TerminateBB); |
1293 | |
1294 | CGF.EmitBlock(TerminateBB); |
1295 | |
1296 | |
1297 | llvm::Value *Args[] = {CGF.Builder.getInt16()}; |
1298 | CGF.EmitRuntimeCall( |
1299 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); |
1300 | |
1301 | syncCTAThreads(CGF); |
1302 | |
1303 | CGF.EmitBranch(EST.ExitBB); |
1304 | |
1305 | CGF.EmitBlock(EST.ExitBB); |
1306 | EST.ExitBB = nullptr; |
1307 | } |
1308 | |
1309 | void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, |
1310 | StringRef ParentName, |
1311 | llvm::Function *&OutlinedFn, |
1312 | llvm::Constant *&OutlinedFnID, |
1313 | bool IsOffloadEntry, |
1314 | const RegionCodeGenTy &CodeGen) { |
1315 | ExecutionRuntimeModesRAII ModeRAII( |
1316 | CurrentExecutionMode, RequiresFullRuntime, |
1317 | CGM.getLangOpts().OpenMPCUDAForceFullRuntime || |
1318 | !supportsLightweightRuntime(CGM.getContext(), D)); |
1319 | EntryFunctionState EST; |
1320 | |
1321 | |
1322 | class NVPTXPrePostActionTy : public PrePostActionTy { |
1323 | CGOpenMPRuntimeNVPTX &RT; |
1324 | CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; |
1325 | const OMPExecutableDirective &D; |
1326 | |
1327 | public: |
1328 | NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, |
1329 | CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, |
1330 | const OMPExecutableDirective &D) |
1331 | : RT(RT), EST(EST), D(D) {} |
1332 | void Enter(CodeGenFunction &CGF) override { |
1333 | RT.emitSPMDEntryHeader(CGF, EST, D); |
1334 | |
1335 | RT.setLocThreadIdInsertPt(CGF, ); |
1336 | } |
1337 | void Exit(CodeGenFunction &CGF) override { |
1338 | RT.clearLocThreadIdInsertPt(CGF); |
1339 | RT.emitSPMDEntryFooter(CGF, EST); |
1340 | } |
1341 | } Action(*this, EST, D); |
1342 | CodeGen.setAction(Action); |
1343 | IsInTTDRegion = true; |
1344 | |
1345 | GlobalizedRecords.emplace_back(); |
1346 | if (!KernelStaticGlobalized) { |
1347 | KernelStaticGlobalized = new llvm::GlobalVariable( |
1348 | CGM.getModule(), CGM.VoidPtrTy, , |
1349 | llvm::GlobalValue::InternalLinkage, |
1350 | llvm::ConstantPointerNull::get(CGM.VoidPtrTy), |
1351 | "_openmp_kernel_static_glob_rd$ptr", , |
1352 | llvm::GlobalValue::NotThreadLocal, |
1353 | CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); |
1354 | } |
1355 | emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, |
1356 | IsOffloadEntry, CodeGen); |
1357 | IsInTTDRegion = false; |
1358 | } |
1359 | |
1360 | void CGOpenMPRuntimeNVPTX::( |
1361 | CodeGenFunction &CGF, EntryFunctionState &EST, |
1362 | const OMPExecutableDirective &D) { |
1363 | CGBuilderTy &Bld = CGF.Builder; |
1364 | |
1365 | |
1366 | llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); |
1367 | EST.ExitBB = CGF.createBasicBlock(".exit"); |
1368 | |
1369 | llvm::Value *Args[] = {getThreadLimit(CGF, ), |
1370 | |
1371 | Bld.getInt16(RequiresFullRuntime ? 1 : 0), |
1372 | .getInt16(0)}; |
1373 | CGF.EmitRuntimeCall( |
1374 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); |
1375 | |
1376 | if (RequiresFullRuntime) { |
1377 | |
1378 | CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( |
1379 | OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); |
1380 | } |
1381 | |
1382 | CGF.EmitBranch(ExecuteBB); |
1383 | |
1384 | CGF.EmitBlock(ExecuteBB); |
1385 | |
1386 | IsInTargetMasterThreadRegion = true; |
1387 | } |
1388 | |
1389 | void CGOpenMPRuntimeNVPTX::(CodeGenFunction &CGF, |
1390 | EntryFunctionState &EST) { |
1391 | IsInTargetMasterThreadRegion = false; |
1392 | if (!CGF.HaveInsertPoint()) |
1393 | return; |
1394 | |
1395 | if (!EST.ExitBB) |
1396 | EST.ExitBB = CGF.createBasicBlock(".exit"); |
1397 | |
1398 | llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit"); |
1399 | CGF.EmitBranch(OMPDeInitBB); |
1400 | |
1401 | CGF.EmitBlock(OMPDeInitBB); |
1402 | |
1403 | llvm::Value *Args[] = { |
1404 | CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; |
1405 | CGF.EmitRuntimeCall( |
1406 | createNVPTXRuntimeFunction( |
1407 | OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); |
1408 | CGF.EmitBranch(EST.ExitBB); |
1409 | |
1410 | CGF.EmitBlock(EST.ExitBB); |
1411 | EST.ExitBB = nullptr; |
1412 | } |
1413 | |
1414 | |
1415 | |
1416 | |
1417 | |
1418 | |
1419 | |
1420 | static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name, |
1421 | bool Mode) { |
1422 | auto *GVMode = |
1423 | new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, , |
1424 | llvm::GlobalValue::WeakAnyLinkage, |
1425 | llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1), |
1426 | Twine(Name, "_exec_mode")); |
1427 | CGM.addCompilerUsedGlobal(GVMode); |
1428 | } |
1429 | |
1430 | void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { |
1431 | ASTContext &Ctx = CGM.getContext(); |
1432 | |
1433 | CodeGenFunction CGF(CGM, ); |
1434 | CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {}, |
1435 | WST.Loc, WST.Loc); |
1436 | emitWorkerLoop(CGF, WST); |
1437 | CGF.FinishFunction(); |
1438 | } |
1439 | |
1440 | void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, |
1441 | WorkerFunctionState &WST) { |
1442 | |
1443 | |
1444 | |
1445 | |
1446 | |
1447 | |
1448 | |
1449 | |
1450 | |
1451 | CGBuilderTy &Bld = CGF.Builder; |
1452 | |
1453 | llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work"); |
1454 | llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers"); |
1455 | llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel"); |
1456 | llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel"); |
1457 | llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel"); |
1458 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); |
1459 | |
1460 | CGF.EmitBranch(AwaitBB); |
1461 | |
1462 | |
1463 | CGF.EmitBlock(AwaitBB); |
1464 | |
1465 | syncCTAThreads(CGF); |
1466 | |
1467 | Address WorkFn = |
1468 | CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, ); |
1469 | Address ExecStatus = |
1470 | CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, ); |
1471 | CGF.InitTempAlloca(ExecStatus, Bld.getInt8()); |
1472 | CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); |
1473 | |
1474 | |
1475 | llvm::Value *Args[] = {WorkFn.getPointer(), |
1476 | .getInt16(1)}; |
1477 | llvm::Value *Ret = CGF.EmitRuntimeCall( |
1478 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); |
1479 | Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); |
1480 | |
1481 | |
1482 | llvm::Value *WorkID = Bld.CreateLoad(WorkFn); |
1483 | llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate"); |
1484 | Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB); |
1485 | |
1486 | |
1487 | CGF.EmitBlock(SelectWorkersBB); |
1488 | llvm::Value *IsActive = |
1489 | Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active"); |
1490 | Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB); |
1491 | |
1492 | |
1493 | CGF.EmitBlock(ExecuteBB); |
1494 | |
1495 | setLocThreadIdInsertPt(CGF, ); |
1496 | |
1497 | |
1498 | for (llvm::Function *W : Work) { |
1499 | |
1500 | llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); |
1501 | |
1502 | llvm::Value *WorkFnMatch = |
1503 | Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match"); |
1504 | |
1505 | llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn"); |
1506 | llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next"); |
1507 | Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB); |
1508 | |
1509 | |
1510 | CGF.EmitBlock(ExecuteFNBB); |
1511 | |
1512 | |
1513 | |
1514 | |
1515 | |
1516 | emitCall(CGF, WST.Loc, W, |
1517 | {Bld.getInt16(), getThreadID(CGF, WST.Loc)}); |
1518 | |
1519 | |
1520 | CGF.EmitBranch(TerminateBB); |
1521 | |
1522 | CGF.EmitBlock(CheckNextBB); |
1523 | } |
1524 | |
1525 | |
1526 | |
1527 | auto *ParallelFnTy = |
1528 | llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty}, |
1529 | ); |
1530 | llvm::Value *WorkFnCast = |
1531 | Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo()); |
1532 | |
1533 | |
1534 | |
1535 | |
1536 | emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast}, |
1537 | {Bld.getInt16(), getThreadID(CGF, WST.Loc)}); |
1538 | |
1539 | CGF.EmitBranch(TerminateBB); |
1540 | |
1541 | |
1542 | CGF.EmitBlock(TerminateBB); |
1543 | CGF.EmitRuntimeCall( |
1544 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), |
1545 | llvm::None); |
1546 | CGF.EmitBranch(BarrierBB); |
1547 | |
1548 | |
1549 | CGF.EmitBlock(BarrierBB); |
1550 | |
1551 | syncCTAThreads(CGF); |
1552 | CGF.EmitBranch(AwaitBB); |
1553 | |
1554 | |
1555 | CGF.EmitBlock(ExitBB); |
1556 | |
1557 | clearLocThreadIdInsertPt(CGF); |
1558 | } |
1559 | |
1560 | |
1561 | |
1562 | |
1563 | |
1564 | llvm::FunctionCallee |
1565 | CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { |
1566 | llvm::FunctionCallee RTLFn = nullptr; |
1567 | switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { |
1568 | case OMPRTL_NVPTX__kmpc_kernel_init: { |
1569 | |
1570 | |
1571 | llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; |
1572 | auto *FnTy = |
1573 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1574 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); |
1575 | break; |
1576 | } |
1577 | case OMPRTL_NVPTX__kmpc_kernel_deinit: { |
1578 | |
1579 | llvm::Type *TypeParams[] = {CGM.Int16Ty}; |
1580 | auto *FnTy = |
1581 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1582 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); |
1583 | break; |
1584 | } |
1585 | case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { |
1586 | |
1587 | |
1588 | llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; |
1589 | auto *FnTy = |
1590 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1591 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); |
1592 | break; |
1593 | } |
1594 | case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { |
1595 | |
1596 | llvm::Type *TypeParams[] = {CGM.Int16Ty}; |
1597 | auto *FnTy = |
1598 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1599 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); |
1600 | break; |
1601 | } |
1602 | case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { |
1603 | |
1604 | |
1605 | llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; |
1606 | auto *FnTy = |
1607 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1608 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); |
1609 | break; |
1610 | } |
1611 | case OMPRTL_NVPTX__kmpc_kernel_parallel: { |
1612 | |
1613 | |
1614 | llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; |
1615 | llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); |
1616 | auto *FnTy = |
1617 | llvm::FunctionType::get(RetTy, TypeParams, false); |
1618 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); |
1619 | break; |
1620 | } |
1621 | case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { |
1622 | |
1623 | auto *FnTy = |
1624 | llvm::FunctionType::get(CGM.VoidTy, llvm::None, false); |
1625 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); |
1626 | break; |
1627 | } |
1628 | case OMPRTL_NVPTX__kmpc_serialized_parallel: { |
1629 | |
1630 | |
1631 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; |
1632 | auto *FnTy = |
1633 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1634 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); |
1635 | break; |
1636 | } |
1637 | case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { |
1638 | |
1639 | |
1640 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; |
1641 | auto *FnTy = |
1642 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1643 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); |
1644 | break; |
1645 | } |
1646 | case OMPRTL_NVPTX__kmpc_shuffle_int32: { |
1647 | |
1648 | |
1649 | llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; |
1650 | auto *FnTy = |
1651 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false); |
1652 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32"); |
1653 | break; |
1654 | } |
1655 | case OMPRTL_NVPTX__kmpc_shuffle_int64: { |
1656 | |
1657 | |
1658 | llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty}; |
1659 | auto *FnTy = |
1660 | llvm::FunctionType::get(CGM.Int64Ty, TypeParams, false); |
1661 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); |
1662 | break; |
1663 | } |
1664 | case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { |
1665 | |
1666 | |
1667 | |
1668 | |
1669 | |
1670 | llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, |
1671 | CGM.Int16Ty, CGM.Int16Ty}; |
1672 | auto *ShuffleReduceFnTy = |
1673 | llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, |
1674 | ); |
1675 | llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; |
1676 | auto *InterWarpCopyFnTy = |
1677 | llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, |
1678 | ); |
1679 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), |
1680 | CGM.Int32Ty, |
1681 | CGM.Int32Ty, |
1682 | CGM.SizeTy, |
1683 | CGM.VoidPtrTy, |
1684 | ShuffleReduceFnTy->getPointerTo(), |
1685 | InterWarpCopyFnTy->getPointerTo()}; |
1686 | auto *FnTy = |
1687 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, ); |
1688 | RTLFn = CGM.CreateRuntimeFunction( |
1689 | FnTy, ); |
1690 | break; |
1691 | } |
1692 | case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { |
1693 | |
1694 | llvm::Type *TypeParams[] = {CGM.Int32Ty}; |
1695 | auto *FnTy = |
1696 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, ); |
1697 | RTLFn = CGM.CreateRuntimeFunction( |
1698 | FnTy, ); |
1699 | break; |
1700 | } |
1701 | case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { |
1702 | |
1703 | |
1704 | |
1705 | |
1706 | |
1707 | |
1708 | |
1709 | |
1710 | |
1711 | |
1712 | |
1713 | llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, |
1714 | CGM.Int16Ty, CGM.Int16Ty}; |
1715 | auto *ShuffleReduceFnTy = |
1716 | llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, |
1717 | ); |
1718 | llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; |
1719 | auto *InterWarpCopyFnTy = |
1720 | llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, |
1721 | ); |
1722 | llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, |
1723 | CGM.VoidPtrTy}; |
1724 | auto *GlobalListFnTy = |
1725 | llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, |
1726 | ); |
1727 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), |
1728 | CGM.Int32Ty, |
1729 | CGM.VoidPtrTy, |
1730 | CGM.Int32Ty, |
1731 | CGM.VoidPtrTy, |
1732 | ShuffleReduceFnTy->getPointerTo(), |
1733 | InterWarpCopyFnTy->getPointerTo(), |
1734 | GlobalListFnTy->getPointerTo(), |
1735 | GlobalListFnTy->getPointerTo(), |
1736 | GlobalListFnTy->getPointerTo(), |
1737 | GlobalListFnTy->getPointerTo()}; |
1738 | auto *FnTy = |
1739 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, ); |
1740 | RTLFn = CGM.CreateRuntimeFunction( |
1741 | FnTy, ); |
1742 | break; |
1743 | } |
1744 | case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { |
1745 | |
1746 | auto *FnTy = |
1747 | llvm::FunctionType::get(CGM.VoidTy, llvm::None, false); |
1748 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack"); |
1749 | break; |
1750 | } |
1751 | case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: { |
1752 | |
1753 | auto *FnTy = |
1754 | llvm::FunctionType::get(CGM.VoidTy, llvm::None, false); |
1755 | RTLFn = |
1756 | CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); |
1757 | break; |
1758 | } |
1759 | case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { |
1760 | |
1761 | |
1762 | llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; |
1763 | auto *FnTy = |
1764 | llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, ); |
1765 | RTLFn = CGM.CreateRuntimeFunction( |
1766 | FnTy, ); |
1767 | break; |
1768 | } |
1769 | case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { |
1770 | |
1771 | llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; |
1772 | auto *FnTy = |
1773 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, ); |
1774 | RTLFn = CGM.CreateRuntimeFunction(FnTy, |
1775 | ); |
1776 | break; |
1777 | } |
1778 | case OMPRTL_NVPTX__kmpc_begin_sharing_variables: { |
1779 | |
1780 | |
1781 | llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy}; |
1782 | auto *FnTy = |
1783 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1784 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables"); |
1785 | break; |
1786 | } |
1787 | case OMPRTL_NVPTX__kmpc_end_sharing_variables: { |
1788 | |
1789 | auto *FnTy = |
1790 | llvm::FunctionType::get(CGM.VoidTy, llvm::None, false); |
1791 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables"); |
1792 | break; |
1793 | } |
1794 | case OMPRTL_NVPTX__kmpc_get_shared_variables: { |
1795 | |
1796 | llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()}; |
1797 | auto *FnTy = |
1798 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1799 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables"); |
1800 | break; |
1801 | } |
1802 | case OMPRTL_NVPTX__kmpc_parallel_level: { |
1803 | |
1804 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; |
1805 | auto *FnTy = |
1806 | llvm::FunctionType::get(CGM.Int16Ty, TypeParams, false); |
1807 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level"); |
1808 | break; |
1809 | } |
1810 | case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: { |
1811 | |
1812 | auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, ); |
1813 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); |
1814 | break; |
1815 | } |
1816 | case OMPRTL_NVPTX__kmpc_get_team_static_memory: { |
1817 | |
1818 | |
1819 | llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, |
1820 | CGM.Int16Ty, CGM.VoidPtrPtrTy}; |
1821 | auto *FnTy = |
1822 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1823 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); |
1824 | break; |
1825 | } |
1826 | case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { |
1827 | |
1828 | |
1829 | llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; |
1830 | auto *FnTy = |
1831 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, ); |
1832 | RTLFn = |
1833 | CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); |
1834 | break; |
1835 | } |
1836 | case OMPRTL__kmpc_barrier: { |
1837 | |
1838 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; |
1839 | auto *FnTy = |
1840 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1841 | RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_barrier"); |
1842 | cast<llvm::Function>(RTLFn.getCallee()) |
1843 | ->addFnAttr(llvm::Attribute::Convergent); |
1844 | break; |
1845 | } |
1846 | case OMPRTL__kmpc_barrier_simple_spmd: { |
1847 | |
1848 | |
1849 | llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; |
1850 | auto *FnTy = |
1851 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, false); |
1852 | RTLFn = |
1853 | CGM.CreateRuntimeFunction(FnTy, "__kmpc_barrier_simple_spmd"); |
1854 | cast<llvm::Function>(RTLFn.getCallee()) |
1855 | ->addFnAttr(llvm::Attribute::Convergent); |
1856 | break; |
1857 | } |
1858 | } |
1859 | return RTLFn; |
1860 | } |
1861 | |
1862 | void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, |
1863 | llvm::Constant *Addr, |
1864 | uint64_t Size, int32_t, |
1865 | llvm::GlobalValue::LinkageTypes) { |
1866 | |
1867 | |
1868 | if (!isa<llvm::Function>(Addr)) |
1869 | return; |
1870 | llvm::Module &M = CGM.getModule(); |
1871 | llvm::LLVMContext &Ctx = CGM.getLLVMContext(); |
1872 | |
1873 | |
1874 | llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); |
1875 | |
1876 | llvm::Metadata *MDVals[] = { |
1877 | llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"), |
1878 | llvm::ConstantAsMetadata::get( |
1879 | llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; |
1880 | |
1881 | MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); |
1882 | } |
1883 | |
1884 | void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( |
1885 | const OMPExecutableDirective &D, StringRef ParentName, |
1886 | llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, |
1887 | bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { |
1888 | if (!IsOffloadEntry) |
1889 | return; |
1890 | |
1891 | (0) . __assert_fail ("!ParentName.empty() && \"Invalid target region parent name!\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 1891, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!ParentName.empty() && "Invalid target region parent name!"); |
1892 | |
1893 | bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); |
1894 | if (Mode) |
1895 | emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, |
1896 | CodeGen); |
1897 | else |
1898 | emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, |
1899 | CodeGen); |
1900 | |
1901 | setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); |
1902 | } |
1903 | |
1904 | namespace { |
1905 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
1906 | |
1907 | enum ModeFlagsTy : unsigned { |
1908 | |
1909 | KMP_IDENT_SPMD_MODE = 0x01, |
1910 | |
1911 | KMP_IDENT_SIMPLE_RT_MODE = 0x02, |
1912 | LLVM_MARK_AS_BITMASK_ENUM(KMP_IDENT_SIMPLE_RT_MODE) |
1913 | }; |
1914 | |
1915 | |
1916 | static const ModeFlagsTy UndefinedMode = |
1917 | (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; |
1918 | } |
1919 | |
1920 | unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { |
1921 | switch (getExecutionMode()) { |
1922 | case EM_SPMD: |
1923 | if (requiresFullRuntime()) |
1924 | return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); |
1925 | return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; |
1926 | case EM_NonSPMD: |
1927 | (0) . __assert_fail ("requiresFullRuntime() && \"Expected full runtime.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 1927, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(requiresFullRuntime() && "Expected full runtime."); |
1928 | return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); |
1929 | case EM_Unknown: |
1930 | return UndefinedMode; |
1931 | } |
1932 | llvm_unreachable("Unknown flags are requested."); |
1933 | } |
1934 | |
1935 | CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) |
1936 | : CGOpenMPRuntime(CGM, "_", "$") { |
1937 | if (!CGM.getLangOpts().OpenMPIsDevice) |
1938 | llvm_unreachable("OpenMP NVPTX can only handle device code."); |
1939 | } |
1940 | |
1941 | void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, |
1942 | OpenMPProcBindClauseKind ProcBind, |
1943 | SourceLocation Loc) { |
1944 | |
1945 | if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) |
1946 | return; |
1947 | |
1948 | CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc); |
1949 | } |
1950 | |
1951 | void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF, |
1952 | llvm::Value *NumThreads, |
1953 | SourceLocation Loc) { |
1954 | |
1955 | if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) |
1956 | return; |
1957 | |
1958 | CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc); |
1959 | } |
1960 | |
1961 | void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, |
1962 | const Expr *NumTeams, |
1963 | const Expr *ThreadLimit, |
1964 | SourceLocation Loc) {} |
1965 | |
1966 | llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( |
1967 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1968 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1969 | |
1970 | class NVPTXPrePostActionTy : public PrePostActionTy { |
1971 | bool &IsInParallelRegion; |
1972 | bool PrevIsInParallelRegion; |
1973 | |
1974 | public: |
1975 | NVPTXPrePostActionTy(bool &IsInParallelRegion) |
1976 | : IsInParallelRegion(IsInParallelRegion) {} |
1977 | void Enter(CodeGenFunction &CGF) override { |
1978 | PrevIsInParallelRegion = IsInParallelRegion; |
1979 | IsInParallelRegion = true; |
1980 | } |
1981 | void Exit(CodeGenFunction &CGF) override { |
1982 | IsInParallelRegion = PrevIsInParallelRegion; |
1983 | } |
1984 | } Action(IsInParallelRegion); |
1985 | CodeGen.setAction(Action); |
1986 | bool PrevIsInTTDRegion = IsInTTDRegion; |
1987 | IsInTTDRegion = false; |
1988 | bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; |
1989 | IsInTargetMasterThreadRegion = false; |
1990 | auto *OutlinedFun = |
1991 | cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( |
1992 | D, ThreadIDVar, InnermostKind, CodeGen)); |
1993 | IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; |
1994 | IsInTTDRegion = PrevIsInTTDRegion; |
1995 | if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD && |
1996 | !IsInParallelRegion) { |
1997 | llvm::Function *WrapperFun = |
1998 | createParallelDataSharingWrapper(OutlinedFun, D); |
1999 | WrapperFunctionsMap[OutlinedFun] = WrapperFun; |
2000 | } |
2001 | |
2002 | return OutlinedFun; |
2003 | } |
2004 | |
2005 | |
2006 | |
2007 | static void |
2008 | getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, |
2009 | llvm::SmallVectorImpl<const ValueDecl *> &Vars) { |
2010 | (0) . __assert_fail ("isOpenMPTeamsDirective(D.getDirectiveKind()) && \"expected teams directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2011, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && |
2011 | (0) . __assert_fail ("isOpenMPTeamsDirective(D.getDirectiveKind()) && \"expected teams directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2011, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "expected teams directive."); |
2012 | const OMPExecutableDirective *Dir = &D; |
2013 | if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { |
2014 | if (const Stmt *S = getSingleCompoundChild( |
2015 | Ctx, |
2016 | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( |
2017 | ))) { |
2018 | Dir = dyn_cast<OMPExecutableDirective>(S); |
2019 | if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) |
2020 | Dir = nullptr; |
2021 | } |
2022 | } |
2023 | if (!Dir) |
2024 | return; |
2025 | for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) { |
2026 | for (const Expr *E : C->getVarRefs()) |
2027 | Vars.push_back(getPrivateItem(E)); |
2028 | } |
2029 | } |
2030 | |
2031 | |
2032 | static void |
2033 | getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, |
2034 | llvm::SmallVectorImpl<const ValueDecl *> &Vars) { |
2035 | (0) . __assert_fail ("isOpenMPTeamsDirective(D.getDirectiveKind()) && \"expected teams directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2036, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && |
2036 | (0) . __assert_fail ("isOpenMPTeamsDirective(D.getDirectiveKind()) && \"expected teams directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2036, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "expected teams directive."); |
2037 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
2038 | for (const Expr *E : C->privates()) |
2039 | Vars.push_back(getPrivateItem(E)); |
2040 | } |
2041 | } |
2042 | |
2043 | llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( |
2044 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
2045 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
2046 | SourceLocation Loc = D.getBeginLoc(); |
2047 | |
2048 | const RecordDecl *GlobalizedRD = nullptr; |
2049 | llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; |
2050 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; |
2051 | |
2052 | if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) |
2053 | getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); |
2054 | if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { |
2055 | getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); |
2056 | if (!LastPrivatesReductions.empty()) { |
2057 | GlobalizedRD = ::buildRecordForGlobalizedVars( |
2058 | CGM.getContext(), llvm::None, LastPrivatesReductions, |
2059 | MappedDeclsFields, WarpSize); |
2060 | } |
2061 | } else if (!LastPrivatesReductions.empty()) { |
2062 | (0) . __assert_fail ("!TeamAndReductions.first && \"Previous team declaration is not expected.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2063, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!TeamAndReductions.first && |
2063 | (0) . __assert_fail ("!TeamAndReductions.first && \"Previous team declaration is not expected.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2063, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Previous team declaration is not expected."); |
2064 | TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl(); |
2065 | std::swap(TeamAndReductions.second, LastPrivatesReductions); |
2066 | } |
2067 | |
2068 | |
2069 | class NVPTXPrePostActionTy : public PrePostActionTy { |
2070 | SourceLocation &Loc; |
2071 | const RecordDecl *GlobalizedRD; |
2072 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
2073 | &MappedDeclsFields; |
2074 | |
2075 | public: |
2076 | NVPTXPrePostActionTy( |
2077 | SourceLocation &Loc, const RecordDecl *GlobalizedRD, |
2078 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
2079 | &MappedDeclsFields) |
2080 | : Loc(Loc), GlobalizedRD(GlobalizedRD), |
2081 | MappedDeclsFields(MappedDeclsFields) {} |
2082 | void Enter(CodeGenFunction &CGF) override { |
2083 | auto &Rt = |
2084 | static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); |
2085 | if (GlobalizedRD) { |
2086 | auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; |
2087 | I->getSecond().GlobalRecord = GlobalizedRD; |
2088 | I->getSecond().MappedParams = |
2089 | llvm::make_unique<CodeGenFunction::OMPMapVars>(); |
2090 | DeclToAddrMapTy &Data = I->getSecond().LocalVarData; |
2091 | for (const auto &Pair : MappedDeclsFields) { |
2092 | (0) . __assert_fail ("Pair.getFirst()->isCanonicalDecl() && \"Expected canonical declaration\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2093, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Pair.getFirst()->isCanonicalDecl() && |
2093 | (0) . __assert_fail ("Pair.getFirst()->isCanonicalDecl() && \"Expected canonical declaration\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2093, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Expected canonical declaration"); |
2094 | Data.insert(std::make_pair(Pair.getFirst(), |
2095 | MappedVarData(Pair.getSecond(), |
2096 | ))); |
2097 | } |
2098 | } |
2099 | Rt.emitGenericVarsProlog(CGF, Loc); |
2100 | } |
2101 | void Exit(CodeGenFunction &CGF) override { |
2102 | static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) |
2103 | .emitGenericVarsEpilog(CGF); |
2104 | } |
2105 | } Action(Loc, GlobalizedRD, MappedDeclsFields); |
2106 | CodeGen.setAction(Action); |
2107 | llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( |
2108 | D, ThreadIDVar, InnermostKind, CodeGen); |
2109 | OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); |
2110 | OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); |
2111 | OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); |
2112 | |
2113 | return OutlinedFun; |
2114 | } |
2115 | |
2116 | void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, |
2117 | SourceLocation Loc, |
2118 | bool WithSPMDCheck) { |
2119 | if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && |
2120 | getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) |
2121 | return; |
2122 | |
2123 | CGBuilderTy &Bld = CGF.Builder; |
2124 | |
2125 | const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); |
2126 | if (I == FunctionGlobalizedDecls.end()) |
2127 | return; |
2128 | if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { |
2129 | QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); |
2130 | QualType SecGlobalRecTy; |
2131 | |
2132 | |
2133 | |
2134 | |
2135 | |
2136 | unsigned Alignment = |
2137 | CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); |
2138 | unsigned GlobalRecordSize = |
2139 | CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity(); |
2140 | GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); |
2141 | |
2142 | llvm::PointerType *GlobalRecPtrTy = |
2143 | CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo(); |
2144 | llvm::Value *GlobalRecCastAddr; |
2145 | llvm::Value *IsTTD = nullptr; |
2146 | if (!IsInTTDRegion && |
2147 | (WithSPMDCheck || |
2148 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { |
2149 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); |
2150 | llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd"); |
2151 | llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); |
2152 | if (I->getSecond().SecondaryGlobalRecord.hasValue()) { |
2153 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2154 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2155 | llvm::Value *PL = CGF.EmitRuntimeCall( |
2156 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), |
2157 | {RTLoc, ThreadID}); |
2158 | IsTTD = Bld.CreateIsNull(PL); |
2159 | } |
2160 | llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( |
2161 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); |
2162 | Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); |
2163 | |
2164 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2165 | CGF.EmitBlock(SPMDBB); |
2166 | Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy), |
2167 | CharUnits::fromQuantity(Alignment)); |
2168 | CGF.EmitBranch(ExitBB); |
2169 | |
2170 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2171 | CGF.EmitBlock(NonSPMDBB); |
2172 | llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize); |
2173 | if (const RecordDecl *SecGlobalizedVarsRecord = |
2174 | I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) { |
2175 | SecGlobalRecTy = |
2176 | CGM.getContext().getRecordType(SecGlobalizedVarsRecord); |
2177 | |
2178 | |
2179 | |
2180 | |
2181 | |
2182 | unsigned Alignment = |
2183 | CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity(); |
2184 | unsigned GlobalRecordSize = |
2185 | CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity(); |
2186 | GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); |
2187 | Size = Bld.CreateSelect( |
2188 | IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size); |
2189 | } |
2190 | |
2191 | |
2192 | llvm::Value *GlobalRecordSizeArg[] = { |
2193 | Size, CGF.Builder.getInt16()}; |
2194 | llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( |
2195 | createNVPTXRuntimeFunction( |
2196 | OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), |
2197 | GlobalRecordSizeArg); |
2198 | GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2199 | GlobalRecValue, GlobalRecPtrTy); |
2200 | CGF.EmitBlock(ExitBB); |
2201 | auto *Phi = Bld.CreatePHI(GlobalRecPtrTy, |
2202 | , "_select_stack"); |
2203 | Phi->addIncoming(RecPtr.getPointer(), SPMDBB); |
2204 | Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB); |
2205 | GlobalRecCastAddr = Phi; |
2206 | I->getSecond().GlobalRecordAddr = Phi; |
2207 | I->getSecond().IsInSPMDModeFlag = IsSPMD; |
2208 | } else if (IsInTTDRegion) { |
2209 | (0) . __assert_fail ("GlobalizedRecords.back().Records.size() < 2 && \"Expected less than 2 globalized records. one for target and one \" \"for teams.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2211, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(GlobalizedRecords.back().Records.size() < 2 && |
2210 | (0) . __assert_fail ("GlobalizedRecords.back().Records.size() < 2 && \"Expected less than 2 globalized records. one for target and one \" \"for teams.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2211, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Expected less than 2 globalized records: one for target and one " |
2211 | (0) . __assert_fail ("GlobalizedRecords.back().Records.size() < 2 && \"Expected less than 2 globalized records. one for target and one \" \"for teams.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2211, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "for teams."); |
2212 | unsigned Offset = 0; |
2213 | for (const RecordDecl *RD : GlobalizedRecords.back().Records) { |
2214 | QualType RDTy = CGM.getContext().getRecordType(RD); |
2215 | unsigned Alignment = |
2216 | CGM.getContext().getTypeAlignInChars(RDTy).getQuantity(); |
2217 | unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity(); |
2218 | Offset = |
2219 | llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment); |
2220 | } |
2221 | unsigned Alignment = |
2222 | CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); |
2223 | Offset = llvm::alignTo(Offset, Alignment); |
2224 | GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord); |
2225 | ++GlobalizedRecords.back().RegionCounter; |
2226 | if (GlobalizedRecords.back().Records.size() == 1) { |
2227 | (0) . __assert_fail ("KernelStaticGlobalized && \"Kernel static pointer must be initialized already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2228, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(KernelStaticGlobalized && |
2228 | (0) . __assert_fail ("KernelStaticGlobalized && \"Kernel static pointer must be initialized already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2228, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Kernel static pointer must be initialized already."); |
2229 | auto *UseSharedMemory = new llvm::GlobalVariable( |
2230 | CGM.getModule(), CGM.Int16Ty, , |
2231 | llvm::GlobalValue::InternalLinkage, nullptr, |
2232 | "_openmp_static_kernel$is_shared"); |
2233 | UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); |
2234 | QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( |
2235 | , ); |
2236 | llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( |
2237 | Address(UseSharedMemory, |
2238 | CGM.getContext().getTypeAlignInChars(Int16Ty)), |
2239 | , Int16Ty, Loc); |
2240 | auto *StaticGlobalized = new llvm::GlobalVariable( |
2241 | CGM.getModule(), CGM.Int8Ty, , |
2242 | llvm::GlobalValue::CommonLinkage, nullptr); |
2243 | auto *RecSize = new llvm::GlobalVariable( |
2244 | CGM.getModule(), CGM.SizeTy, , |
2245 | llvm::GlobalValue::InternalLinkage, nullptr, |
2246 | "_openmp_static_kernel$size"); |
2247 | RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); |
2248 | llvm::Value *Ld = CGF.EmitLoadOfScalar( |
2249 | Address(RecSize, CGM.getSizeAlign()), , |
2250 | CGM.getContext().getSizeType(), Loc); |
2251 | llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2252 | KernelStaticGlobalized, CGM.VoidPtrPtrTy); |
2253 | llvm::Value *GlobalRecordSizeArg[] = { |
2254 | llvm::ConstantInt::get( |
2255 | CGM.Int16Ty, |
2256 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0), |
2257 | StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; |
2258 | CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( |
2259 | OMPRTL_NVPTX__kmpc_get_team_static_memory), |
2260 | GlobalRecordSizeArg); |
2261 | GlobalizedRecords.back().Buffer = StaticGlobalized; |
2262 | GlobalizedRecords.back().RecSize = RecSize; |
2263 | GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; |
2264 | GlobalizedRecords.back().Loc = Loc; |
2265 | } |
2266 | (0) . __assert_fail ("KernelStaticGlobalized && \"Global address must be set already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2266, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(KernelStaticGlobalized && "Global address must be set already."); |
2267 | Address FrameAddr = CGF.EmitLoadOfPointer( |
2268 | Address(KernelStaticGlobalized, CGM.getPointerAlign()), |
2269 | CGM.getContext() |
2270 | .getPointerType(CGM.getContext().VoidPtrTy) |
2271 | .castAs<PointerType>()); |
2272 | llvm::Value *GlobalRecValue = |
2273 | Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer(); |
2274 | I->getSecond().GlobalRecordAddr = GlobalRecValue; |
2275 | I->getSecond().IsInSPMDModeFlag = nullptr; |
2276 | GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2277 | GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo()); |
2278 | } else { |
2279 | |
2280 | |
2281 | llvm::Value *GlobalRecordSizeArg[] = { |
2282 | llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), |
2283 | CGF.Builder.getInt16()}; |
2284 | llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( |
2285 | createNVPTXRuntimeFunction( |
2286 | OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), |
2287 | GlobalRecordSizeArg); |
2288 | GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2289 | GlobalRecValue, GlobalRecPtrTy); |
2290 | I->getSecond().GlobalRecordAddr = GlobalRecValue; |
2291 | I->getSecond().IsInSPMDModeFlag = nullptr; |
2292 | } |
2293 | LValue Base = |
2294 | CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy); |
2295 | |
2296 | |
2297 | |
2298 | LValue SecBase; |
2299 | decltype(I->getSecond().LocalVarData)::const_iterator SecIt; |
2300 | if (IsTTD) { |
2301 | SecIt = I->getSecond().SecondaryLocalVarData->begin(); |
2302 | llvm::PointerType *SecGlobalRecPtrTy = |
2303 | CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo(); |
2304 | SecBase = CGF.MakeNaturalAlignPointeeAddrLValue( |
2305 | Bld.CreatePointerBitCastOrAddrSpaceCast( |
2306 | I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy), |
2307 | SecGlobalRecTy); |
2308 | } |
2309 | for (auto &Rec : I->getSecond().LocalVarData) { |
2310 | bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); |
2311 | llvm::Value *ParValue; |
2312 | if (EscapedParam) { |
2313 | const auto *VD = cast<VarDecl>(Rec.first); |
2314 | LValue ParLVal = |
2315 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); |
2316 | ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); |
2317 | } |
2318 | LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD); |
2319 | |
2320 | QualType VarTy; |
2321 | if (Rec.second.IsOnePerTeam) { |
2322 | VarTy = Rec.second.FD->getType(); |
2323 | } else { |
2324 | llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( |
2325 | VarAddr.getAddress().getPointer(), |
2326 | {Bld.getInt32(0), getNVPTXLaneID(CGF)}); |
2327 | VarTy = |
2328 | Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); |
2329 | VarAddr = CGF.MakeAddrLValue( |
2330 | Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, |
2331 | AlignmentSource::Decl); |
2332 | } |
2333 | Rec.second.PrivateAddr = VarAddr.getAddress(); |
2334 | if (!IsInTTDRegion && |
2335 | (WithSPMDCheck || |
2336 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { |
2337 | (0) . __assert_fail ("I->getSecond().IsInSPMDModeFlag && \"Expected unknown execution mode or required SPMD check.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2338, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(I->getSecond().IsInSPMDModeFlag && |
2338 | (0) . __assert_fail ("I->getSecond().IsInSPMDModeFlag && \"Expected unknown execution mode or required SPMD check.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2338, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Expected unknown execution mode or required SPMD check."); |
2339 | if (IsTTD) { |
2340 | (0) . __assert_fail ("SecIt->second.IsOnePerTeam && \"Secondary glob data must be one per team.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2341, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(SecIt->second.IsOnePerTeam && |
2341 | (0) . __assert_fail ("SecIt->second.IsOnePerTeam && \"Secondary glob data must be one per team.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2341, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Secondary glob data must be one per team."); |
2342 | LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); |
2343 | VarAddr.setAddress( |
2344 | Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), |
2345 | VarAddr.getPointer()), |
2346 | VarAddr.getAlignment())); |
2347 | Rec.second.PrivateAddr = VarAddr.getAddress(); |
2348 | } |
2349 | Address GlobalPtr = Rec.second.PrivateAddr; |
2350 | Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); |
2351 | Rec.second.PrivateAddr = Address( |
2352 | Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag, |
2353 | LocalAddr.getPointer(), GlobalPtr.getPointer()), |
2354 | LocalAddr.getAlignment()); |
2355 | } |
2356 | if (EscapedParam) { |
2357 | const auto *VD = cast<VarDecl>(Rec.first); |
2358 | CGF.EmitStoreOfScalar(ParValue, VarAddr); |
2359 | I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); |
2360 | } |
2361 | if (IsTTD) |
2362 | ++SecIt; |
2363 | } |
2364 | } |
2365 | for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { |
2366 | |
2367 | |
2368 | |
2369 | |
2370 | CGBuilderTy &Bld = CGF.Builder; |
2371 | llvm::Value *Size = CGF.getTypeSize(VD->getType()); |
2372 | CharUnits Align = CGM.getContext().getDeclAlign(VD); |
2373 | Size = Bld.CreateNUWAdd( |
2374 | Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); |
2375 | llvm::Value *AlignVal = |
2376 | llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); |
2377 | Size = Bld.CreateUDiv(Size, AlignVal); |
2378 | Size = Bld.CreateNUWMul(Size, AlignVal); |
2379 | |
2380 | |
2381 | llvm::Value *GlobalRecordSizeArg[] = { |
2382 | Size, CGF.Builder.getInt16()}; |
2383 | llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( |
2384 | createNVPTXRuntimeFunction( |
2385 | OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), |
2386 | GlobalRecordSizeArg); |
2387 | llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2388 | GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); |
2389 | LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(), |
2390 | CGM.getContext().getDeclAlign(VD), |
2391 | AlignmentSource::Decl); |
2392 | I->getSecond().MappedParams->setVarAddr(CGF, cast<VarDecl>(VD), |
2393 | Base.getAddress()); |
2394 | I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); |
2395 | } |
2396 | I->getSecond().MappedParams->apply(CGF); |
2397 | } |
2398 | |
2399 | void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, |
2400 | bool WithSPMDCheck) { |
2401 | if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && |
2402 | getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) |
2403 | return; |
2404 | |
2405 | const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); |
2406 | if (I != FunctionGlobalizedDecls.end()) { |
2407 | I->getSecond().MappedParams->restore(CGF); |
2408 | if (!CGF.HaveInsertPoint()) |
2409 | return; |
2410 | for (llvm::Value *Addr : |
2411 | llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { |
2412 | CGF.EmitRuntimeCall( |
2413 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), |
2414 | Addr); |
2415 | } |
2416 | if (I->getSecond().GlobalRecordAddr) { |
2417 | if (!IsInTTDRegion && |
2418 | (WithSPMDCheck || |
2419 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { |
2420 | CGBuilderTy &Bld = CGF.Builder; |
2421 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); |
2422 | llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); |
2423 | Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB); |
2424 | |
2425 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2426 | CGF.EmitBlock(NonSPMDBB); |
2427 | CGF.EmitRuntimeCall( |
2428 | createNVPTXRuntimeFunction( |
2429 | OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), |
2430 | CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); |
2431 | CGF.EmitBlock(ExitBB); |
2432 | } else if (IsInTTDRegion) { |
2433 | 0.") ? static_cast (0) . __assert_fail ("GlobalizedRecords.back().RegionCounter > 0 && \"region counter must be > 0.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2434, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(GlobalizedRecords.back().RegionCounter > 0 && |
2434 | 0.") ? static_cast (0) . __assert_fail ("GlobalizedRecords.back().RegionCounter > 0 && \"region counter must be > 0.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2434, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "region counter must be > 0."); |
2435 | --GlobalizedRecords.back().RegionCounter; |
2436 | |
2437 | if (GlobalizedRecords.back().RegionCounter == 0) { |
2438 | QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( |
2439 | , ); |
2440 | llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( |
2441 | Address(GlobalizedRecords.back().UseSharedMemory, |
2442 | CGM.getContext().getTypeAlignInChars(Int16Ty)), |
2443 | , Int16Ty, GlobalizedRecords.back().Loc); |
2444 | llvm::Value *Args[] = { |
2445 | llvm::ConstantInt::get( |
2446 | CGM.Int16Ty, |
2447 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0), |
2448 | IsInSharedMemory}; |
2449 | CGF.EmitRuntimeCall( |
2450 | createNVPTXRuntimeFunction( |
2451 | OMPRTL_NVPTX__kmpc_restore_team_static_memory), |
2452 | Args); |
2453 | } |
2454 | } else { |
2455 | CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( |
2456 | OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), |
2457 | I->getSecond().GlobalRecordAddr); |
2458 | } |
2459 | } |
2460 | } |
2461 | } |
2462 | |
2463 | void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, |
2464 | const OMPExecutableDirective &D, |
2465 | SourceLocation Loc, |
2466 | llvm::Function *OutlinedFn, |
2467 | ArrayRef<llvm::Value *> CapturedVars) { |
2468 | if (!CGF.HaveInsertPoint()) |
2469 | return; |
2470 | |
2471 | Address ZeroAddr = CGF.CreateMemTemp( |
2472 | CGF.getContext().getIntTypeForBitwidth(, ), |
2473 | ".zero.addr"); |
2474 | CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0)); |
2475 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2476 | OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); |
2477 | OutlinedFnArgs.push_back(ZeroAddr.getPointer()); |
2478 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2479 | emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2480 | } |
2481 | |
2482 | void CGOpenMPRuntimeNVPTX::emitParallelCall( |
2483 | CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, |
2484 | ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { |
2485 | if (!CGF.HaveInsertPoint()) |
2486 | return; |
2487 | |
2488 | if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) |
2489 | emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); |
2490 | else |
2491 | emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); |
2492 | } |
2493 | |
2494 | void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( |
2495 | CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, |
2496 | ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { |
2497 | llvm::Function *Fn = cast<llvm::Function>(OutlinedFn); |
2498 | |
2499 | |
2500 | Fn->setLinkage(llvm::GlobalValue::InternalLinkage); |
2501 | |
2502 | Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( |
2503 | , ), |
2504 | ".zero.addr"); |
2505 | CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0)); |
2506 | |
2507 | Address ThreadIDAddr = ZeroAddr; |
2508 | auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr]( |
2509 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
2510 | Action.Enter(CGF); |
2511 | |
2512 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2513 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2514 | OutlinedFnArgs.push_back(ZeroAddr.getPointer()); |
2515 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2516 | emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); |
2517 | }; |
2518 | auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF, |
2519 | PrePostActionTy &) { |
2520 | |
2521 | RegionCodeGenTy RCG(CodeGen); |
2522 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2523 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2524 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2525 | |
2526 | NVPTXActionTy Action( |
2527 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), |
2528 | Args, |
2529 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), |
2530 | Args); |
2531 | RCG.setAction(Action); |
2532 | RCG(CGF); |
2533 | }; |
2534 | |
2535 | auto &&L0ParallelGen = [this, CapturedVars, Fn](CodeGenFunction &CGF, |
2536 | PrePostActionTy &Action) { |
2537 | CGBuilderTy &Bld = CGF.Builder; |
2538 | llvm::Function *WFn = WrapperFunctionsMap[Fn]; |
2539 | (0) . __assert_fail ("WFn && \"Wrapper function does not exist!\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2539, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(WFn && "Wrapper function does not exist!"); |
2540 | llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); |
2541 | |
2542 | |
2543 | llvm::Value *Args[] = {ID, .getInt16(1)}; |
2544 | CGF.EmitRuntimeCall( |
2545 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), |
2546 | Args); |
2547 | |
2548 | |
2549 | |
2550 | CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); |
2551 | |
2552 | |
2553 | if (!CapturedVars.empty()) { |
2554 | |
2555 | Address SharedArgs = |
2556 | CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "shared_arg_refs"); |
2557 | llvm::Value *SharedArgsPtr = SharedArgs.getPointer(); |
2558 | |
2559 | llvm::Value *DataSharingArgs[] = { |
2560 | SharedArgsPtr, |
2561 | llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; |
2562 | CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( |
2563 | OMPRTL_NVPTX__kmpc_begin_sharing_variables), |
2564 | DataSharingArgs); |
2565 | |
2566 | |
2567 | unsigned Idx = 0; |
2568 | ASTContext &Ctx = CGF.getContext(); |
2569 | Address SharedArgListAddress = CGF.EmitLoadOfPointer( |
2570 | SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy)) |
2571 | .castAs<PointerType>()); |
2572 | for (llvm::Value *V : CapturedVars) { |
2573 | Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); |
2574 | llvm::Value *PtrV; |
2575 | if (V->getType()->isIntegerTy()) |
2576 | PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); |
2577 | else |
2578 | PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy); |
2579 | CGF.EmitStoreOfScalar(PtrV, Dst, , |
2580 | Ctx.getPointerType(Ctx.VoidPtrTy)); |
2581 | ++Idx; |
2582 | } |
2583 | } |
2584 | |
2585 | |
2586 | |
2587 | syncCTAThreads(CGF); |
2588 | |
2589 | |
2590 | |
2591 | |
2592 | |
2593 | |
2594 | |
2595 | syncCTAThreads(CGF); |
2596 | |
2597 | if (!CapturedVars.empty()) |
2598 | CGF.EmitRuntimeCall( |
2599 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables)); |
2600 | |
2601 | |
2602 | Work.emplace_back(WFn); |
2603 | }; |
2604 | |
2605 | auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen]( |
2606 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
2607 | if (IsInParallelRegion) { |
2608 | SeqGen(CGF, Action); |
2609 | } else if (IsInTargetMasterThreadRegion) { |
2610 | L0ParallelGen(CGF, Action); |
2611 | } else { |
2612 | |
2613 | |
2614 | |
2615 | |
2616 | |
2617 | |
2618 | CGBuilderTy &Bld = CGF.Builder; |
2619 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); |
2620 | llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); |
2621 | llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); |
2622 | llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); |
2623 | llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( |
2624 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); |
2625 | Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); |
2626 | |
2627 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2628 | CGF.EmitBlock(ParallelCheckBB); |
2629 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2630 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2631 | llvm::Value *PL = CGF.EmitRuntimeCall( |
2632 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), |
2633 | {RTLoc, ThreadID}); |
2634 | llvm::Value *Res = Bld.CreateIsNotNull(PL); |
2635 | Bld.CreateCondBr(Res, SeqBB, MasterBB); |
2636 | CGF.EmitBlock(SeqBB); |
2637 | SeqGen(CGF, Action); |
2638 | CGF.EmitBranch(ExitBB); |
2639 | |
2640 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2641 | CGF.EmitBlock(MasterBB); |
2642 | L0ParallelGen(CGF, Action); |
2643 | CGF.EmitBranch(ExitBB); |
2644 | |
2645 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2646 | |
2647 | CGF.EmitBlock(ExitBB, ); |
2648 | } |
2649 | }; |
2650 | |
2651 | if (IfCond) { |
2652 | emitOMPIfClause(CGF, IfCond, LNParallelGen, SeqGen); |
2653 | } else { |
2654 | CodeGenFunction::RunCleanupsScope Scope(CGF); |
2655 | RegionCodeGenTy ThenRCG(LNParallelGen); |
2656 | ThenRCG(CGF); |
2657 | } |
2658 | } |
2659 | |
2660 | void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( |
2661 | CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, |
2662 | ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { |
2663 | |
2664 | |
2665 | |
2666 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2667 | |
2668 | Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( |
2669 | , ), |
2670 | ".zero.addr"); |
2671 | CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0)); |
2672 | |
2673 | Address ThreadIDAddr = ZeroAddr; |
2674 | auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr, |
2675 | &ThreadIDAddr](CodeGenFunction &CGF, |
2676 | PrePostActionTy &Action) { |
2677 | Action.Enter(CGF); |
2678 | |
2679 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2680 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2681 | OutlinedFnArgs.push_back(ZeroAddr.getPointer()); |
2682 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2683 | emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2684 | }; |
2685 | auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF, |
2686 | PrePostActionTy &) { |
2687 | |
2688 | RegionCodeGenTy RCG(CodeGen); |
2689 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2690 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2691 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2692 | |
2693 | NVPTXActionTy Action( |
2694 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), |
2695 | Args, |
2696 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), |
2697 | Args); |
2698 | RCG.setAction(Action); |
2699 | RCG(CGF); |
2700 | }; |
2701 | |
2702 | if (IsInTargetMasterThreadRegion) { |
2703 | |
2704 | ThreadIDAddr = emitThreadIDAddress(CGF, Loc); |
2705 | RegionCodeGenTy RCG(CodeGen); |
2706 | RCG(CGF); |
2707 | } else { |
2708 | |
2709 | |
2710 | |
2711 | RegionCodeGenTy RCG(SeqGen); |
2712 | RCG(CGF); |
2713 | } |
2714 | } |
2715 | |
2716 | void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { |
2717 | |
2718 | if (!CGF.HaveInsertPoint()) |
2719 | return; |
2720 | |
2721 | |
2722 | llvm::Value *Args[] = { |
2723 | llvm::ConstantPointerNull::get( |
2724 | cast<llvm::PointerType>(getIdentTyPointerTy())), |
2725 | llvm::ConstantInt::get(CGF.Int32Ty, , )}; |
2726 | CGF.EmitRuntimeCall( |
2727 | createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); |
2728 | } |
2729 | |
2730 | void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, |
2731 | SourceLocation Loc, |
2732 | OpenMPDirectiveKind Kind, bool, |
2733 | bool) { |
2734 | |
2735 | if (!CGF.HaveInsertPoint()) |
2736 | return; |
2737 | |
2738 | unsigned Flags = getDefaultFlagsForBarriers(Kind); |
2739 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), |
2740 | getThreadID(CGF, Loc)}; |
2741 | CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); |
2742 | } |
2743 | |
2744 | void CGOpenMPRuntimeNVPTX::emitCriticalRegion( |
2745 | CodeGenFunction &CGF, StringRef CriticalName, |
2746 | const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, |
2747 | const Expr *Hint) { |
2748 | llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.critical.loop"); |
2749 | llvm::BasicBlock *TestBB = CGF.createBasicBlock("omp.critical.test"); |
2750 | llvm::BasicBlock *SyncBB = CGF.createBasicBlock("omp.critical.sync"); |
2751 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); |
2752 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); |
2753 | |
2754 | |
2755 | llvm::Value *ThreadID = getNVPTXThreadID(CGF); |
2756 | |
2757 | |
2758 | llvm::Value *TeamWidth = getNVPTXNumThreads(CGF); |
2759 | |
2760 | |
2761 | QualType Int32Ty = |
2762 | CGF.getContext().getIntTypeForBitwidth(, ); |
2763 | Address Counter = CGF.CreateMemTemp(Int32Ty, "critical_counter"); |
2764 | LValue CounterLVal = CGF.MakeAddrLValue(Counter, Int32Ty); |
2765 | CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), CounterLVal, |
2766 | ); |
2767 | |
2768 | |
2769 | CGF.EmitBlock(LoopBB); |
2770 | llvm::Value *CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc); |
2771 | llvm::Value *CmpLoopBound = CGF.Builder.CreateICmpSLT(CounterVal, TeamWidth); |
2772 | CGF.Builder.CreateCondBr(CmpLoopBound, TestBB, ExitBB); |
2773 | |
2774 | |
2775 | |
2776 | CGF.EmitBlock(TestBB); |
2777 | CounterVal = CGF.EmitLoadOfScalar(CounterLVal, Loc); |
2778 | llvm::Value *CmpThreadToCounter = |
2779 | CGF.Builder.CreateICmpEQ(ThreadID, CounterVal); |
2780 | CGF.Builder.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB); |
2781 | |
2782 | |
2783 | CGF.EmitBlock(BodyBB); |
2784 | |
2785 | |
2786 | CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc, |
2787 | Hint); |
2788 | |
2789 | |
2790 | |
2791 | |
2792 | |
2793 | CGF.EmitBlock(SyncBB); |
2794 | emitBarrierCall(CGF, Loc, OMPD_unknown, , |
2795 | ); |
2796 | |
2797 | llvm::Value *IncCounterVal = |
2798 | CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); |
2799 | CGF.EmitStoreOfScalar(IncCounterVal, CounterLVal); |
2800 | CGF.EmitBranch(LoopBB); |
2801 | |
2802 | |
2803 | CGF.EmitBlock(ExitBB, ); |
2804 | } |
2805 | |
2806 | |
2807 | static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val, |
2808 | QualType ValTy, QualType CastTy, |
2809 | SourceLocation Loc) { |
2810 | (0) . __assert_fail ("!CGF.getContext().getTypeSizeInChars(CastTy).isZero() && \"Cast type must sized.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2811, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!CGF.getContext().getTypeSizeInChars(CastTy).isZero() && |
2811 | (0) . __assert_fail ("!CGF.getContext().getTypeSizeInChars(CastTy).isZero() && \"Cast type must sized.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2811, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Cast type must sized."); |
2812 | (0) . __assert_fail ("!CGF.getContext().getTypeSizeInChars(ValTy).isZero() && \"Val type must sized.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2813, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!CGF.getContext().getTypeSizeInChars(ValTy).isZero() && |
2813 | (0) . __assert_fail ("!CGF.getContext().getTypeSizeInChars(ValTy).isZero() && \"Val type must sized.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2813, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Val type must sized."); |
2814 | llvm::Type *LLVMCastTy = CGF.ConvertTypeForMem(CastTy); |
2815 | if (ValTy == CastTy) |
2816 | return Val; |
2817 | if (CGF.getContext().getTypeSizeInChars(ValTy) == |
2818 | CGF.getContext().getTypeSizeInChars(CastTy)) |
2819 | return CGF.Builder.CreateBitCast(Val, LLVMCastTy); |
2820 | if (CastTy->isIntegerType() && ValTy->isIntegerType()) |
2821 | return CGF.Builder.CreateIntCast(Val, LLVMCastTy, |
2822 | CastTy->hasSignedIntegerRepresentation()); |
2823 | Address CastItem = CGF.CreateMemTemp(CastTy); |
2824 | Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2825 | CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace())); |
2826 | CGF.EmitStoreOfScalar(Val, ValCastItem, , ValTy); |
2827 | return CGF.EmitLoadOfScalar(CastItem, , CastTy, Loc); |
2828 | } |
2829 | |
2830 | |
2831 | |
2832 | static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF, |
2833 | llvm::Value *Elem, |
2834 | QualType ElemType, |
2835 | llvm::Value *Offset, |
2836 | SourceLocation Loc) { |
2837 | CodeGenModule &CGM = CGF.CGM; |
2838 | CGBuilderTy &Bld = CGF.Builder; |
2839 | CGOpenMPRuntimeNVPTX &RT = |
2840 | *(static_cast<CGOpenMPRuntimeNVPTX *>(&CGM.getOpenMPRuntime())); |
2841 | |
2842 | CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); |
2843 | (0) . __assert_fail ("Size.getQuantity() <= 8 && \"Unsupported bitwidth in shuffle instruction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2844, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Size.getQuantity() <= 8 && |
2844 | (0) . __assert_fail ("Size.getQuantity() <= 8 && \"Unsupported bitwidth in shuffle instruction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 2844, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Unsupported bitwidth in shuffle instruction."); |
2845 | |
2846 | OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4 |
2847 | ? OMPRTL_NVPTX__kmpc_shuffle_int32 |
2848 | : OMPRTL_NVPTX__kmpc_shuffle_int64; |
2849 | |
2850 | |
2851 | QualType CastTy = CGF.getContext().getIntTypeForBitwidth( |
2852 | Size.getQuantity() <= 4 ? 32 : 64, ); |
2853 | llvm::Value *ElemCast = castValueToType(CGF, Elem, ElemType, CastTy, Loc); |
2854 | llvm::Value *WarpSize = |
2855 | Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, ); |
2856 | |
2857 | llvm::Value *ShuffledVal = CGF.EmitRuntimeCall( |
2858 | RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize}); |
2859 | |
2860 | return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc); |
2861 | } |
2862 | |
2863 | static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, |
2864 | Address DestAddr, QualType ElemType, |
2865 | llvm::Value *Offset, SourceLocation Loc) { |
2866 | CGBuilderTy &Bld = CGF.Builder; |
2867 | |
2868 | CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType); |
2869 | |
2870 | |
2871 | |
2872 | |
2873 | |
2874 | |
2875 | |
2876 | |
2877 | |
2878 | |
2879 | Address ElemPtr = DestAddr; |
2880 | Address Ptr = SrcAddr; |
2881 | Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast( |
2882 | Bld.CreateConstGEP(SrcAddr, 1), CGF.VoidPtrTy); |
2883 | for (int IntSize = 8; IntSize >= 1; IntSize /= 2) { |
2884 | if (Size < CharUnits::fromQuantity(IntSize)) |
2885 | continue; |
2886 | QualType IntType = CGF.getContext().getIntTypeForBitwidth( |
2887 | CGF.getContext().toBits(CharUnits::fromQuantity(IntSize)), |
2888 | ); |
2889 | llvm::Type *IntTy = CGF.ConvertTypeForMem(IntType); |
2890 | Ptr = Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr, IntTy->getPointerTo()); |
2891 | ElemPtr = |
2892 | Bld.CreatePointerBitCastOrAddrSpaceCast(ElemPtr, IntTy->getPointerTo()); |
2893 | if (Size.getQuantity() / IntSize > 1) { |
2894 | llvm::BasicBlock *PreCondBB = CGF.createBasicBlock(".shuffle.pre_cond"); |
2895 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".shuffle.then"); |
2896 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".shuffle.exit"); |
2897 | llvm::BasicBlock *CurrentBB = Bld.GetInsertBlock(); |
2898 | CGF.EmitBlock(PreCondBB); |
2899 | llvm::PHINode *PhiSrc = |
2900 | Bld.CreatePHI(Ptr.getType(), ); |
2901 | PhiSrc->addIncoming(Ptr.getPointer(), CurrentBB); |
2902 | llvm::PHINode *PhiDest = |
2903 | Bld.CreatePHI(ElemPtr.getType(), ); |
2904 | PhiDest->addIncoming(ElemPtr.getPointer(), CurrentBB); |
2905 | Ptr = Address(PhiSrc, Ptr.getAlignment()); |
2906 | ElemPtr = Address(PhiDest, ElemPtr.getAlignment()); |
2907 | llvm::Value *PtrDiff = Bld.CreatePtrDiff( |
2908 | PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast( |
2909 | Ptr.getPointer(), CGF.VoidPtrTy)); |
2910 | Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)), |
2911 | ThenBB, ExitBB); |
2912 | CGF.EmitBlock(ThenBB); |
2913 | llvm::Value *Res = createRuntimeShuffleFunction( |
2914 | CGF, CGF.EmitLoadOfScalar(Ptr, , IntType, Loc), |
2915 | IntType, Offset, Loc); |
2916 | CGF.EmitStoreOfScalar(Res, ElemPtr, , IntType); |
2917 | Address LocalPtr = Bld.CreateConstGEP(Ptr, 1); |
2918 | Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1); |
2919 | PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB); |
2920 | PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB); |
2921 | CGF.EmitBranch(PreCondBB); |
2922 | CGF.EmitBlock(ExitBB); |
2923 | } else { |
2924 | llvm::Value *Res = createRuntimeShuffleFunction( |
2925 | CGF, CGF.EmitLoadOfScalar(Ptr, , IntType, Loc), |
2926 | IntType, Offset, Loc); |
2927 | CGF.EmitStoreOfScalar(Res, ElemPtr, , IntType); |
2928 | Ptr = Bld.CreateConstGEP(Ptr, 1); |
2929 | ElemPtr = Bld.CreateConstGEP(ElemPtr, 1); |
2930 | } |
2931 | Size = Size % IntSize; |
2932 | } |
2933 | } |
2934 | |
2935 | namespace { |
2936 | enum CopyAction : unsigned { |
2937 | |
2938 | |
2939 | RemoteLaneToThread, |
2940 | |
2941 | ThreadCopy, |
2942 | |
2943 | ThreadToScratchpad, |
2944 | |
2945 | |
2946 | ScratchpadToThread, |
2947 | }; |
2948 | } |
2949 | |
2950 | struct CopyOptionsTy { |
2951 | llvm::Value *RemoteLaneOffset; |
2952 | llvm::Value *ScratchpadIndex; |
2953 | llvm::Value *ScratchpadWidth; |
2954 | }; |
2955 | |
2956 | |
2957 | |
2958 | static void emitReductionListCopy( |
2959 | CopyAction Action, CodeGenFunction &CGF, QualType ReductionArrayTy, |
2960 | ArrayRef<const Expr *> Privates, Address SrcBase, Address DestBase, |
2961 | CopyOptionsTy CopyOptions = {nullptr, nullptr, nullptr}) { |
2962 | |
2963 | CodeGenModule &CGM = CGF.CGM; |
2964 | ASTContext &C = CGM.getContext(); |
2965 | CGBuilderTy &Bld = CGF.Builder; |
2966 | |
2967 | llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset; |
2968 | llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex; |
2969 | llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth; |
2970 | |
2971 | |
2972 | |
2973 | unsigned Idx = 0; |
2974 | unsigned Size = Privates.size(); |
2975 | for (const Expr *Private : Privates) { |
2976 | Address SrcElementAddr = Address::invalid(); |
2977 | Address DestElementAddr = Address::invalid(); |
2978 | Address DestElementPtrAddr = Address::invalid(); |
2979 | |
2980 | bool ShuffleInElement = false; |
2981 | |
2982 | |
2983 | bool UpdateDestListPtr = false; |
2984 | |
2985 | |
2986 | bool IncrScratchpadSrc = false; |
2987 | bool IncrScratchpadDest = false; |
2988 | |
2989 | switch (Action) { |
2990 | case RemoteLaneToThread: { |
2991 | |
2992 | Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); |
2993 | SrcElementAddr = CGF.EmitLoadOfPointer( |
2994 | SrcElementPtrAddr, |
2995 | C.getPointerType(Private->getType())->castAs<PointerType>()); |
2996 | |
2997 | |
2998 | |
2999 | DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); |
3000 | DestElementAddr = |
3001 | CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); |
3002 | ShuffleInElement = true; |
3003 | UpdateDestListPtr = true; |
3004 | break; |
3005 | } |
3006 | case ThreadCopy: { |
3007 | |
3008 | Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); |
3009 | SrcElementAddr = CGF.EmitLoadOfPointer( |
3010 | SrcElementPtrAddr, |
3011 | C.getPointerType(Private->getType())->castAs<PointerType>()); |
3012 | |
3013 | |
3014 | |
3015 | DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); |
3016 | DestElementAddr = CGF.EmitLoadOfPointer( |
3017 | DestElementPtrAddr, |
3018 | C.getPointerType(Private->getType())->castAs<PointerType>()); |
3019 | break; |
3020 | } |
3021 | case ThreadToScratchpad: { |
3022 | |
3023 | Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); |
3024 | SrcElementAddr = CGF.EmitLoadOfPointer( |
3025 | SrcElementPtrAddr, |
3026 | C.getPointerType(Private->getType())->castAs<PointerType>()); |
3027 | |
3028 | |
3029 | |
3030 | llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); |
3031 | llvm::Value *CurrentOffset = |
3032 | Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); |
3033 | llvm::Value *ScratchPadElemAbsolutePtrVal = |
3034 | Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset); |
3035 | ScratchPadElemAbsolutePtrVal = |
3036 | Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); |
3037 | DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, |
3038 | C.getTypeAlignInChars(Private->getType())); |
3039 | IncrScratchpadDest = true; |
3040 | break; |
3041 | } |
3042 | case ScratchpadToThread: { |
3043 | |
3044 | |
3045 | llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); |
3046 | llvm::Value *CurrentOffset = |
3047 | Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex); |
3048 | llvm::Value *ScratchPadElemAbsolutePtrVal = |
3049 | Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset); |
3050 | ScratchPadElemAbsolutePtrVal = |
3051 | Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy); |
3052 | SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, |
3053 | C.getTypeAlignInChars(Private->getType())); |
3054 | IncrScratchpadSrc = true; |
3055 | |
3056 | |
3057 | |
3058 | DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); |
3059 | DestElementAddr = |
3060 | CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); |
3061 | UpdateDestListPtr = true; |
3062 | break; |
3063 | } |
3064 | } |
3065 | |
3066 | |
3067 | |
3068 | SrcElementAddr = Bld.CreateElementBitCast( |
3069 | SrcElementAddr, CGF.ConvertTypeForMem(Private->getType())); |
3070 | DestElementAddr = Bld.CreateElementBitCast(DestElementAddr, |
3071 | SrcElementAddr.getElementType()); |
3072 | |
3073 | |
3074 | |
3075 | if (ShuffleInElement) { |
3076 | shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(), |
3077 | RemoteLaneOffset, Private->getExprLoc()); |
3078 | } else { |
3079 | switch (CGF.getEvaluationKind(Private->getType())) { |
3080 | case TEK_Scalar: { |
3081 | llvm::Value *Elem = |
3082 | CGF.EmitLoadOfScalar(SrcElementAddr, , |
3083 | Private->getType(), Private->getExprLoc()); |
3084 | |
3085 | CGF.EmitStoreOfScalar(Elem, DestElementAddr, , |
3086 | Private->getType()); |
3087 | break; |
3088 | } |
3089 | case TEK_Complex: { |
3090 | CodeGenFunction::ComplexPairTy Elem = CGF.EmitLoadOfComplex( |
3091 | CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), |
3092 | Private->getExprLoc()); |
3093 | CGF.EmitStoreOfComplex( |
3094 | Elem, CGF.MakeAddrLValue(DestElementAddr, Private->getType()), |
3095 | ); |
3096 | break; |
3097 | } |
3098 | case TEK_Aggregate: |
3099 | CGF.EmitAggregateCopy( |
3100 | CGF.MakeAddrLValue(DestElementAddr, Private->getType()), |
3101 | CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), |
3102 | Private->getType(), AggValueSlot::DoesNotOverlap); |
3103 | break; |
3104 | } |
3105 | } |
3106 | |
3107 | |
3108 | |
3109 | |
3110 | |
3111 | |
3112 | if (UpdateDestListPtr) { |
3113 | CGF.EmitStoreOfScalar(Bld.CreatePointerBitCastOrAddrSpaceCast( |
3114 | DestElementAddr.getPointer(), CGF.VoidPtrTy), |
3115 | DestElementPtrAddr, , |
3116 | C.VoidPtrTy); |
3117 | } |
3118 | |
3119 | |
3120 | |
3121 | |
3122 | if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) { |
3123 | llvm::Value *ScratchpadBasePtr = |
3124 | IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer(); |
3125 | llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType()); |
3126 | ScratchpadBasePtr = Bld.CreateNUWAdd( |
3127 | ScratchpadBasePtr, |
3128 | Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars)); |
3129 | |
3130 | |
3131 | ScratchpadBasePtr = Bld.CreateNUWSub( |
3132 | ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); |
3133 | ScratchpadBasePtr = Bld.CreateUDiv( |
3134 | ScratchpadBasePtr, |
3135 | llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); |
3136 | ScratchpadBasePtr = Bld.CreateNUWAdd( |
3137 | ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1)); |
3138 | ScratchpadBasePtr = Bld.CreateNUWMul( |
3139 | ScratchpadBasePtr, |
3140 | llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment)); |
3141 | |
3142 | if (IncrScratchpadDest) |
3143 | DestBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); |
3144 | else |
3145 | SrcBase = Address(ScratchpadBasePtr, CGF.getPointerAlign()); |
3146 | } |
3147 | |
3148 | ++Idx; |
3149 | } |
3150 | } |
3151 | |
3152 | |
3153 | |
3154 | |
3155 | |
3156 | |
3157 | |
3158 | |
3159 | |
3160 | |
3161 | |
3162 | |
3163 | |
3164 | static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, |
3165 | ArrayRef<const Expr *> Privates, |
3166 | QualType ReductionArrayTy, |
3167 | SourceLocation Loc) { |
3168 | ASTContext &C = CGM.getContext(); |
3169 | llvm::Module &M = CGM.getModule(); |
3170 | |
3171 | |
3172 | |
3173 | |
3174 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3175 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3176 | |
3177 | |
3178 | ImplicitParamDecl NumWarpsArg(C, , Loc, , |
3179 | C.getIntTypeForBitwidth(32, true), |
3180 | ImplicitParamDecl::Other); |
3181 | FunctionArgList Args; |
3182 | Args.push_back(&ReduceListArg); |
3183 | Args.push_back(&NumWarpsArg); |
3184 | |
3185 | const CGFunctionInfo &CGFI = |
3186 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3187 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
3188 | llvm::GlobalValue::InternalLinkage, |
3189 | "_omp_reduction_inter_warp_copy_func", &M); |
3190 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3191 | Fn->setDoesNotRecurse(); |
3192 | CodeGenFunction CGF(CGM); |
3193 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3194 | |
3195 | CGBuilderTy &Bld = CGF.Builder; |
3196 | |
3197 | |
3198 | |
3199 | |
3200 | |
3201 | |
3202 | |
3203 | |
3204 | StringRef TransferMediumName = |
3205 | "__openmp_nvptx_data_transfer_temporary_storage"; |
3206 | llvm::GlobalVariable *TransferMedium = |
3207 | M.getGlobalVariable(TransferMediumName); |
3208 | if (!TransferMedium) { |
3209 | auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); |
3210 | unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); |
3211 | TransferMedium = new llvm::GlobalVariable( |
3212 | M, Ty, , llvm::GlobalVariable::CommonLinkage, |
3213 | llvm::Constant::getNullValue(Ty), TransferMediumName, |
3214 | , llvm::GlobalVariable::NotThreadLocal, |
3215 | SharedAddressSpace); |
3216 | CGM.addCompilerUsedGlobal(TransferMedium); |
3217 | } |
3218 | |
3219 | |
3220 | llvm::Value *ThreadID = getNVPTXThreadID(CGF); |
3221 | |
3222 | llvm::Value *LaneID = getNVPTXLaneID(CGF); |
3223 | |
3224 | llvm::Value *WarpID = getNVPTXWarpID(CGF); |
3225 | |
3226 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3227 | Address LocalReduceList( |
3228 | Bld.CreatePointerBitCastOrAddrSpaceCast( |
3229 | CGF.EmitLoadOfScalar(AddrReduceListArg, , |
3230 | C.VoidPtrTy, Loc), |
3231 | CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), |
3232 | CGF.getPointerAlign()); |
3233 | |
3234 | unsigned Idx = 0; |
3235 | for (const Expr *Private : Privates) { |
3236 | |
3237 | |
3238 | |
3239 | |
3240 | unsigned RealTySize = |
3241 | C.getTypeSizeInChars(Private->getType()) |
3242 | .alignTo(C.getTypeAlignInChars(Private->getType())) |
3243 | .getQuantity(); |
3244 | for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) { |
3245 | unsigned NumIters = RealTySize / TySize; |
3246 | if (NumIters == 0) |
3247 | continue; |
3248 | QualType CType = C.getIntTypeForBitwidth( |
3249 | C.toBits(CharUnits::fromQuantity(TySize)), ); |
3250 | llvm::Type *CopyType = CGF.ConvertTypeForMem(CType); |
3251 | CharUnits Align = CharUnits::fromQuantity(TySize); |
3252 | llvm::Value *Cnt = nullptr; |
3253 | Address CntAddr = Address::invalid(); |
3254 | llvm::BasicBlock *PrecondBB = nullptr; |
3255 | llvm::BasicBlock *ExitBB = nullptr; |
3256 | if (NumIters > 1) { |
3257 | CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr"); |
3258 | CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr, |
3259 | , C.IntTy); |
3260 | PrecondBB = CGF.createBasicBlock("precond"); |
3261 | ExitBB = CGF.createBasicBlock("exit"); |
3262 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body"); |
3263 | |
3264 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
3265 | CGF.EmitBlock(PrecondBB); |
3266 | Cnt = CGF.EmitLoadOfScalar(CntAddr, , C.IntTy, Loc); |
3267 | llvm::Value *Cmp = |
3268 | Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters)); |
3269 | Bld.CreateCondBr(Cmp, BodyBB, ExitBB); |
3270 | CGF.EmitBlock(BodyBB); |
3271 | } |
3272 | |
3273 | CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, |
3274 | , |
3275 | ); |
3276 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); |
3277 | llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); |
3278 | llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); |
3279 | |
3280 | |
3281 | llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); |
3282 | Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); |
3283 | CGF.EmitBlock(ThenBB); |
3284 | |
3285 | |
3286 | Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); |
3287 | llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( |
3288 | ElemPtrPtrAddr, , C.VoidPtrTy, SourceLocation()); |
3289 | |
3290 | Address ElemPtr = Address(ElemPtrPtr, Align); |
3291 | ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); |
3292 | if (NumIters > 1) { |
3293 | ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt), |
3294 | ElemPtr.getAlignment()); |
3295 | } |
3296 | |
3297 | |
3298 | |
3299 | llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( |
3300 | TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); |
3301 | Address MediumPtr(MediumPtrVal, Align); |
3302 | |
3303 | |
3304 | MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType); |
3305 | |
3306 | |
3307 | |
3308 | llvm::Value *Elem = |
3309 | CGF.EmitLoadOfScalar(ElemPtr, , CType, Loc); |
3310 | |
3311 | CGF.EmitStoreOfScalar(Elem, MediumPtr, , CType); |
3312 | |
3313 | Bld.CreateBr(MergeBB); |
3314 | |
3315 | CGF.EmitBlock(ElseBB); |
3316 | Bld.CreateBr(MergeBB); |
3317 | |
3318 | CGF.EmitBlock(MergeBB); |
3319 | |
3320 | |
3321 | CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, |
3322 | , |
3323 | ); |
3324 | |
3325 | |
3326 | |
3327 | |
3328 | llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); |
3329 | llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); |
3330 | llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); |
3331 | |
3332 | Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); |
3333 | llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( |
3334 | AddrNumWarpsArg, , C.IntTy, Loc); |
3335 | |
3336 | |
3337 | llvm::Value *IsActiveThread = |
3338 | Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); |
3339 | Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); |
3340 | |
3341 | CGF.EmitBlock(W0ThenBB); |
3342 | |
3343 | |
3344 | llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( |
3345 | TransferMedium, |
3346 | {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); |
3347 | Address SrcMediumPtr(SrcMediumPtrVal, Align); |
3348 | |
3349 | SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType); |
3350 | |
3351 | |
3352 | Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); |
3353 | llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( |
3354 | TargetElemPtrPtr, , C.VoidPtrTy, Loc); |
3355 | Address TargetElemPtr = Address(TargetElemPtrVal, Align); |
3356 | TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); |
3357 | if (NumIters > 1) { |
3358 | TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt), |
3359 | TargetElemPtr.getAlignment()); |
3360 | } |
3361 | |
3362 | |
3363 | llvm::Value *SrcMediumValue = |
3364 | CGF.EmitLoadOfScalar(SrcMediumPtr, , CType, Loc); |
3365 | CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, , |
3366 | CType); |
3367 | Bld.CreateBr(W0MergeBB); |
3368 | |
3369 | CGF.EmitBlock(W0ElseBB); |
3370 | Bld.CreateBr(W0MergeBB); |
3371 | |
3372 | CGF.EmitBlock(W0MergeBB); |
3373 | |
3374 | if (NumIters > 1) { |
3375 | Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, )); |
3376 | CGF.EmitStoreOfScalar(Cnt, CntAddr, , C.IntTy); |
3377 | CGF.EmitBranch(PrecondBB); |
3378 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
3379 | CGF.EmitBlock(ExitBB); |
3380 | } |
3381 | RealTySize %= TySize; |
3382 | } |
3383 | ++Idx; |
3384 | } |
3385 | |
3386 | CGF.FinishFunction(); |
3387 | return Fn; |
3388 | } |
3389 | |
3390 | |
3391 | |
3392 | |
3393 | |
3394 | |
3395 | |
3396 | |
3397 | |
3398 | |
3399 | |
3400 | |
3401 | |
3402 | |
3403 | |
3404 | |
3405 | |
3406 | |
3407 | |
3408 | |
3409 | |
3410 | |
3411 | |
3412 | |
3413 | |
3414 | |
3415 | |
3416 | |
3417 | |
3418 | |
3419 | |
3420 | |
3421 | |
3422 | |
3423 | |
3424 | |
3425 | |
3426 | |
3427 | |
3428 | |
3429 | |
3430 | |
3431 | |
3432 | |
3433 | |
3434 | |
3435 | |
3436 | |
3437 | |
3438 | |
3439 | |
3440 | |
3441 | |
3442 | |
3443 | |
3444 | |
3445 | |
3446 | |
3447 | |
3448 | |
3449 | |
3450 | |
3451 | |
3452 | |
3453 | |
3454 | |
3455 | |
3456 | static llvm::Function *emitShuffleAndReduceFunction( |
3457 | CodeGenModule &CGM, ArrayRef<const Expr *> Privates, |
3458 | QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) { |
3459 | ASTContext &C = CGM.getContext(); |
3460 | |
3461 | |
3462 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3463 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3464 | |
3465 | ImplicitParamDecl LaneIDArg(C, , Loc, , C.ShortTy, |
3466 | ImplicitParamDecl::Other); |
3467 | |
3468 | ImplicitParamDecl RemoteLaneOffsetArg(C, , Loc, , |
3469 | C.ShortTy, ImplicitParamDecl::Other); |
3470 | |
3471 | ImplicitParamDecl AlgoVerArg(C, , Loc, , |
3472 | C.ShortTy, ImplicitParamDecl::Other); |
3473 | FunctionArgList Args; |
3474 | Args.push_back(&ReduceListArg); |
3475 | Args.push_back(&LaneIDArg); |
3476 | Args.push_back(&RemoteLaneOffsetArg); |
3477 | Args.push_back(&AlgoVerArg); |
3478 | |
3479 | const CGFunctionInfo &CGFI = |
3480 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3481 | auto *Fn = llvm::Function::Create( |
3482 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
3483 | "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); |
3484 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3485 | Fn->setDoesNotRecurse(); |
3486 | CodeGenFunction CGF(CGM); |
3487 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3488 | |
3489 | CGBuilderTy &Bld = CGF.Builder; |
3490 | |
3491 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3492 | Address LocalReduceList( |
3493 | Bld.CreatePointerBitCastOrAddrSpaceCast( |
3494 | CGF.EmitLoadOfScalar(AddrReduceListArg, , |
3495 | C.VoidPtrTy, SourceLocation()), |
3496 | CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), |
3497 | CGF.getPointerAlign()); |
3498 | |
3499 | Address AddrLaneIDArg = CGF.GetAddrOfLocalVar(&LaneIDArg); |
3500 | llvm::Value *LaneIDArgVal = CGF.EmitLoadOfScalar( |
3501 | AddrLaneIDArg, , C.ShortTy, SourceLocation()); |
3502 | |
3503 | Address AddrRemoteLaneOffsetArg = CGF.GetAddrOfLocalVar(&RemoteLaneOffsetArg); |
3504 | llvm::Value *RemoteLaneOffsetArgVal = CGF.EmitLoadOfScalar( |
3505 | AddrRemoteLaneOffsetArg, , C.ShortTy, SourceLocation()); |
3506 | |
3507 | Address AddrAlgoVerArg = CGF.GetAddrOfLocalVar(&AlgoVerArg); |
3508 | llvm::Value *AlgoVerArgVal = CGF.EmitLoadOfScalar( |
3509 | AddrAlgoVerArg, , C.ShortTy, SourceLocation()); |
3510 | |
3511 | |
3512 | |
3513 | Address RemoteReduceList = |
3514 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_reduce_list"); |
3515 | |
3516 | |
3517 | |
3518 | |
3519 | emitReductionListCopy(RemoteLaneToThread, CGF, ReductionArrayTy, Privates, |
3520 | LocalReduceList, RemoteReduceList, |
3521 | {RemoteLaneOffsetArgVal, |
3522 | , |
3523 | }); |
3524 | |
3525 | |
3526 | |
3527 | |
3528 | |
3529 | |
3530 | |
3531 | |
3532 | |
3533 | |
3534 | |
3535 | |
3536 | |
3537 | |
3538 | |
3539 | |
3540 | |
3541 | |
3542 | |
3543 | |
3544 | |
3545 | |
3546 | llvm::Value *CondAlgo0 = Bld.CreateIsNull(AlgoVerArgVal); |
3547 | |
3548 | llvm::Value *Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); |
3549 | llvm::Value *CondAlgo1 = Bld.CreateAnd( |
3550 | Algo1, Bld.CreateICmpULT(LaneIDArgVal, RemoteLaneOffsetArgVal)); |
3551 | |
3552 | llvm::Value *Algo2 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(2)); |
3553 | llvm::Value *CondAlgo2 = Bld.CreateAnd( |
3554 | Algo2, Bld.CreateIsNull(Bld.CreateAnd(LaneIDArgVal, Bld.getInt16(1)))); |
3555 | CondAlgo2 = Bld.CreateAnd( |
3556 | CondAlgo2, Bld.CreateICmpSGT(RemoteLaneOffsetArgVal, Bld.getInt16(0))); |
3557 | |
3558 | llvm::Value *CondReduce = Bld.CreateOr(CondAlgo0, CondAlgo1); |
3559 | CondReduce = Bld.CreateOr(CondReduce, CondAlgo2); |
3560 | |
3561 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); |
3562 | llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); |
3563 | llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); |
3564 | Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); |
3565 | |
3566 | CGF.EmitBlock(ThenBB); |
3567 | |
3568 | llvm::Value *LocalReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3569 | LocalReduceList.getPointer(), CGF.VoidPtrTy); |
3570 | llvm::Value *RemoteReduceListPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3571 | RemoteReduceList.getPointer(), CGF.VoidPtrTy); |
3572 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
3573 | CGF, Loc, ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr}); |
3574 | Bld.CreateBr(MergeBB); |
3575 | |
3576 | CGF.EmitBlock(ElseBB); |
3577 | Bld.CreateBr(MergeBB); |
3578 | |
3579 | CGF.EmitBlock(MergeBB); |
3580 | |
3581 | |
3582 | |
3583 | Algo1 = Bld.CreateICmpEQ(AlgoVerArgVal, Bld.getInt16(1)); |
3584 | llvm::Value *CondCopy = Bld.CreateAnd( |
3585 | Algo1, Bld.CreateICmpUGE(LaneIDArgVal, RemoteLaneOffsetArgVal)); |
3586 | |
3587 | llvm::BasicBlock *CpyThenBB = CGF.createBasicBlock("then"); |
3588 | llvm::BasicBlock *CpyElseBB = CGF.createBasicBlock("else"); |
3589 | llvm::BasicBlock *CpyMergeBB = CGF.createBasicBlock("ifcont"); |
3590 | Bld.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB); |
3591 | |
3592 | CGF.EmitBlock(CpyThenBB); |
3593 | emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, |
3594 | RemoteReduceList, LocalReduceList); |
3595 | Bld.CreateBr(CpyMergeBB); |
3596 | |
3597 | CGF.EmitBlock(CpyElseBB); |
3598 | Bld.CreateBr(CpyMergeBB); |
3599 | |
3600 | CGF.EmitBlock(CpyMergeBB); |
3601 | |
3602 | CGF.FinishFunction(); |
3603 | return Fn; |
3604 | } |
3605 | |
3606 | |
3607 | |
3608 | |
3609 | |
3610 | |
3611 | |
3612 | static llvm::Value *emitListToGlobalCopyFunction( |
3613 | CodeGenModule &CGM, ArrayRef<const Expr *> Privates, |
3614 | QualType ReductionArrayTy, SourceLocation Loc, |
3615 | const RecordDecl *TeamReductionRec, |
3616 | const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
3617 | &VarFieldMap) { |
3618 | ASTContext &C = CGM.getContext(); |
3619 | |
3620 | |
3621 | ImplicitParamDecl BufferArg(C, , Loc, , |
3622 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3623 | |
3624 | ImplicitParamDecl IdxArg(C, , Loc, , C.IntTy, |
3625 | ImplicitParamDecl::Other); |
3626 | |
3627 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3628 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3629 | FunctionArgList Args; |
3630 | Args.push_back(&BufferArg); |
3631 | Args.push_back(&IdxArg); |
3632 | Args.push_back(&ReduceListArg); |
3633 | |
3634 | const CGFunctionInfo &CGFI = |
3635 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3636 | auto *Fn = llvm::Function::Create( |
3637 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
3638 | "_omp_reduction_list_to_global_copy_func", &CGM.getModule()); |
3639 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3640 | Fn->setDoesNotRecurse(); |
3641 | CodeGenFunction CGF(CGM); |
3642 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3643 | |
3644 | CGBuilderTy &Bld = CGF.Builder; |
3645 | |
3646 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3647 | Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); |
3648 | Address LocalReduceList( |
3649 | Bld.CreatePointerBitCastOrAddrSpaceCast( |
3650 | CGF.EmitLoadOfScalar(AddrReduceListArg, , |
3651 | C.VoidPtrTy, Loc), |
3652 | CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), |
3653 | CGF.getPointerAlign()); |
3654 | QualType StaticTy = C.getRecordType(TeamReductionRec); |
3655 | llvm::Type *LLVMReductionsBufferTy = |
3656 | CGM.getTypes().ConvertTypeForMem(StaticTy); |
3657 | llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3658 | CGF.EmitLoadOfScalar(AddrBufferArg, , C.VoidPtrTy, Loc), |
3659 | LLVMReductionsBufferTy->getPointerTo()); |
3660 | llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), |
3661 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), |
3662 | , C.IntTy, |
3663 | Loc)}; |
3664 | unsigned Idx = 0; |
3665 | for (const Expr *Private : Privates) { |
3666 | |
3667 | Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); |
3668 | llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( |
3669 | ElemPtrPtrAddr, , C.VoidPtrTy, SourceLocation()); |
3670 | |
3671 | ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3672 | ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); |
3673 | Address ElemPtr = |
3674 | Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); |
3675 | const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); |
3676 | |
3677 | const FieldDecl *FD = VarFieldMap.lookup(VD); |
3678 | LValue GlobLVal = CGF.EmitLValueForField( |
3679 | CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); |
3680 | llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); |
3681 | GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); |
3682 | switch (CGF.getEvaluationKind(Private->getType())) { |
3683 | case TEK_Scalar: { |
3684 | llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, , |
3685 | Private->getType(), Loc); |
3686 | CGF.EmitStoreOfScalar(V, GlobLVal); |
3687 | break; |
3688 | } |
3689 | case TEK_Complex: { |
3690 | CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex( |
3691 | CGF.MakeAddrLValue(ElemPtr, Private->getType()), Loc); |
3692 | CGF.EmitStoreOfComplex(V, GlobLVal, ); |
3693 | break; |
3694 | } |
3695 | case TEK_Aggregate: |
3696 | CGF.EmitAggregateCopy(GlobLVal, |
3697 | CGF.MakeAddrLValue(ElemPtr, Private->getType()), |
3698 | Private->getType(), AggValueSlot::DoesNotOverlap); |
3699 | break; |
3700 | } |
3701 | ++Idx; |
3702 | } |
3703 | |
3704 | CGF.FinishFunction(); |
3705 | return Fn; |
3706 | } |
3707 | |
3708 | |
3709 | |
3710 | |
3711 | |
3712 | |
3713 | |
3714 | |
3715 | |
3716 | |
3717 | static llvm::Value *emitListToGlobalReduceFunction( |
3718 | CodeGenModule &CGM, ArrayRef<const Expr *> Privates, |
3719 | QualType ReductionArrayTy, SourceLocation Loc, |
3720 | const RecordDecl *TeamReductionRec, |
3721 | const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
3722 | &VarFieldMap, |
3723 | llvm::Function *ReduceFn) { |
3724 | ASTContext &C = CGM.getContext(); |
3725 | |
3726 | |
3727 | ImplicitParamDecl BufferArg(C, , Loc, , |
3728 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3729 | |
3730 | ImplicitParamDecl IdxArg(C, , Loc, , C.IntTy, |
3731 | ImplicitParamDecl::Other); |
3732 | |
3733 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3734 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3735 | FunctionArgList Args; |
3736 | Args.push_back(&BufferArg); |
3737 | Args.push_back(&IdxArg); |
3738 | Args.push_back(&ReduceListArg); |
3739 | |
3740 | const CGFunctionInfo &CGFI = |
3741 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3742 | auto *Fn = llvm::Function::Create( |
3743 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
3744 | "_omp_reduction_list_to_global_reduce_func", &CGM.getModule()); |
3745 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3746 | Fn->setDoesNotRecurse(); |
3747 | CodeGenFunction CGF(CGM); |
3748 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3749 | |
3750 | CGBuilderTy &Bld = CGF.Builder; |
3751 | |
3752 | Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); |
3753 | QualType StaticTy = C.getRecordType(TeamReductionRec); |
3754 | llvm::Type *LLVMReductionsBufferTy = |
3755 | CGM.getTypes().ConvertTypeForMem(StaticTy); |
3756 | llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3757 | CGF.EmitLoadOfScalar(AddrBufferArg, , C.VoidPtrTy, Loc), |
3758 | LLVMReductionsBufferTy->getPointerTo()); |
3759 | |
3760 | |
3761 | |
3762 | Address ReductionList = |
3763 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); |
3764 | auto IPriv = Privates.begin(); |
3765 | llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), |
3766 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), |
3767 | , C.IntTy, |
3768 | Loc)}; |
3769 | unsigned Idx = 0; |
3770 | for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { |
3771 | Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
3772 | |
3773 | const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); |
3774 | const FieldDecl *FD = VarFieldMap.lookup(VD); |
3775 | LValue GlobLVal = CGF.EmitLValueForField( |
3776 | CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); |
3777 | llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); |
3778 | llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); |
3779 | CGF.EmitStoreOfScalar(Ptr, Elem, , C.VoidPtrTy); |
3780 | if ((*IPriv)->getType()->isVariablyModifiedType()) { |
3781 | |
3782 | ++Idx; |
3783 | Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
3784 | llvm::Value *Size = CGF.Builder.CreateIntCast( |
3785 | CGF.getVLASize( |
3786 | CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) |
3787 | .NumElts, |
3788 | CGF.SizeTy, ); |
3789 | CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), |
3790 | Elem); |
3791 | } |
3792 | } |
3793 | |
3794 | |
3795 | llvm::Value *GlobalReduceList = |
3796 | CGF.EmitCastToVoidPtr(ReductionList.getPointer()); |
3797 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3798 | llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( |
3799 | AddrReduceListArg, , C.VoidPtrTy, Loc); |
3800 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
3801 | CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr}); |
3802 | CGF.FinishFunction(); |
3803 | return Fn; |
3804 | } |
3805 | |
3806 | |
3807 | |
3808 | |
3809 | |
3810 | |
3811 | |
3812 | static llvm::Value *emitGlobalToListCopyFunction( |
3813 | CodeGenModule &CGM, ArrayRef<const Expr *> Privates, |
3814 | QualType ReductionArrayTy, SourceLocation Loc, |
3815 | const RecordDecl *TeamReductionRec, |
3816 | const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
3817 | &VarFieldMap) { |
3818 | ASTContext &C = CGM.getContext(); |
3819 | |
3820 | |
3821 | ImplicitParamDecl BufferArg(C, , Loc, , |
3822 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3823 | |
3824 | ImplicitParamDecl IdxArg(C, , Loc, , C.IntTy, |
3825 | ImplicitParamDecl::Other); |
3826 | |
3827 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3828 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3829 | FunctionArgList Args; |
3830 | Args.push_back(&BufferArg); |
3831 | Args.push_back(&IdxArg); |
3832 | Args.push_back(&ReduceListArg); |
3833 | |
3834 | const CGFunctionInfo &CGFI = |
3835 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3836 | auto *Fn = llvm::Function::Create( |
3837 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
3838 | "_omp_reduction_global_to_list_copy_func", &CGM.getModule()); |
3839 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3840 | Fn->setDoesNotRecurse(); |
3841 | CodeGenFunction CGF(CGM); |
3842 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3843 | |
3844 | CGBuilderTy &Bld = CGF.Builder; |
3845 | |
3846 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3847 | Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); |
3848 | Address LocalReduceList( |
3849 | Bld.CreatePointerBitCastOrAddrSpaceCast( |
3850 | CGF.EmitLoadOfScalar(AddrReduceListArg, , |
3851 | C.VoidPtrTy, Loc), |
3852 | CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), |
3853 | CGF.getPointerAlign()); |
3854 | QualType StaticTy = C.getRecordType(TeamReductionRec); |
3855 | llvm::Type *LLVMReductionsBufferTy = |
3856 | CGM.getTypes().ConvertTypeForMem(StaticTy); |
3857 | llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3858 | CGF.EmitLoadOfScalar(AddrBufferArg, , C.VoidPtrTy, Loc), |
3859 | LLVMReductionsBufferTy->getPointerTo()); |
3860 | |
3861 | llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), |
3862 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), |
3863 | , C.IntTy, |
3864 | Loc)}; |
3865 | unsigned Idx = 0; |
3866 | for (const Expr *Private : Privates) { |
3867 | |
3868 | Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); |
3869 | llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( |
3870 | ElemPtrPtrAddr, , C.VoidPtrTy, SourceLocation()); |
3871 | |
3872 | ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3873 | ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); |
3874 | Address ElemPtr = |
3875 | Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); |
3876 | const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); |
3877 | |
3878 | const FieldDecl *FD = VarFieldMap.lookup(VD); |
3879 | LValue GlobLVal = CGF.EmitLValueForField( |
3880 | CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); |
3881 | llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); |
3882 | GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); |
3883 | switch (CGF.getEvaluationKind(Private->getType())) { |
3884 | case TEK_Scalar: { |
3885 | llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc); |
3886 | CGF.EmitStoreOfScalar(V, ElemPtr, , Private->getType()); |
3887 | break; |
3888 | } |
3889 | case TEK_Complex: { |
3890 | CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(GlobLVal, Loc); |
3891 | CGF.EmitStoreOfComplex(V, CGF.MakeAddrLValue(ElemPtr, Private->getType()), |
3892 | ); |
3893 | break; |
3894 | } |
3895 | case TEK_Aggregate: |
3896 | CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), |
3897 | GlobLVal, Private->getType(), |
3898 | AggValueSlot::DoesNotOverlap); |
3899 | break; |
3900 | } |
3901 | ++Idx; |
3902 | } |
3903 | |
3904 | CGF.FinishFunction(); |
3905 | return Fn; |
3906 | } |
3907 | |
3908 | |
3909 | |
3910 | |
3911 | |
3912 | |
3913 | |
3914 | |
3915 | |
3916 | |
3917 | static llvm::Value *emitGlobalToListReduceFunction( |
3918 | CodeGenModule &CGM, ArrayRef<const Expr *> Privates, |
3919 | QualType ReductionArrayTy, SourceLocation Loc, |
3920 | const RecordDecl *TeamReductionRec, |
3921 | const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> |
3922 | &VarFieldMap, |
3923 | llvm::Function *ReduceFn) { |
3924 | ASTContext &C = CGM.getContext(); |
3925 | |
3926 | |
3927 | ImplicitParamDecl BufferArg(C, , Loc, , |
3928 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3929 | |
3930 | ImplicitParamDecl IdxArg(C, , Loc, , C.IntTy, |
3931 | ImplicitParamDecl::Other); |
3932 | |
3933 | ImplicitParamDecl ReduceListArg(C, , Loc, , |
3934 | C.VoidPtrTy, ImplicitParamDecl::Other); |
3935 | FunctionArgList Args; |
3936 | Args.push_back(&BufferArg); |
3937 | Args.push_back(&IdxArg); |
3938 | Args.push_back(&ReduceListArg); |
3939 | |
3940 | const CGFunctionInfo &CGFI = |
3941 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3942 | auto *Fn = llvm::Function::Create( |
3943 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
3944 | "_omp_reduction_global_to_list_reduce_func", &CGM.getModule()); |
3945 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
3946 | Fn->setDoesNotRecurse(); |
3947 | CodeGenFunction CGF(CGM); |
3948 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
3949 | |
3950 | CGBuilderTy &Bld = CGF.Builder; |
3951 | |
3952 | Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); |
3953 | QualType StaticTy = C.getRecordType(TeamReductionRec); |
3954 | llvm::Type *LLVMReductionsBufferTy = |
3955 | CGM.getTypes().ConvertTypeForMem(StaticTy); |
3956 | llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( |
3957 | CGF.EmitLoadOfScalar(AddrBufferArg, , C.VoidPtrTy, Loc), |
3958 | LLVMReductionsBufferTy->getPointerTo()); |
3959 | |
3960 | |
3961 | |
3962 | Address ReductionList = |
3963 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); |
3964 | auto IPriv = Privates.begin(); |
3965 | llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), |
3966 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), |
3967 | , C.IntTy, |
3968 | Loc)}; |
3969 | unsigned Idx = 0; |
3970 | for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { |
3971 | Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
3972 | |
3973 | const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); |
3974 | const FieldDecl *FD = VarFieldMap.lookup(VD); |
3975 | LValue GlobLVal = CGF.EmitLValueForField( |
3976 | CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); |
3977 | llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); |
3978 | llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); |
3979 | CGF.EmitStoreOfScalar(Ptr, Elem, , C.VoidPtrTy); |
3980 | if ((*IPriv)->getType()->isVariablyModifiedType()) { |
3981 | |
3982 | ++Idx; |
3983 | Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
3984 | llvm::Value *Size = CGF.Builder.CreateIntCast( |
3985 | CGF.getVLASize( |
3986 | CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) |
3987 | .NumElts, |
3988 | CGF.SizeTy, ); |
3989 | CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), |
3990 | Elem); |
3991 | } |
3992 | } |
3993 | |
3994 | |
3995 | llvm::Value *GlobalReduceList = |
3996 | CGF.EmitCastToVoidPtr(ReductionList.getPointer()); |
3997 | Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); |
3998 | llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( |
3999 | AddrReduceListArg, , C.VoidPtrTy, Loc); |
4000 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4001 | CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList}); |
4002 | CGF.FinishFunction(); |
4003 | return Fn; |
4004 | } |
4005 | |
4006 | |
4007 | |
4008 | |
4009 | |
4010 | |
4011 | |
4012 | |
4013 | |
4014 | |
4015 | |
4016 | |
4017 | |
4018 | |
4019 | |
4020 | |
4021 | |
4022 | |
4023 | |
4024 | |
4025 | |
4026 | |
4027 | |
4028 | |
4029 | |
4030 | |
4031 | |
4032 | |
4033 | |
4034 | |
4035 | |
4036 | |
4037 | |
4038 | |
4039 | |
4040 | |
4041 | |
4042 | |
4043 | |
4044 | |
4045 | |
4046 | |
4047 | |
4048 | |
4049 | |
4050 | |
4051 | |
4052 | |
4053 | |
4054 | |
4055 | |
4056 | |
4057 | |
4058 | |
4059 | |
4060 | |
4061 | |
4062 | |
4063 | |
4064 | |
4065 | |
4066 | |
4067 | |
4068 | |
4069 | |
4070 | |
4071 | |
4072 | |
4073 | |
4074 | |
4075 | |
4076 | |
4077 | |
4078 | |
4079 | |
4080 | |
4081 | |
4082 | |
4083 | |
4084 | |
4085 | |
4086 | |
4087 | |
4088 | |
4089 | |
4090 | |
4091 | |
4092 | |
4093 | |
4094 | |
4095 | |
4096 | |
4097 | |
4098 | |
4099 | |
4100 | |
4101 | |
4102 | |
4103 | |
4104 | |
4105 | |
4106 | |
4107 | |
4108 | |
4109 | |
4110 | |
4111 | |
4112 | |
4113 | |
4114 | |
4115 | |
4116 | |
4117 | |
4118 | |
4119 | |
4120 | |
4121 | |
4122 | |
4123 | |
4124 | |
4125 | |
4126 | |
4127 | |
4128 | |
4129 | |
4130 | |
4131 | |
4132 | |
4133 | |
4134 | |
4135 | |
4136 | |
4137 | |
4138 | |
4139 | |
4140 | |
4141 | |
4142 | |
4143 | |
4144 | |
4145 | |
4146 | |
4147 | |
4148 | |
4149 | |
4150 | |
4151 | |
4152 | |
4153 | |
4154 | |
4155 | |
4156 | |
4157 | |
4158 | |
4159 | |
4160 | |
4161 | |
4162 | |
4163 | |
4164 | |
4165 | |
4166 | |
4167 | |
4168 | |
4169 | |
4170 | |
4171 | |
4172 | |
4173 | |
4174 | |
4175 | |
4176 | |
4177 | |
4178 | |
4179 | |
4180 | |
4181 | |
4182 | |
4183 | |
4184 | |
4185 | |
4186 | |
4187 | |
4188 | |
4189 | |
4190 | |
4191 | |
4192 | |
4193 | |
4194 | |
4195 | |
4196 | |
4197 | |
4198 | |
4199 | |
4200 | |
4201 | |
4202 | |
4203 | |
4204 | |
4205 | |
4206 | |
4207 | |
4208 | |
4209 | |
4210 | |
4211 | |
4212 | |
4213 | |
4214 | |
4215 | |
4216 | |
4217 | |
4218 | |
4219 | |
4220 | |
4221 | |
4222 | |
4223 | |
4224 | |
4225 | |
4226 | |
4227 | |
4228 | |
4229 | |
4230 | |
4231 | |
4232 | |
4233 | |
4234 | |
4235 | |
4236 | |
4237 | |
4238 | |
4239 | |
4240 | |
4241 | |
4242 | |
4243 | |
4244 | |
4245 | |
4246 | |
4247 | |
4248 | void CGOpenMPRuntimeNVPTX::emitReduction( |
4249 | CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, |
4250 | ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, |
4251 | ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { |
4252 | if (!CGF.HaveInsertPoint()) |
4253 | return; |
4254 | |
4255 | bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); |
4256 | #ifndef NDEBUG |
4257 | bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); |
4258 | #endif |
4259 | |
4260 | if (Options.SimpleReduction) { |
4261 | (0) . __assert_fail ("!TeamsReduction && !ParallelReduction && \"Invalid reduction selection in emitReduction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4262, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!TeamsReduction && !ParallelReduction && |
4262 | (0) . __assert_fail ("!TeamsReduction && !ParallelReduction && \"Invalid reduction selection in emitReduction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4262, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Invalid reduction selection in emitReduction."); |
4263 | CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, |
4264 | ReductionOps, Options); |
4265 | return; |
4266 | } |
4267 | |
4268 | (0) . __assert_fail ("(TeamsReduction || ParallelReduction) && \"Invalid reduction selection in emitReduction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4269, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert((TeamsReduction || ParallelReduction) && |
4269 | (0) . __assert_fail ("(TeamsReduction || ParallelReduction) && \"Invalid reduction selection in emitReduction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4269, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Invalid reduction selection in emitReduction."); |
4270 | |
4271 | |
4272 | |
4273 | |
4274 | |
4275 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
4276 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
4277 | |
4278 | llvm::Value *Res; |
4279 | ASTContext &C = CGM.getContext(); |
4280 | |
4281 | |
4282 | auto Size = RHSExprs.size(); |
4283 | for (const Expr *E : Privates) { |
4284 | if (E->getType()->isVariablyModifiedType()) |
4285 | |
4286 | ++Size; |
4287 | } |
4288 | llvm::APInt ArraySize(, Size); |
4289 | QualType ReductionArrayTy = |
4290 | C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, |
4291 | ); |
4292 | Address ReductionList = |
4293 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); |
4294 | auto IPriv = Privates.begin(); |
4295 | unsigned Idx = 0; |
4296 | for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { |
4297 | Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
4298 | CGF.Builder.CreateStore( |
4299 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4300 | CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), |
4301 | Elem); |
4302 | if ((*IPriv)->getType()->isVariablyModifiedType()) { |
4303 | |
4304 | ++Idx; |
4305 | Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
4306 | llvm::Value *Size = CGF.Builder.CreateIntCast( |
4307 | CGF.getVLASize( |
4308 | CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) |
4309 | .NumElts, |
4310 | CGF.SizeTy, ); |
4311 | CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), |
4312 | Elem); |
4313 | } |
4314 | } |
4315 | |
4316 | llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4317 | ReductionList.getPointer(), CGF.VoidPtrTy); |
4318 | llvm::Function *ReductionFn = emitReductionFunction( |
4319 | Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, |
4320 | LHSExprs, RHSExprs, ReductionOps); |
4321 | llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); |
4322 | llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction( |
4323 | CGM, Privates, ReductionArrayTy, ReductionFn, Loc); |
4324 | llvm::Value *InterWarpCopyFn = |
4325 | emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); |
4326 | |
4327 | if (ParallelReduction) { |
4328 | llvm::Value *Args[] = {RTLoc, |
4329 | ThreadId, |
4330 | CGF.Builder.getInt32(RHSExprs.size()), |
4331 | ReductionArrayTySize, |
4332 | RL, |
4333 | ShuffleAndReduceFn, |
4334 | InterWarpCopyFn}; |
4335 | |
4336 | Res = CGF.EmitRuntimeCall( |
4337 | createNVPTXRuntimeFunction( |
4338 | OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), |
4339 | Args); |
4340 | } else { |
4341 | (0) . __assert_fail ("TeamsReduction && \"expected teams reduction.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4341, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(TeamsReduction && "expected teams reduction."); |
4342 | llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap; |
4343 | llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size()); |
4344 | int Cnt = 0; |
4345 | for (const Expr *DRE : Privates) { |
4346 | PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl(); |
4347 | ++Cnt; |
4348 | } |
4349 | const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( |
4350 | CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap, |
4351 | C.getLangOpts().OpenMPCUDAReductionBufNum); |
4352 | TeamsReductions.push_back(TeamReductionRec); |
4353 | if (!KernelTeamsReductionPtr) { |
4354 | KernelTeamsReductionPtr = new llvm::GlobalVariable( |
4355 | CGM.getModule(), CGM.VoidPtrTy, , |
4356 | llvm::GlobalValue::InternalLinkage, nullptr, |
4357 | "_openmp_teams_reductions_buffer_$_$ptr"); |
4358 | } |
4359 | llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar( |
4360 | Address(KernelTeamsReductionPtr, CGM.getPointerAlign()), |
4361 | , C.getPointerType(C.VoidPtrTy), Loc); |
4362 | llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction( |
4363 | CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); |
4364 | llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction( |
4365 | CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, |
4366 | ReductionFn); |
4367 | llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction( |
4368 | CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); |
4369 | llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction( |
4370 | CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, |
4371 | ReductionFn); |
4372 | |
4373 | llvm::Value *Args[] = { |
4374 | RTLoc, |
4375 | ThreadId, |
4376 | GlobalBufferPtr, |
4377 | CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum), |
4378 | RL, |
4379 | ShuffleAndReduceFn, |
4380 | InterWarpCopyFn, |
4381 | GlobalToBufferCpyFn, |
4382 | GlobalToBufferRedFn, |
4383 | BufferToGlobalCpyFn, |
4384 | BufferToGlobalRedFn}; |
4385 | |
4386 | Res = CGF.EmitRuntimeCall( |
4387 | createNVPTXRuntimeFunction( |
4388 | OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), |
4389 | Args); |
4390 | } |
4391 | |
4392 | |
4393 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done"); |
4394 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then"); |
4395 | llvm::Value *Cond = CGF.Builder.CreateICmpEQ( |
4396 | Res, llvm::ConstantInt::get(CGM.Int32Ty, )); |
4397 | CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB); |
4398 | |
4399 | |
4400 | |
4401 | |
4402 | |
4403 | CGF.EmitBlock(ThenBB); |
4404 | |
4405 | |
4406 | auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps, |
4407 | this](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4408 | auto IPriv = Privates.begin(); |
4409 | auto ILHS = LHSExprs.begin(); |
4410 | auto IRHS = RHSExprs.begin(); |
4411 | for (const Expr *E : ReductionOps) { |
4412 | emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), |
4413 | cast<DeclRefExpr>(*IRHS)); |
4414 | ++IPriv; |
4415 | ++ILHS; |
4416 | ++IRHS; |
4417 | } |
4418 | }; |
4419 | llvm::Value *EndArgs[] = {ThreadId}; |
4420 | RegionCodeGenTy RCG(CodeGen); |
4421 | NVPTXActionTy Action( |
4422 | nullptr, llvm::None, |
4423 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), |
4424 | EndArgs); |
4425 | RCG.setAction(Action); |
4426 | RCG(CGF); |
4427 | |
4428 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
4429 | CGF.EmitBlock(ExitBB, ); |
4430 | } |
4431 | |
4432 | const VarDecl * |
4433 | CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, |
4434 | const VarDecl *NativeParam) const { |
4435 | if (!NativeParam->getType()->isReferenceType()) |
4436 | return NativeParam; |
4437 | QualType ArgType = NativeParam->getType(); |
4438 | QualifierCollector QC; |
4439 | const Type *NonQualTy = QC.strip(ArgType); |
4440 | QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); |
4441 | if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) { |
4442 | if (Attr->getCaptureKind() == OMPC_map) { |
4443 | PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, |
4444 | LangAS::opencl_global); |
4445 | } else if (Attr->getCaptureKind() == OMPC_firstprivate && |
4446 | PointeeTy.isConstant(CGM.getContext())) { |
4447 | PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, |
4448 | LangAS::opencl_generic); |
4449 | } |
4450 | } |
4451 | ArgType = CGM.getContext().getPointerType(PointeeTy); |
4452 | QC.addRestrict(); |
4453 | enum { NVPTX_local_addr = 5 }; |
4454 | QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); |
4455 | ArgType = QC.apply(CGM.getContext(), ArgType); |
4456 | if (isa<ImplicitParamDecl>(NativeParam)) |
4457 | return ImplicitParamDecl::Create( |
4458 | CGM.getContext(), , NativeParam->getLocation(), |
4459 | NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); |
4460 | return ParmVarDecl::Create( |
4461 | CGM.getContext(), |
4462 | const_cast<DeclContext *>(NativeParam->getDeclContext()), |
4463 | NativeParam->getBeginLoc(), NativeParam->getLocation(), |
4464 | NativeParam->getIdentifier(), ArgType, |
4465 | , SC_None, ); |
4466 | } |
4467 | |
4468 | Address |
4469 | CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, |
4470 | const VarDecl *NativeParam, |
4471 | const VarDecl *TargetParam) const { |
4472 | (0) . __assert_fail ("NativeParam != TargetParam && NativeParam->getType()->isReferenceType() && \"Native arg must not be the same as target arg.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4474, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(NativeParam != TargetParam && |
4473 | (0) . __assert_fail ("NativeParam != TargetParam && NativeParam->getType()->isReferenceType() && \"Native arg must not be the same as target arg.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4474, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> NativeParam->getType()->isReferenceType() && |
4474 | (0) . __assert_fail ("NativeParam != TargetParam && NativeParam->getType()->isReferenceType() && \"Native arg must not be the same as target arg.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4474, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Native arg must not be the same as target arg."); |
4475 | Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam); |
4476 | QualType NativeParamType = NativeParam->getType(); |
4477 | QualifierCollector QC; |
4478 | const Type *NonQualTy = QC.strip(NativeParamType); |
4479 | QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); |
4480 | unsigned NativePointeeAddrSpace = |
4481 | CGF.getContext().getTargetAddressSpace(NativePointeeTy); |
4482 | QualType TargetTy = TargetParam->getType(); |
4483 | llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( |
4484 | LocalAddr, , TargetTy, SourceLocation()); |
4485 | |
4486 | TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4487 | TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( |
4488 | )); |
4489 | |
4490 | TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4491 | TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( |
4492 | NativePointeeAddrSpace)); |
4493 | Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType); |
4494 | CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, , |
4495 | NativeParamType); |
4496 | return NativeParamAddr; |
4497 | } |
4498 | |
4499 | void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( |
4500 | CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, |
4501 | ArrayRef<llvm::Value *> Args) const { |
4502 | SmallVector<llvm::Value *, 4> TargetArgs; |
4503 | TargetArgs.reserve(Args.size()); |
4504 | auto *FnType = OutlinedFn.getFunctionType(); |
4505 | for (unsigned I = 0, E = Args.size(); I < E; ++I) { |
4506 | if (FnType->isVarArg() && FnType->getNumParams() <= I) { |
4507 | TargetArgs.append(std::next(Args.begin(), I), Args.end()); |
4508 | break; |
4509 | } |
4510 | llvm::Type *TargetType = FnType->getParamType(I); |
4511 | llvm::Value *NativeArg = Args[I]; |
4512 | if (!TargetType->isPointerTy()) { |
4513 | TargetArgs.emplace_back(NativeArg); |
4514 | continue; |
4515 | } |
4516 | llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4517 | NativeArg, |
4518 | NativeArg->getType()->getPointerElementType()->getPointerTo()); |
4519 | TargetArgs.emplace_back( |
4520 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); |
4521 | } |
4522 | CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs); |
4523 | } |
4524 | |
4525 | |
4526 | |
4527 | |
4528 | |
4529 | llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( |
4530 | llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) { |
4531 | ASTContext &Ctx = CGM.getContext(); |
4532 | const auto &CS = *D.getCapturedStmt(OMPD_parallel); |
4533 | |
4534 | |
4535 | FunctionArgList WrapperArgs; |
4536 | QualType Int16QTy = |
4537 | Ctx.getIntTypeForBitwidth(, ); |
4538 | QualType Int32QTy = |
4539 | Ctx.getIntTypeForBitwidth(, ); |
4540 | ImplicitParamDecl ParallelLevelArg(Ctx, , D.getBeginLoc(), |
4541 | , Int16QTy, |
4542 | ImplicitParamDecl::Other); |
4543 | ImplicitParamDecl WrapperArg(Ctx, , D.getBeginLoc(), |
4544 | , Int32QTy, |
4545 | ImplicitParamDecl::Other); |
4546 | WrapperArgs.emplace_back(&ParallelLevelArg); |
4547 | WrapperArgs.emplace_back(&WrapperArg); |
4548 | |
4549 | const CGFunctionInfo &CGFI = |
4550 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs); |
4551 | |
4552 | auto *Fn = llvm::Function::Create( |
4553 | CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, |
4554 | Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule()); |
4555 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
4556 | Fn->setLinkage(llvm::GlobalValue::InternalLinkage); |
4557 | Fn->setDoesNotRecurse(); |
4558 | |
4559 | CodeGenFunction CGF(CGM, ); |
4560 | CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs, |
4561 | D.getBeginLoc(), D.getBeginLoc()); |
4562 | |
4563 | const auto *RD = CS.getCapturedRecordDecl(); |
4564 | auto CurField = RD->field_begin(); |
4565 | |
4566 | Address ZeroAddr = CGF.CreateMemTemp( |
4567 | CGF.getContext().getIntTypeForBitwidth(, ), |
4568 | ".zero.addr"); |
4569 | CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32( 0)); |
4570 | |
4571 | SmallVector<llvm::Value *, 8> Args; |
4572 | |
4573 | Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer()); |
4574 | Args.emplace_back(ZeroAddr.getPointer()); |
4575 | |
4576 | CGBuilderTy &Bld = CGF.Builder; |
4577 | auto CI = CS.capture_begin(); |
4578 | |
4579 | |
4580 | |
4581 | Address GlobalArgs = |
4582 | CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); |
4583 | llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); |
4584 | llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; |
4585 | CGF.EmitRuntimeCall( |
4586 | createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables), |
4587 | DataSharingArgs); |
4588 | |
4589 | |
4590 | |
4591 | Address SharedArgListAddress = Address::invalid(); |
4592 | if (CS.capture_size() > 0 || |
4593 | isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { |
4594 | SharedArgListAddress = CGF.EmitLoadOfPointer( |
4595 | GlobalArgs, CGF.getContext() |
4596 | .getPointerType(CGF.getContext().getPointerType( |
4597 | CGF.getContext().VoidPtrTy)) |
4598 | .castAs<PointerType>()); |
4599 | } |
4600 | unsigned Idx = 0; |
4601 | if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { |
4602 | Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); |
4603 | Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( |
4604 | Src, CGF.SizeTy->getPointerTo()); |
4605 | llvm::Value *LB = CGF.EmitLoadOfScalar( |
4606 | TypedAddress, |
4607 | , |
4608 | CGF.getContext().getPointerType(CGF.getContext().getSizeType()), |
4609 | cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc()); |
4610 | Args.emplace_back(LB); |
4611 | ++Idx; |
4612 | Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); |
4613 | TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( |
4614 | Src, CGF.SizeTy->getPointerTo()); |
4615 | llvm::Value *UB = CGF.EmitLoadOfScalar( |
4616 | TypedAddress, |
4617 | , |
4618 | CGF.getContext().getPointerType(CGF.getContext().getSizeType()), |
4619 | cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc()); |
4620 | Args.emplace_back(UB); |
4621 | ++Idx; |
4622 | } |
4623 | if (CS.capture_size() > 0) { |
4624 | ASTContext &CGFContext = CGF.getContext(); |
4625 | for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) { |
4626 | QualType ElemTy = CurField->getType(); |
4627 | Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx); |
4628 | Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( |
4629 | Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy))); |
4630 | llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress, |
4631 | , |
4632 | CGFContext.getPointerType(ElemTy), |
4633 | CI->getLocation()); |
4634 | if (CI->capturesVariableByCopy() && |
4635 | !CI->getCapturedVar()->getType()->isAnyPointerType()) { |
4636 | Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(), |
4637 | CI->getLocation()); |
4638 | } |
4639 | Args.emplace_back(Arg); |
4640 | } |
4641 | } |
4642 | |
4643 | emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args); |
4644 | CGF.FinishFunction(); |
4645 | return Fn; |
4646 | } |
4647 | |
4648 | void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, |
4649 | const Decl *D) { |
4650 | if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) |
4651 | return; |
4652 | |
4653 | (0) . __assert_fail ("D && \"Expected function or captured|block decl.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4653, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(D && "Expected function or captured|block decl."); |
4654 | (0) . __assert_fail ("FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && \"Function is registered already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4655, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && |
4655 | (0) . __assert_fail ("FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && \"Function is registered already.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4655, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Function is registered already."); |
4656 | (0) . __assert_fail ("(!TeamAndReductions.first || TeamAndReductions.first == D) && \"Team is set but not processed.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4657, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert((!TeamAndReductions.first || TeamAndReductions.first == D) && |
4657 | (0) . __assert_fail ("(!TeamAndReductions.first || TeamAndReductions.first == D) && \"Team is set but not processed.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4657, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> "Team is set but not processed."); |
4658 | const Stmt *Body = nullptr; |
4659 | bool NeedToDelayGlobalization = false; |
4660 | if (const auto *FD = dyn_cast<FunctionDecl>(D)) { |
4661 | Body = FD->getBody(); |
4662 | } else if (const auto *BD = dyn_cast<BlockDecl>(D)) { |
4663 | Body = BD->getBody(); |
4664 | } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) { |
4665 | Body = CD->getBody(); |
4666 | NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; |
4667 | if (NeedToDelayGlobalization && |
4668 | getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) |
4669 | return; |
4670 | } |
4671 | if (!Body) |
4672 | return; |
4673 | CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second); |
4674 | VarChecker.Visit(Body); |
4675 | const RecordDecl *GlobalizedVarsRecord = |
4676 | VarChecker.getGlobalizedRecord(IsInTTDRegion); |
4677 | TeamAndReductions.first = nullptr; |
4678 | TeamAndReductions.second.clear(); |
4679 | ArrayRef<const ValueDecl *> EscapedVariableLengthDecls = |
4680 | VarChecker.getEscapedVariableLengthDecls(); |
4681 | if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) |
4682 | return; |
4683 | auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; |
4684 | I->getSecond().MappedParams = |
4685 | llvm::make_unique<CodeGenFunction::OMPMapVars>(); |
4686 | I->getSecond().GlobalRecord = GlobalizedVarsRecord; |
4687 | I->getSecond().EscapedParameters.insert( |
4688 | VarChecker.getEscapedParameters().begin(), |
4689 | VarChecker.getEscapedParameters().end()); |
4690 | I->getSecond().EscapedVariableLengthDecls.append( |
4691 | EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end()); |
4692 | DeclToAddrMapTy &Data = I->getSecond().LocalVarData; |
4693 | for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { |
4694 | (0) . __assert_fail ("VD->isCanonicalDecl() && \"Expected canonical declaration\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4694, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(VD->isCanonicalDecl() && "Expected canonical declaration"); |
4695 | const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); |
4696 | Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion))); |
4697 | } |
4698 | if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { |
4699 | CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); |
4700 | VarChecker.Visit(Body); |
4701 | I->getSecond().SecondaryGlobalRecord = |
4702 | VarChecker.getGlobalizedRecord(); |
4703 | I->getSecond().SecondaryLocalVarData.emplace(); |
4704 | DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue(); |
4705 | for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { |
4706 | (0) . __assert_fail ("VD->isCanonicalDecl() && \"Expected canonical declaration\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4706, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(VD->isCanonicalDecl() && "Expected canonical declaration"); |
4707 | const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); |
4708 | Data.insert( |
4709 | std::make_pair(VD, MappedVarData(FD, ))); |
4710 | } |
4711 | } |
4712 | if (!NeedToDelayGlobalization) { |
4713 | emitGenericVarsProlog(CGF, D->getBeginLoc(), ); |
4714 | struct GlobalizationScope final : EHScopeStack::Cleanup { |
4715 | GlobalizationScope() = default; |
4716 | |
4717 | void Emit(CodeGenFunction &CGF, Flags flags) override { |
4718 | static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) |
4719 | .emitGenericVarsEpilog(CGF, ); |
4720 | } |
4721 | }; |
4722 | CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup); |
4723 | } |
4724 | } |
4725 | |
4726 | Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, |
4727 | const VarDecl *VD) { |
4728 | bool UseDefaultAllocator = true; |
4729 | if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) { |
4730 | const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); |
4731 | switch (A->getAllocatorType()) { |
4732 | |
4733 | |
4734 | case OMPAllocateDeclAttr::OMPDefaultMemAlloc: |
4735 | case OMPAllocateDeclAttr::OMPThreadMemAlloc: |
4736 | |
4737 | break; |
4738 | case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: |
4739 | case OMPAllocateDeclAttr::OMPCGroupMemAlloc: |
4740 | case OMPAllocateDeclAttr::OMPHighBWMemAlloc: |
4741 | case OMPAllocateDeclAttr::OMPLowLatMemAlloc: |
4742 | case OMPAllocateDeclAttr::OMPConstMemAlloc: |
4743 | case OMPAllocateDeclAttr::OMPPTeamMemAlloc: |
4744 | case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: |
4745 | UseDefaultAllocator = false; |
4746 | break; |
4747 | } |
4748 | } |
4749 | |
4750 | if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) |
4751 | return Address::invalid(); |
4752 | |
4753 | VD = VD->getCanonicalDecl(); |
4754 | auto I = FunctionGlobalizedDecls.find(CGF.CurFn); |
4755 | if (I == FunctionGlobalizedDecls.end()) |
4756 | return Address::invalid(); |
4757 | auto VDI = I->getSecond().LocalVarData.find(VD); |
4758 | if (VDI != I->getSecond().LocalVarData.end()) |
4759 | return VDI->second.PrivateAddr; |
4760 | if (VD->hasAttrs()) { |
4761 | for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()), |
4762 | E(VD->attr_end()); |
4763 | IT != E; ++IT) { |
4764 | auto VDI = I->getSecond().LocalVarData.find( |
4765 | cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl()) |
4766 | ->getCanonicalDecl()); |
4767 | if (VDI != I->getSecond().LocalVarData.end()) |
4768 | return VDI->second.PrivateAddr; |
4769 | } |
4770 | } |
4771 | |
4772 | |
4773 | |
4774 | |
4775 | |
4776 | (void)UseDefaultAllocator; |
4777 | return Address::invalid(); |
4778 | } |
4779 | |
4780 | void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) { |
4781 | FunctionGlobalizedDecls.erase(CGF.CurFn); |
4782 | CGOpenMPRuntime::functionFinished(CGF); |
4783 | } |
4784 | |
4785 | void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( |
4786 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
4787 | OpenMPDistScheduleClauseKind &ScheduleKind, |
4788 | llvm::Value *&Chunk) const { |
4789 | if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { |
4790 | ScheduleKind = OMPC_DIST_SCHEDULE_static; |
4791 | Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), |
4792 | CGF.getContext().getIntTypeForBitwidth(32, ), |
4793 | S.getIterationVariable()->getType(), S.getBeginLoc()); |
4794 | return; |
4795 | } |
4796 | CGOpenMPRuntime::getDefaultDistScheduleAndChunk( |
4797 | CGF, S, ScheduleKind, Chunk); |
4798 | } |
4799 | |
4800 | void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk( |
4801 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
4802 | OpenMPScheduleClauseKind &ScheduleKind, |
4803 | const Expr *&ChunkExpr) const { |
4804 | ScheduleKind = OMPC_SCHEDULE_static; |
4805 | |
4806 | llvm::APInt ChunkSize(32, 1); |
4807 | ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize, |
4808 | CGF.getContext().getIntTypeForBitwidth(32, ), |
4809 | SourceLocation()); |
4810 | } |
4811 | |
4812 | void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( |
4813 | CodeGenFunction &CGF, const OMPExecutableDirective &D) const { |
4814 | (0) . __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4815, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && |
4815 | (0) . __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4815, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true"> " Expected target-based directive."); |
4816 | const CapturedStmt *CS = D.getCapturedStmt(OMPD_target); |
4817 | for (const CapturedStmt::Capture &C : CS->captures()) { |
4818 | |
4819 | |
4820 | if (!C.capturesVariable()) |
4821 | continue; |
4822 | const VarDecl *VD = C.getCapturedVar(); |
4823 | const auto *RD = VD->getType() |
4824 | .getCanonicalType() |
4825 | .getNonReferenceType() |
4826 | ->getAsCXXRecordDecl(); |
4827 | if (!RD || !RD->isLambda()) |
4828 | continue; |
4829 | Address VDAddr = CGF.GetAddrOfLocalVar(VD); |
4830 | LValue VDLVal; |
4831 | if (VD->getType().getCanonicalType()->isReferenceType()) |
4832 | VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType()); |
4833 | else |
4834 | VDLVal = CGF.MakeAddrLValue( |
4835 | VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); |
4836 | llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; |
4837 | FieldDecl *ThisCapture = nullptr; |
4838 | RD->getCaptureFields(Captures, ThisCapture); |
4839 | if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) { |
4840 | LValue ThisLVal = |
4841 | CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); |
4842 | llvm::Value *CXXThis = CGF.LoadCXXThis(); |
4843 | CGF.EmitStoreOfScalar(CXXThis, ThisLVal); |
4844 | } |
4845 | for (const LambdaCapture &LC : RD->captures()) { |
4846 | if (LC.getCaptureKind() != LCK_ByRef) |
4847 | continue; |
4848 | const VarDecl *VD = LC.getCapturedVar(); |
4849 | if (!CS->capturesVariable(VD)) |
4850 | continue; |
4851 | auto It = Captures.find(VD); |
4852 | (0) . __assert_fail ("It != Captures.end() && \"Found lambda capture without field.\"", "/home/seafit/code_projects/clang_source/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp", 4852, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(It != Captures.end() && "Found lambda capture without field."); |
4853 | LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); |
4854 | Address VDAddr = CGF.GetAddrOfLocalVar(VD); |
4855 | if (VD->getType().getCanonicalType()->isReferenceType()) |
4856 | VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, |
4857 | VD->getType().getCanonicalType()) |
4858 | .getAddress(); |
4859 | CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); |
4860 | } |
4861 | } |
4862 | } |
4863 | |
4864 | unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const { |
4865 | return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); |
4866 | } |
4867 | |
4868 | bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, |
4869 | LangAS &AS) { |
4870 | if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) |
4871 | return false; |
4872 | const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); |
4873 | switch(A->getAllocatorType()) { |
4874 | case OMPAllocateDeclAttr::OMPDefaultMemAlloc: |
4875 | |
4876 | case OMPAllocateDeclAttr::OMPThreadMemAlloc: |
4877 | case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: |
4878 | case OMPAllocateDeclAttr::OMPCGroupMemAlloc: |
4879 | case OMPAllocateDeclAttr::OMPHighBWMemAlloc: |
4880 | case OMPAllocateDeclAttr::OMPLowLatMemAlloc: |
4881 | AS = LangAS::Default; |
4882 | return true; |
4883 | case OMPAllocateDeclAttr::OMPConstMemAlloc: |
4884 | AS = LangAS::cuda_constant; |
4885 | return true; |
4886 | case OMPAllocateDeclAttr::OMPPTeamMemAlloc: |
4887 | AS = LangAS::cuda_shared; |
4888 | return true; |
4889 | case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: |
4890 | llvm_unreachable("Expected predefined allocator for the variables with the " |
4891 | "static storage."); |
4892 | } |
4893 | return false; |
4894 | } |
4895 | |
4896 | |
4897 | static CudaArch getCudaArch(CodeGenModule &CGM) { |
4898 | if (!CGM.getTarget().hasFeature("ptx")) |
4899 | return CudaArch::UNKNOWN; |
4900 | llvm::StringMap<bool> Features; |
4901 | CGM.getTarget().initFeatureMap(Features, CGM.getDiags(), |
4902 | CGM.getTarget().getTargetOpts().CPU, |
4903 | CGM.getTarget().getTargetOpts().Features); |
4904 | for (const auto &Feature : Features) { |
4905 | if (Feature.getValue()) { |
4906 | CudaArch Arch = StringToCudaArch(Feature.getKey()); |
4907 | if (Arch != CudaArch::UNKNOWN) |
4908 | return Arch; |
4909 | } |
4910 | } |
4911 | return CudaArch::UNKNOWN; |
4912 | } |
4913 | |
4914 | |
4915 | |
4916 | void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( |
4917 | const OMPRequiresDecl *D) const { |
4918 | for (const OMPClause *Clause : D->clauselists()) { |
4919 | if (Clause->getClauseKind() == OMPC_unified_shared_memory) { |
4920 | switch (getCudaArch(CGM)) { |
4921 | case CudaArch::SM_20: |
4922 | case CudaArch::SM_21: |
4923 | case CudaArch::SM_30: |
4924 | case CudaArch::SM_32: |
4925 | case CudaArch::SM_35: |
4926 | case CudaArch::SM_37: |
4927 | case CudaArch::SM_50: |
4928 | case CudaArch::SM_52: |
4929 | case CudaArch::SM_53: |
4930 | case CudaArch::SM_60: |
4931 | case CudaArch::SM_61: |
4932 | case CudaArch::SM_62: |
4933 | CGM.Error(Clause->getBeginLoc(), |
4934 | "Target architecture does not support unified addressing"); |
4935 | return; |
4936 | case CudaArch::SM_70: |
4937 | case CudaArch::SM_72: |
4938 | case CudaArch::SM_75: |
4939 | case CudaArch::GFX600: |
4940 | case CudaArch::GFX601: |
4941 | case CudaArch::GFX700: |
4942 | case CudaArch::GFX701: |
4943 | case CudaArch::GFX702: |
4944 | case CudaArch::GFX703: |
4945 | case CudaArch::GFX704: |
4946 | case CudaArch::GFX801: |
4947 | case CudaArch::GFX802: |
4948 | case CudaArch::GFX803: |
4949 | case CudaArch::GFX810: |
4950 | case CudaArch::GFX900: |
4951 | case CudaArch::GFX902: |
4952 | case CudaArch::GFX904: |
4953 | case CudaArch::GFX906: |
4954 | case CudaArch::GFX909: |
4955 | case CudaArch::UNKNOWN: |
4956 | break; |
4957 | case CudaArch::LAST: |
4958 | llvm_unreachable("Unexpected Cuda arch."); |
4959 | } |
4960 | } |
4961 | } |
4962 | } |
4963 | |
4964 | |
4965 | static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { |
4966 | std::pair<unsigned, unsigned> Data; |
4967 | if (CGM.getLangOpts().OpenMPCUDANumSMs) |
4968 | Data.first = CGM.getLangOpts().OpenMPCUDANumSMs; |
4969 | if (CGM.getLangOpts().OpenMPCUDABlocksPerSM) |
4970 | Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM; |
4971 | if (Data.first && Data.second) |
4972 | return Data; |
4973 | switch (getCudaArch(CGM)) { |
4974 | case CudaArch::SM_20: |
4975 | case CudaArch::SM_21: |
4976 | case CudaArch::SM_30: |
4977 | case CudaArch::SM_32: |
4978 | case CudaArch::SM_35: |
4979 | case CudaArch::SM_37: |
4980 | case CudaArch::SM_50: |
4981 | case CudaArch::SM_52: |
4982 | case CudaArch::SM_53: |
4983 | return {16, 16}; |
4984 | case CudaArch::SM_60: |
4985 | case CudaArch::SM_61: |
4986 | case CudaArch::SM_62: |
4987 | return {56, 32}; |
4988 | case CudaArch::SM_70: |
4989 | case CudaArch::SM_72: |
4990 | case CudaArch::SM_75: |
4991 | return {84, 32}; |
4992 | case CudaArch::GFX600: |
4993 | case CudaArch::GFX601: |
4994 | case CudaArch::GFX700: |
4995 | case CudaArch::GFX701: |
4996 | case CudaArch::GFX702: |
4997 | case CudaArch::GFX703: |
4998 | case CudaArch::GFX704: |
4999 | case CudaArch::GFX801: |
5000 | case CudaArch::GFX802: |
5001 | case CudaArch::GFX803: |
5002 | case CudaArch::GFX810: |
5003 | case CudaArch::GFX900: |
5004 | case CudaArch::GFX902: |
5005 | case CudaArch::GFX904: |
5006 | case CudaArch::GFX906: |
5007 | case CudaArch::GFX909: |
5008 | case CudaArch::UNKNOWN: |
5009 | break; |
5010 | case CudaArch::LAST: |
5011 | llvm_unreachable("Unexpected Cuda arch."); |
5012 | } |
5013 | llvm_unreachable("Unexpected NVPTX target without ptx feature."); |
5014 | } |
5015 | |
5016 | void CGOpenMPRuntimeNVPTX::clear() { |
5017 | if (!GlobalizedRecords.empty()) { |
5018 | ASTContext &C = CGM.getContext(); |
5019 | llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs; |
5020 | llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs; |
5021 | RecordDecl *StaticRD = C.buildImplicitRecord( |
5022 | "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); |
5023 | StaticRD->startDefinition(); |
5024 | RecordDecl *SharedStaticRD = C.buildImplicitRecord( |
5025 | "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); |
5026 | SharedStaticRD->startDefinition(); |
5027 | for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) { |
5028 | if (Records.Records.empty()) |
5029 | continue; |
5030 | unsigned Size = 0; |
5031 | unsigned RecAlignment = 0; |
5032 | for (const RecordDecl *RD : Records.Records) { |
5033 | QualType RDTy = C.getRecordType(RD); |
5034 | unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity(); |
5035 | RecAlignment = std::max(RecAlignment, Alignment); |
5036 | unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity(); |
5037 | Size = |
5038 | llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment); |
5039 | } |
5040 | Size = llvm::alignTo(Size, RecAlignment); |
5041 | llvm::APInt ArySize(, Size); |
5042 | QualType SubTy = C.getConstantArrayType( |
5043 | C.CharTy, ArySize, ArrayType::Normal, ); |
5044 | const bool UseSharedMemory = Size <= SharedMemorySize; |
5045 | auto *Field = |
5046 | FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, |
5047 | SourceLocation(), SourceLocation(), nullptr, SubTy, |
5048 | C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), |
5049 | , , |
5050 | ICIS_NoInit); |
5051 | Field->setAccess(AS_public); |
5052 | if (UseSharedMemory) { |
5053 | SharedStaticRD->addDecl(Field); |
5054 | SharedRecs.push_back(&Records); |
5055 | } else { |
5056 | StaticRD->addDecl(Field); |
5057 | GlobalRecs.push_back(&Records); |
5058 | } |
5059 | Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size)); |
5060 | Records.UseSharedMemory->setInitializer( |
5061 | llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0)); |
5062 | } |
5063 | |
5064 | |
5065 | |
5066 | |
5067 | if (!SharedStaticRD->field_empty()) { |
5068 | llvm::APInt ArySize(, SharedMemorySize); |
5069 | QualType SubTy = C.getConstantArrayType( |
5070 | C.CharTy, ArySize, ArrayType::Normal, ); |
5071 | auto *Field = FieldDecl::Create( |
5072 | C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, |
5073 | C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), |
5074 | , , |
5075 | ); |
5076 | Field->setAccess(AS_public); |
5077 | SharedStaticRD->addDecl(Field); |
5078 | } |
5079 | SharedStaticRD->completeDefinition(); |
5080 | if (!SharedStaticRD->field_empty()) { |
5081 | QualType StaticTy = C.getRecordType(SharedStaticRD); |
5082 | llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); |
5083 | auto *GV = new llvm::GlobalVariable( |
5084 | CGM.getModule(), LLVMStaticTy, |
5085 | , llvm::GlobalValue::CommonLinkage, |
5086 | llvm::Constant::getNullValue(LLVMStaticTy), |
5087 | "_openmp_shared_static_glob_rd_$_", , |
5088 | llvm::GlobalValue::NotThreadLocal, |
5089 | C.getTargetAddressSpace(LangAS::cuda_shared)); |
5090 | auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
5091 | GV, CGM.VoidPtrTy); |
5092 | for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) { |
5093 | Rec->Buffer->replaceAllUsesWith(Replacement); |
5094 | Rec->Buffer->eraseFromParent(); |
5095 | } |
5096 | } |
5097 | StaticRD->completeDefinition(); |
5098 | if (!StaticRD->field_empty()) { |
5099 | QualType StaticTy = C.getRecordType(StaticRD); |
5100 | std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); |
5101 | llvm::APInt Size1(32, SMsBlockPerSM.second); |
5102 | QualType Arr1Ty = |
5103 | C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, |
5104 | ); |
5105 | llvm::APInt Size2(32, SMsBlockPerSM.first); |
5106 | QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, |
5107 | ); |
5108 | llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); |
5109 | |
5110 | |
5111 | |
5112 | auto *GV = new llvm::GlobalVariable( |
5113 | CGM.getModule(), LLVMArr2Ty, |
5114 | , llvm::GlobalValue::InternalLinkage, |
5115 | llvm::Constant::getNullValue(LLVMArr2Ty), |
5116 | "_openmp_static_glob_rd_$_"); |
5117 | auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( |
5118 | GV, CGM.VoidPtrTy); |
5119 | for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) { |
5120 | Rec->Buffer->replaceAllUsesWith(Replacement); |
5121 | Rec->Buffer->eraseFromParent(); |
5122 | } |
5123 | } |
5124 | } |
5125 | if (!TeamsReductions.empty()) { |
5126 | ASTContext &C = CGM.getContext(); |
5127 | RecordDecl *StaticRD = C.buildImplicitRecord( |
5128 | "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union); |
5129 | StaticRD->startDefinition(); |
5130 | for (const RecordDecl *TeamReductionRec : TeamsReductions) { |
5131 | QualType RecTy = C.getRecordType(TeamReductionRec); |
5132 | auto *Field = FieldDecl::Create( |
5133 | C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy, |
5134 | C.getTrivialTypeSourceInfo(RecTy, SourceLocation()), |
5135 | , , |
5136 | ICIS_NoInit); |
5137 | Field->setAccess(AS_public); |
5138 | StaticRD->addDecl(Field); |
5139 | } |
5140 | StaticRD->completeDefinition(); |
5141 | QualType StaticTy = C.getRecordType(StaticRD); |
5142 | llvm::Type *LLVMReductionsBufferTy = |
5143 | CGM.getTypes().ConvertTypeForMem(StaticTy); |
5144 | |
5145 | |
5146 | |
5147 | auto *GV = new llvm::GlobalVariable( |
5148 | CGM.getModule(), LLVMReductionsBufferTy, |
5149 | , llvm::GlobalValue::InternalLinkage, |
5150 | llvm::Constant::getNullValue(LLVMReductionsBufferTy), |
5151 | "_openmp_teams_reductions_buffer_$_"); |
5152 | KernelTeamsReductionPtr->setInitializer( |
5153 | llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, |
5154 | CGM.VoidPtrTy)); |
5155 | } |
5156 | CGOpenMPRuntime::clear(); |
5157 | } |
5158 | |