1 | // RUN: %clang_cc1 -fcuda-is-device \ |
2 | // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ |
3 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix NOFTZ |
4 | // RUN: %clang_cc1 -fcuda-is-device -fcuda-flush-denormals-to-zero \ |
5 | // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ |
6 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix FTZ |
7 | |
8 | // RUN: %clang_cc1 -fcuda-is-device -x hip \ |
9 | // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ |
10 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix AMDNOFTZ |
11 | // RUN: %clang_cc1 -fcuda-is-device -x hip -fcuda-flush-denormals-to-zero \ |
12 | // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ |
13 | // RUN: FileCheck %s -check-prefix CHECK -check-prefix AMDFTZ |
14 | |
15 | #include "Inputs/cuda.h" |
16 | |
17 | // Checks that device functions get emitted with the "nvptx-f32ftz" |
18 | // attribute set to "true" when we compile CUDA device code with |
19 | // -fcuda-flush-denormals-to-zero. Further, check that we reflect the presence |
20 | // or absence of -fcuda-flush-denormals-to-zero in a module flag. |
21 | |
22 | // AMDGCN targets always have +fp64-fp16-denormals. |
23 | // AMDGCN targets without fast FMAF (e.g. gfx803) always have +fp32-denormals. |
24 | // AMDGCN targets with fast FMAF (e.g. gfx900) have +fp32-denormals by |
25 | // default and -fp32-denormals when -fcuda-flush-denormals-to-zero is |
26 | // passed. |
27 | |
28 | // CHECK-LABEL: define void @foo() #0 |
// Empty device function declared with C linkage, so its IR symbol is the
// unmangled @foo. The surrounding directives match its definition line and
// the attribute group (#0) attached to it; the body is intentionally empty.
29 | extern "C" __device__ void foo() {} |
30 | |
31 | // FTZ: attributes #0 = {{.*}} "nvptx-f32ftz"="true" |
32 | // NOFTZ-NOT: attributes #0 = {{.*}} "nvptx-f32ftz" |
33 | // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals |
34 | // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals |
35 | |
36 | // FTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} |
37 | // FTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1} |
38 | |
39 | // NOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} |
40 | // NOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} |
41 | |