diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
commit | 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch) | |
tree | 6942083d7d56bba40ec790a453ca58ad3baf6832 /test/OpenMP | |
parent | 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff) |
Vendor import of clang trunk r321017:vendor/clang/clang-trunk-r321017
Notes
Notes:
svn path=/vendor/clang/dist/; revision=326941
svn path=/vendor/clang/clang-trunk-r321017/; revision=326942; tag=vendor/clang/clang-trunk-r321017
Diffstat (limited to 'test/OpenMP')
304 files changed, 30619 insertions, 3038 deletions
diff --git a/test/OpenMP/atomic_capture_codegen.cpp b/test/OpenMP/atomic_capture_codegen.cpp index 0a22e4276b53..306b83f62463 100644 --- a/test/OpenMP/atomic_capture_codegen.cpp +++ b/test/OpenMP/atomic_capture_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -611,50 +611,6 @@ int main() { // CHECK: store i8 [[DESIRED_I8]], i8* @{{.+}}, #pragma omp atomic capture {bx = civ - bx; bv = bx;} -// CHECK: [[EXPR_RE:%.+]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 0) -// CHECK: [[EXPR_IM:%.+]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 1) -// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ] -// CHECK: [[CONV:%.+]] = zext i16 [[EXPECTED]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CONV]] to float -// <Skip checks for complex calculations> -// CHECK: [[X_RE_ADDR:%.+]] = getelementptr inbounds { float, float }, { float, float }* [[TEMP:%.+]], i32 0, i32 0 -// CHECK: [[X_RE:%.+]] = load float, float* [[X_RE_ADDR]] -// CHECK: [[X_IM_ADDR:%.+]] = getelementptr inbounds { float, float }, { float, float }* [[TEMP]], i32 0, i32 1 -// CHECK: [[X_IM:%.+]] = load float, float* [[X_IM_ADDR]] -// CHECK: [[NEW:%.+]] = fptoui float [[X_RE]] to i16 -// CHECK: store i16 [[NEW]], i16* [[TEMP:%.+]], -// CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]], -// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic -// CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0 -// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1 -// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]] -// CHECK: [[EXIT]] -// CHECK: store i16 [[NEW]], i16* @{{.+}}, -#pragma omp atomic capture - usv = usx /= cfv; -// CHECK: [[EXPR_RE:%.+]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @{{.+}}, i32 0, i32 0) -// CHECK: [[EXPR_IM:%.+]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @{{.+}}, i32 0, i32 1) -// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ] -// CHECK: [[X_RVAL:%.+]] = sitofp i64 [[EXPECTED]] to double -// CHECK: [[ADD_RE:%.+]] = fadd double [[X_RVAL]], [[EXPR_RE]] -// CHECK: [[ADD_IM:%.+]] = fadd double 0.000000e+00, [[EXPR_IM]] -// CHECK: [[DESIRED:%.+]] = fptosi double [[ADD_RE]] to i64 -// CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]], -// CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]], -// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic -// CHECK: [[OLD_X]] = extractvalue { i64, i1 } [[RES]], 0 -// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1 -// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]] -// CHECK: [[EXIT]] -// CHECK: store i64 [[EXPECTED]], i64* @{{.+}}, -#pragma omp atomic capture - {llv = llx; llx += cdv;} // CHECK: [[IDX:%.+]] = load i16, i16* @{{.+}} // CHECK: load i8, i8* // CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32 diff --git a/test/OpenMP/atomic_read_codegen.c b/test/OpenMP/atomic_read_codegen.c index 0cd46e3821fd..0cfb2d26f243 100644 --- a/test/OpenMP/atomic_read_codegen.c +++ b/test/OpenMP/atomic_read_codegen.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics // REQUIRES: x86-registered-target #ifndef HEADER diff --git a/test/OpenMP/atomic_update_codegen.cpp b/test/OpenMP/atomic_update_codegen.cpp index 8c02a43d64e3..1343cd8ad2cd 100644 --- a/test/OpenMP/atomic_update_codegen.cpp +++ b/test/OpenMP/atomic_update_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -554,48 +554,6 @@ int main() { // CHECK: [[EXIT]] #pragma omp atomic bx = civ - bx; -// CHECK: [[EXPR_RE:%.+]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 0) -// CHECK: [[EXPR_IM:%.+]] = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @{{.+}}, i32 0, i32 1) -// CHECK: [[X:%.+]] = load atomic i16, i16* [[X_ADDR:@.+]] monotonic -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[EXPECTED:%.+]] = phi i16 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ] -// CHECK: [[CONV:%.+]] = zext i16 [[EXPECTED]] to i32 -// CHECK: [[X_RVAL:%.+]] = sitofp i32 [[CONV]] to float -// <Skip checks for complex calculations> -// CHECK: [[X_RE_ADDR:%.+]] = getelementptr inbounds { float, float }, { float, float }* [[TEMP:%.+]], i32 0, i32 0 -// CHECK: [[X_RE:%.+]] = load float, float* [[X_RE_ADDR]] -// CHECK: [[X_IM_ADDR:%.+]] = getelementptr inbounds { float, float }, { float, float }* [[TEMP]], i32 0, i32 1 -// CHECK: [[X_IM:%.+]] = load float, float* [[X_IM_ADDR]] -// CHECK: [[DESIRED:%.+]] = fptoui float [[X_RE]] to i16 -// CHECK: store i16 [[DESIRED]], i16* [[TEMP:%.+]] -// CHECK: [[DESIRED:%.+]] = load i16, i16* [[TEMP]] -// CHECK: [[RES:%.+]] = cmpxchg i16* [[X_ADDR]], i16 [[EXPECTED]], i16 [[DESIRED]] monotonic monotonic -// CHECK: [[OLD_X]] = extractvalue { i16, i1 } [[RES]], 0 -// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i16, i1 } [[RES]], 1 -// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]] -// CHECK: [[EXIT]] -#pragma omp atomic update - usx /= cfv; -// CHECK: [[EXPR_RE:%.+]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @{{.+}}, i32 0, i32 0) -// CHECK: [[EXPR_IM:%.+]] = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @{{.+}}, i32 0, i32 1) -// CHECK: [[X:%.+]] = load atomic i64, i64* [[X_ADDR:@.+]] monotonic -// CHECK: br label %[[CONT:.+]] -// CHECK: [[CONT]] -// CHECK: [[EXPECTED:%.+]] = phi i64 [ [[X]], %{{.+}} ], [ [[OLD_X:%.+]], %[[CONT]] ] -// CHECK: [[X_RVAL:%.+]] = sitofp i64 [[EXPECTED]] to double -// CHECK: [[ADD_RE:%.+]] = fadd double [[X_RVAL]], [[EXPR_RE]] -// CHECK: [[ADD_IM:%.+]] = fadd double 0.000000e+00, [[EXPR_IM]] -// CHECK: [[DESIRED:%.+]] = fptosi double [[ADD_RE]] to i64 -// CHECK: store i64 [[DESIRED]], i64* [[TEMP:%.+]] -// CHECK: [[DESIRED:%.+]] = load i64, i64* [[TEMP]] -// CHECK: [[RES:%.+]] = cmpxchg i64* [[X_ADDR]], i64 [[EXPECTED]], i64 [[DESIRED]] monotonic monotonic -// CHECK: [[OLD_X]] = extractvalue { i64, i1 } [[RES]], 0 -// CHECK: [[SUCCESS_FAIL:%.+]] = extractvalue { i64, i1 } [[RES]], 1 -// CHECK: br i1 [[SUCCESS_FAIL]], label %[[EXIT:.+]], label %[[CONT]] -// CHECK: [[EXIT]] -#pragma omp atomic - llx += cdv; // CHECK: [[IDX:%.+]] = load i16, i16* @{{.+}} // CHECK: load i8, i8* // CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32 diff --git a/test/OpenMP/atomic_write_codegen.c b/test/OpenMP/atomic_write_codegen.c index 050d7a510566..0c85b6e88a3f 100644 --- a/test/OpenMP/atomic_write_codegen.c +++ b/test/OpenMP/atomic_write_codegen.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // expected-no-diagnostics // REQUIRES: x86-registered-target #ifndef HEADER diff --git a/test/OpenMP/capturing_in_templates.cpp b/test/OpenMP/capturing_in_templates.cpp index a4dcbee8fab9..9b2d9d304e0a 100644 --- a/test/OpenMP/capturing_in_templates.cpp +++ b/test/OpenMP/capturing_in_templates.cpp @@ -15,7 +15,7 @@ pair<T1, T2> make_pair(T1 &&t1, T2 &&t2) { // CHECK-LABEL: @main int main(int argc, char **argv) { -// CHECK: call i32 @__tgt_target(i32 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i32* null) +// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.+}}.region_id, i32 0, i8** null, i8** null, i64* null, i64* null) #pragma omp target { for (int i = 0; i < 64; ++i) { diff --git a/test/OpenMP/declare_reduction_messages.cpp b/test/OpenMP/declare_reduction_messages.cpp index 198d7756d035..3e5a15ee3dd9 100644 --- a/test/OpenMP/declare_reduction_messages.cpp +++ b/test/OpenMP/declare_reduction_messages.cpp @@ -134,3 +134,21 @@ int main() { } return fun(15) + foo(15); // expected-note {{in instantiation of function template specialization 'foo<int>' requested here}} } + +#if __cplusplus == 201103L +struct A { + A() {} + // expected-note@+1 {{copy constructor is implicitly deleted because 'A' has a user-declared move assignment operator}} + A& operator=(A&&) = default; +}; + +int A_TEST() { + A test; +// expected-error@+1 {{call to implicitly-deleted copy constructor of 'A'}} +#pragma omp declare reduction(+ : A : omp_out) initializer(omp_priv = A()) +// expected-error@+1 {{invalid operands to binary expression ('A' and 'A')}} +#pragma omp parallel reduction(+ : test) + {} + return 0; +} +#endif diff --git a/test/OpenMP/declare_simd_codegen.cpp b/test/OpenMP/declare_simd_codegen.cpp index 47e951999b6a..9ae476677c42 100644 --- a/test/OpenMP/declare_simd_codegen.cpp +++ b/test/OpenMP/declare_simd_codegen.cpp @@ -8,11 +8,22 @@ #pragma omp declare simd linear(d : 8) #pragma omp declare simd inbranch simdlen(32) #pragma omp declare simd notinbranch +void add_1(float *d); + +#pragma omp declare simd linear(d : 8) +#pragma omp declare simd inbranch simdlen(32) +#pragma omp declare simd notinbranch void add_1(float *d) {} +#pragma omp declare simd linear(d : 8) +#pragma omp declare simd inbranch simdlen(32) +#pragma omp declare simd notinbranch +void add_2(float *d); + #pragma omp declare simd aligned(hp, hp2) template <class C> void h(C *hp, C *hp2, C *hq, C *lin) { + add_2(0); } // Explicit specialization with <C=int>. @@ -110,6 +121,7 @@ double foo(double x) { return 0; } // CHECK-DAG: define {{.+}}@_Z3bax2VVPdi( // CHECK-DAG: define {{.+}}@_Z3fooPffi( // CHECK-DAG: define {{.+}}@_Z3food( +// CHECK-DAG: declare {{.+}}@_Z5add_2Pf( // CHECK-DAG: "_ZGVbM4l8__Z5add_1Pf" // CHECK-DAG: "_ZGVbN4l8__Z5add_1Pf" @@ -277,6 +289,23 @@ double foo(double x) { return 0; } // CHECK-DAG: "_ZGVeM16ua16vl1__Z3fooPffi" // CHECK-DAG: "_ZGVeN16ua16vl1__Z3fooPffi" +// CHECK-DAG: "_ZGVbM4l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVbN4l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVcM8l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVcN8l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVdM8l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVdN8l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVeM16l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVeN16l8__Z5add_2Pf" +// CHECK-DAG: "_ZGVbM32v__Z5add_2Pf" +// CHECK-DAG: "_ZGVcM32v__Z5add_2Pf" +// CHECK-DAG: "_ZGVdM32v__Z5add_2Pf" +// CHECK-DAG: "_ZGVeM32v__Z5add_2Pf" +// CHECK-DAG: "_ZGVbN2v__Z5add_2Pf" +// CHECK-DAG: "_ZGVcN4v__Z5add_2Pf" +// CHECK-DAG: "_ZGVdN4v__Z5add_2Pf" +// CHECK-DAG: "_ZGVeN8v__Z5add_2Pf" + // CHECK-DAG: "_ZGVbN2v__Z3food" // CHECK-DAG: "_ZGVcN4v__Z3food" // CHECK-DAG: "_ZGVdN4v__Z3food" diff --git a/test/OpenMP/declare_simd_messages.cpp b/test/OpenMP/declare_simd_messages.cpp index 15971eb14de5..af46283f9a57 100644 --- a/test/OpenMP/declare_simd_messages.cpp +++ b/test/OpenMP/declare_simd_messages.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple=x86_64-pc-win32 -verify -fopenmp -x c++ -std=c++11 -fms-extensions %s +// RUN: %clang_cc1 -triple=x86_64-pc-win32 -verify -fopenmp -x c++ -std=c++11 -fms-extensions -Wno-pragma-pack %s // expected-error@+1 {{expected an OpenMP directive}} #pragma omp declare diff --git a/test/OpenMP/declare_target_ast_print.cpp b/test/OpenMP/declare_target_ast_print.cpp index 591844b79498..6c59563d1d1e 100644 --- a/test/OpenMP/declare_target_ast_print.cpp +++ b/test/OpenMP/declare_target_ast_print.cpp @@ -108,9 +108,7 @@ void f2() { // CHECK: #pragma omp end declare target int c1, c2, c3; -void f3() { -} -#pragma omp declare target link(c1) link(c2), link(c3, f3) +#pragma omp declare target link(c1) link(c2), link(c3) // CHECK: #pragma omp declare target link // CHECK: int c1; // CHECK: #pragma omp end declare target @@ -120,9 +118,33 @@ void f3() { // CHECK: #pragma omp declare target link // CHECK: int c3; // CHECK: #pragma omp end declare target -// CHECK: #pragma omp declare target link -// CHECK: void f3() + +struct SSSt { +#pragma omp declare target + static int a; + int b; +#pragma omp end declare target +}; + +// CHECK: struct SSSt { +// CHECK: #pragma omp declare target +// CHECK: static int a; +// CHECK: #pragma omp end declare target +// CHECK: int b; + +template <class T> +struct SSSTt { +#pragma omp declare target + static T a; + int b; +#pragma omp end declare target +}; + +// CHECK: template <class T> struct SSSTt { +// CHECK: #pragma omp declare target +// CHECK: static T a; // CHECK: #pragma omp end declare target +// CHECK: int b; int main (int argc, char **argv) { foo(); diff --git a/test/OpenMP/declare_target_messages.cpp b/test/OpenMP/declare_target_messages.cpp index b858d53c1ecf..c1c03951a3c1 100644 --- a/test/OpenMP/declare_target_messages.cpp +++ b/test/OpenMP/declare_target_messages.cpp @@ -13,8 +13,16 @@ void f(); #pragma omp declare target map(a) // expected-error {{unexpected 'map' clause, only 'to' or 'link' clauses expected}} +#pragma omp declare target to(foo1) // expected-error {{use of undeclared identifier 'foo1'}} + +#pragma omp declare target link(foo2) // expected-error {{use of undeclared identifier 'foo2'}} + void c(); // expected-warning {{declaration is not declared in any declare target region}} +void func() {} // expected-note {{'func' defined here}} + +#pragma omp declare target link(func) // expected-error {{function name is not allowed in 'link' clause}} + extern int b; struct NonT { @@ -28,16 +36,16 @@ typedef int sint; extern int b; int g; -struct T { // expected-note {{mappable type cannot be polymorphic}} +struct T { int a; virtual int method(); }; -class VC { // expected-note {{mappable type cannot be polymorphic}} +class VC { T member; NonT member1; public: - virtual int method() { T a; return 0; } // expected-error {{type 'T' is not mappable to target}} + virtual int method() { T a; return 0; } }; struct C { @@ -56,7 +64,7 @@ void foo() { b = 0; // expected-note {{used here}} t = 1; // expected-error {{threadprivate variables cannot be used in target constructs}} C object; - VC object1; // expected-error {{type 'VC' is not mappable to target}} + VC object1; g = object.method(); g += object.method1(); g += object1.method(); @@ -73,9 +81,9 @@ int C::method() { } struct S { -#pragma omp declare target // expected-error {{directive must be at file or namespace scope}} +#pragma omp declare target int v; -#pragma omp end declare target // expected-error {{unexpected OpenMP directive '#pragma omp end declare target'}} +#pragma omp end declare target }; int main (int argc, char **argv) { diff --git a/test/OpenMP/distribute_codegen.cpp b/test/OpenMP/distribute_codegen.cpp index 37f00f09178b..7ea17b116c88 100644 --- a/test/OpenMP/distribute_codegen.cpp +++ b/test/OpenMP/distribute_codegen.cpp @@ -23,6 +23,7 @@ // CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { @@ -34,7 +35,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { } } -// CHECK: define {{.*}}void @.omp_outlined.(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) +// CHECK: define {{.*}}void @{{.+}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) // CHECK: [[TID_ADDR:%.+]] = alloca i32* // CHECK: [[IV:%.+iv]] = alloca i32 // CHECK: [[LB:%.+lb]] = alloca i32 @@ -48,7 +49,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK-DAG: store i32 0, i32* [[LAST]] // CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] // CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] -// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) // CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] // CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 // CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] @@ -82,7 +83,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK: [[BBINNEND]]: // CHECK: br label %[[LPEXIT:.+]] // CHECK: [[LPEXIT]]: -// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]]) +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) // CHECK: ret void @@ -110,7 +111,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK-DAG: store i32 0, i32* [[LAST]] // CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] // CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] -// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) // CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] // CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 // CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] @@ -144,7 +145,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK: [[BBINNEND]]: // CHECK: br label %[[LPEXIT:.+]] // CHECK: [[LPEXIT]]: -// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]]) +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) // CHECK: ret void @@ -172,7 +173,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK-DAG: store i32 0, i32* [[LAST]] // CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] // CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] -// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5) +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5) // CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] // CHECK-DAG: [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288 // CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] @@ -206,7 +207,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK: [[BBINNEND]]: // CHECK: br label %[[LPEXIT:.+]] // CHECK: [[LPEXIT]]: -// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]]) +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) // CHECK: ret void // CHECK-LABEL: test_precond diff --git a/test/OpenMP/distribute_firstprivate_codegen.cpp b/test/OpenMP/distribute_firstprivate_codegen.cpp index 718fc875254c..98c99d805ed5 100644 --- a/test/OpenMP/distribute_firstprivate_codegen.cpp +++ b/test/OpenMP/distribute_firstprivate_codegen.cpp @@ -63,16 +63,13 @@ int main() { #pragma omp teams #pragma omp distribute firstprivate(g, g1, svar, sfvar) for (int i = 0; i < 2; ++i) { - // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - // LAMBDA-32: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) + // LAMBDA: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], double*{{.*}} [[G1_IN:%.+]], i32*{{.*}} [[SVAR_IN:%.+]], float*{{.*}} [[SFVAR_IN:%.+]]) // Private alloca's for conversion - // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*, // LAMBDA: [[G1_REF:%.+]] = alloca double*, - // LAMBDA: [[TMP:%.+]] = alloca double*, // Actual private variables to be used in the body (tmp is used for the reference type) // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, @@ -82,31 +79,25 @@ int main() { // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, // Store input parameter addresses into private alloca's for conversion - // LAMBDA-64: store i{{[0-9]+}} [[G_IN]], i{{[0-9]+}}* [[G_ADDR]], - // LAMBDA-32: store double* [[G_IN]], double** [[G_ADDR]], - // LAMBDA: store i{{[0-9]+}} [[G1_IN]], i{{[0-9]+}}* [[G1_ADDR]], - // LAMBDA: store i{{[0-9]+}} [[SVAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]], - // LAMBDA: store i{{[0-9]+}} [[SFVAR_IN]], i{{[0-9]+}}* [[SFVAR_ADDR]], - - // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast i{{[0-9]+}}* [[G_ADDR]] to double* - // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]], - // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast i{{[0-9]+}}* [[G1_ADDR]] to double* - // LAMBDA-DAG: store double* [[G1_CONV]], double** [[G1_REF]], - // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[SVAR_ADDR]] to i{{[0-9]+}}* - // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[SFVAR_ADDR]] to float* - // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load double*, double** [[G1_REF]], - // LAMBDA-DAG: store double* [[G1_REF_VAL]], double** [[TMP]], - // LAMBDA-64-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_CONV]], - // LAMBDA-32-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_ADDR_VAL]], + // LAMBDA: store double* [[G_IN]], double** [[G_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_ADDR]], + + // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]], + // LAMBDA-DAG: [[SVAR_ADDR_VAL:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], + // LAMBDA-DAG: [[SFVAR_ADDR_VAL:%.+]] = load float*, float** [[SFVAR_ADDR]], + // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_ADDR]], + // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF]], + // LAMBDA-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_ADDR_VAL]], // LAMBDA-DAG: store double [[G_CONV_VAL]], double* [[G_PRIVATE]], - // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[TMP]], + // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[G1_REF]], // LAMBDA-DAG: [[TMP_VAL_VAL:%.+]] = load{{.*}} double, double* [[TMP_VAL]], // LAMBDA-DAG: store double [[TMP_VAL_VAL]], double* [[G1_PRIVATE]], // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]], - // LAMBDA-64-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_CONV]], - // LAMBDA-32-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR]], + // LAMBDA-DAG: [[SVAR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR_VAL]], // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_CONV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE]], - // LAMBDA-DAG: [[SFVAR_CONV_VAL:%.+]] = load float, float* [[SFVAR_CONV]], + // LAMBDA-DAG: [[SFVAR_CONV_VAL:%.+]] = load float, float* [[SFVAR_ADDR_VAL]], // LAMBDA-DAG: store float [[SFVAR_CONV_VAL]], float* [[SFVAR_PRIVATE]], // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( g += 1; @@ -204,16 +195,18 @@ int main() { // CHECK: ret // CHECK: define{{.+}} [[OFFLOAD_FUN]]( -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void // CHECK: ret // -// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) +// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]]) -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*, // discard omp loop variables @@ -222,6 +215,7 @@ int main() { // CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -230,17 +224,16 @@ int main() { // CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, // CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, -// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], // CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], // CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], -// CHECK: store i{{[0-9]+}} [[SVAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], // init t_var -// CHECK-64-DAG: [[T_VAR_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_ADDR]] to i{{[0-9]+}}* -// CHECK-64-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK-DAG: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], +// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], // init vec // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], @@ -278,10 +271,9 @@ int main() { // CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], // init svar -// CHECK-64-DAG: [[SVAR_ADDR_CONV:%.+]] = bitcast{{.+}} [[SVAR_ADDR]] to{{.+}} -// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR_CONV]], -// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]], -// CHECK-DAG: store{{.+}} [[SVAR_CONV_VAL]],{{.+}} [[SVAR_PRIV]], +// CHECK-DAG: [[SVAR_REF:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]], +// CHECK-DAG: [[SVAR_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_REF]], +// CHECK-DAG: store{{.+}} [[SVAR_VAL]],{{.+}} [[SVAR_PRIV]], // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, // CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}}, @@ -301,12 +293,14 @@ int main() { // CHECK: ret // CHECK: define{{.+}} [[OFFLOAD_FUN_1]]( -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void // CHECK: ret // -// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]]) +// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]]) -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, @@ -318,6 +312,7 @@ int main() { // CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, // CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -325,16 +320,15 @@ int main() { // CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], // CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], // CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], // CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], // init t_var -// CHECK-64-DAG: [[T_VAR_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[T_VAR_ADDR]] to i{{[0-9]+}}* -// CHECK-64-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], -// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK-DAG: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], +// CHECK-DAG: store i{{[0-9]+}} [[T_VAR]], i{{[0-9]+}}* [[T_VAR_PRIV]], // init vec // CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], diff --git a/test/OpenMP/distribute_firstprivate_messages.cpp b/test/OpenMP/distribute_firstprivate_messages.cpp index 5d371abbfa8e..a59c9528090f 100644 --- a/test/OpenMP/distribute_firstprivate_messages.cpp +++ b/test/OpenMP/distribute_firstprivate_messages.cpp @@ -135,11 +135,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); // expected-error {{loop iteration variable in the associated loop of 'omp distribute' directive may not be firstprivate, predetermined as private}} #pragma omp target #pragma omp teams private(argc) // expected-note {{defined as private}} - #pragma omp distribute firstprivate(argc) // expected-error {{private variable in '#pragma omp teams' cannot be firstprivate in '#pragma omp distribute'}} + #pragma omp distribute firstprivate(argc) // expected-error {{firstprivate variable must be shared}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams reduction(+:argc) // expected-note {{defined as reduction}} - #pragma omp distribute firstprivate(argc) // expected-error {{reduction variable in '#pragma omp teams' cannot be firstprivate in '#pragma omp distribute'}} + #pragma omp distribute firstprivate(argc) // expected-error {{firstprivate variable must be shared}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams @@ -147,11 +147,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams - #pragma omp distribute lastprivate(argc), firstprivate(argc) // expected-error {{lastprivate variable cannot be firstprivate in '#pragma omp distribute'}} expected-note{{defined as lastprivate}} + #pragma omp distribute lastprivate(argc), firstprivate(argc) // expected-error {{lastprivate variable cannot be firstprivate}} expected-note{{defined as lastprivate}} for (i = 0; i < argc; ++i) foo(); #pragma omp target #pragma omp teams -#pragma omp distribute firstprivate(argc), lastprivate(argc) // expected-error {{lastprivate variable cannot be firstprivate in '#pragma omp distribute'}} expected-note{{defined as firstprivate}} +#pragma omp distribute firstprivate(argc), lastprivate(argc) // expected-error {{firstprivate variable cannot be lastprivate}} expected-note{{defined as firstprivate}} for (i = 0; i < argc; ++i) foo(); return 0; } diff --git a/test/OpenMP/distribute_lastprivate_codegen.cpp b/test/OpenMP/distribute_lastprivate_codegen.cpp index abcbfd8d69c4..bd9dae956f1c 100644 --- a/test/OpenMP/distribute_lastprivate_codegen.cpp +++ b/test/OpenMP/distribute_lastprivate_codegen.cpp @@ -74,6 +74,7 @@ int main() { // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, @@ -202,6 +203,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -303,6 +305,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], diff --git a/test/OpenMP/distribute_parallel_for_ast_print.cpp b/test/OpenMP/distribute_parallel_for_ast_print.cpp index 54a3649e578b..351e15803c09 100644 --- a/test/OpenMP/distribute_parallel_for_ast_print.cpp +++ b/test/OpenMP/distribute_parallel_for_ast_print.cpp @@ -82,7 +82,7 @@ T tmain(T argc) { // CHECK-NEXT: a = 2; #pragma omp target #pragma omp teams -#pragma omp distribute parallel for private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N) +#pragma omp distribute parallel for private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N) for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) @@ -93,8 +93,8 @@ T tmain(T argc) { for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) - a++; - // CHECK: #pragma omp distribute parallel for private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N) + a++; + // CHECK: #pragma omp distribute parallel for private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N) // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: for (int j = 0; j < 2; ++j) // CHECK-NEXT: for (int j = 0; j < 2; ++j) diff --git a/test/OpenMP/distribute_parallel_for_codegen.cpp b/test/OpenMP/distribute_parallel_for_codegen.cpp index 62c38f188851..4fbca7d2f29a 100644 --- a/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -28,6 +28,7 @@ T tmain() { #pragma omp teams #pragma omp distribute parallel for for (int i = 0; i < n; ++i) { + #pragma omp cancel for a[i] = b[i] + c[i]; } diff --git a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp index d12c0aa11cf2..de348272950c 100644 --- a/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -63,17 +63,14 @@ int main() { #pragma omp teams #pragma omp distribute parallel for firstprivate(g, g1, svar, sfvar) for (int i = 0; i < 2; ++i) { - // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) - // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) // addr alloca's - // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*, - // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*, // LAMBDA: [[G1_REF:%.+]] = alloca double*, - // LAMBDA: [[TMP:%.+]] = alloca double*, // private alloca's // LAMBDA: [[G_PRIV:%.+]] = alloca double, @@ -88,31 +85,27 @@ int main() { // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] + // LAMBDA-DAG: [[G1_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G1_ADDR]] + // LAMBDA-DAG: [[SVAR_CONV:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]] + // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = load {{.+}}*, {{.+}}** [[SFVAR_ADDR]] + // init private alloca's with addr alloca's // g - // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to - // LAMBDA-32-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]], // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]], // g1 - // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to - // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}** [[G1_REF]], - // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], - // LAMBDA-DAG: store {{.+}}* [[G1_REF_VAL]], {{.+}}** [[TMP]], - // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]], + // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]], // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]] // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]], // svar - // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to - // LAMBDA-64-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]], - // LAMBDA-32-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]], + // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]], // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]], // sfvar - // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CONV]], // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]], @@ -271,17 +264,19 @@ int main() { // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[OMP_OUTLINED_0:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void // CHECK: ret -// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) +// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]]) +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, // addr alloca's -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, -// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*, // skip loop alloca's @@ -310,9 +305,8 @@ int main() { // init private alloca's with addr alloca's // t-var -// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to -// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_REF:%.+]] = load {{.+}}, {{.+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_REF]], // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]], // vec @@ -340,10 +334,9 @@ int main() { // CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], // svar -// CHECK-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to -// CHECK-64-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV]], -// CHECK-32-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_ADDR]], -// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]], +// CHECK-DAG: [[SVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]], +// CHECK-DAG: [[SVAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_REF]], +// CHECK-DAG: store {{.+}} [[SVAR]], {{.+}}* [[SVAR_PRIV]], // CHECK: call void @__kmpc_for_static_init_4( // pass private alloca's to fork @@ -456,13 +449,15 @@ int main() { // CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]]) // CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void // CHECK: ret -// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) +// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) // addr alloca's -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, // CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, @@ -492,9 +487,8 @@ int main() { // init private alloca's with addr alloca's // t-var -// CHECK-64-DAG: [[T_VAR_CONV:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to -// CHECK-64-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CONV]], -// CHECK-32-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]], // CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]], // vec diff --git a/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp index 3e288c37d42c..67075ca12939 100644 --- a/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_firstprivate_messages.cpp @@ -42,7 +42,7 @@ public: }; class S5 { int a; - S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}} public: S5() : a(0) {} @@ -50,7 +50,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -150,9 +150,10 @@ int foomain(int argc, char **argv) { #pragma omp distribute parallel for firstprivate(i) for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute parallel for lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); #pragma omp parallel private(i) @@ -314,14 +315,16 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute parallel for lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // OK +#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); #pragma omp parallel diff --git a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp index 2e8da79c03bd..fccb0cfdb3d8 100644 --- a/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ b/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp @@ -74,6 +74,7 @@ int main() { // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, @@ -141,6 +142,7 @@ int main() { // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, // private alloca's // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, @@ -162,7 +164,7 @@ int main() { // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], - // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( @@ -273,6 +275,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -371,6 +374,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -486,6 +490,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], @@ -566,6 +571,7 @@ int main() { // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], diff --git a/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp index 745007fc48a8..b38d978cca03 100644 --- a/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -313,14 +313,16 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp distribute parallel for firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // OK +#pragma omp distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/distribute_parallel_for_messages.cpp b/test/OpenMP/distribute_parallel_for_messages.cpp index 35a61df39e40..b1bb8d088e90 100644 --- a/test/OpenMP/distribute_parallel_for_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_messages.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) { foo(); #pragma omp target #pragma omp teams -#pragma omp distribute parallel for +#pragma omp distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp distribute parallel for'}} for (int i = 0; i < argc; ++i) foo(); #pragma omp target @@ -116,3 +116,14 @@ void test_ordered() { ; } +void test_cancel() { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for + for (int i = 0; i < 16; ++i) + for (int j = 0; j < 16; ++j) { +#pragma omp cancel for + ; + } +} + diff --git a/test/OpenMP/distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/distribute_parallel_for_reduction_messages.cpp index 95654a9e5018..0bf1bc0472eb 100644 --- a/test/OpenMP/distribute_parallel_for_reduction_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_reduction_messages.cpp @@ -27,9 +27,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp index 6ed3f2192258..21010638be15 100644 --- a/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_ast_print.cpp @@ -28,17 +28,18 @@ public: ++this->a.a; } S7 &operator=(S7 &s) { + int k; #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd private(a) private(this->a) - for (int k = 0; k < s.a.a; ++k) +#pragma omp distribute parallel for simd private(a) private(this->a) linear(k) + for (k = 0; k < s.a.a; ++k) ++s.a.a; return *this; } }; // CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(T::a) -// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) +// CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) linear(k) // CHECK: #pragma omp distribute parallel for simd private(this->a) private(this->a) private(this->S::a) class S8 : public S7<S> { @@ -82,7 +83,7 @@ T tmain(T argc) { // CHECK-NEXT: a = 2; #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N) +#pragma omp distribute parallel for simd private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) schedule(static, N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) dist_schedule(static,N) for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) @@ -93,8 +94,8 @@ T tmain(T argc) { for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) - a++; - // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N) + a++; + // CHECK: #pragma omp distribute parallel for simd private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) schedule(static, N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) dist_schedule(static, N) // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: for (int j = 0; j < 2; ++j) // CHECK-NEXT: for (int j = 0; j < 2; ++j) @@ -135,14 +136,15 @@ int main(int argc, char **argv) { // CHECK-NEXT: for (int j = 0; j < 10; ++j) // CHECK-NEXT: a++; + int i; #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd aligned(x:8) linear(h:2) safelen(8) simdlen(8) - for (int i = 0; i < 100; i++) +#pragma omp distribute parallel for simd aligned(x:8) linear(i:2) safelen(8) simdlen(8) + for (i = 0; i < 100; i++) for (int j = 0; j < 200; j++) a += h + x[j]; - // CHECK: #pragma omp distribute parallel for simd aligned(x: 8) linear(h: 2) safelen(8) simdlen(8) - // CHECK-NEXT: for (int i = 0; i < 100; i++) + // CHECK: #pragma omp distribute parallel for simd aligned(x: 8) linear(i: 2) safelen(8) simdlen(8) + // CHECK-NEXT: for (i = 0; i < 100; i++) // CHECK-NEXT: for (int j = 0; j < 200; j++) // CHECK-NEXT: a += h + x[j]; diff --git a/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_codegen.cpp new file mode 100644 index 000000000000..59e6df90bbcb --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -0,0 +1,2262 @@ +// Test host code gen +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +template <typename T> +T tmain() { + T *a, *b, *c; + int n = 10000; + int ch = 100; + + // no schedule clauses + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // dist_schedule: static no chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd dist_schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // dist_schedule: static chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd dist_schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // schedule: static no chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // schedule: static chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // schedule: dynamic no chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd schedule(dynamic) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + // schedule: dynamic chunk + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd schedule(dynamic, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + } + + return T(); +} + +int main() { + double *a, *b, *c; + int n = 10000; + int ch = 100; + +#ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( + + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( + + // no schedule clauses + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( + // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + + // check EUB for distribute + // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // LAMBDA-DAG: [[EUB_TRUE]]: + // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[EUB_END:.+]] + // LAMBDA-DAG: [[EUB_FALSE]]: + // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: br label %[[EUB_END]] + // LAMBDA-DAG: [[EUB_END]]: + // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] + + // check exit condition + // LAMBDA: [[OMP_JUMP_BACK]]: + // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[DIST_BODY]]: + // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to + // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to + // check that distlb and distub are properly passed to fork_call + // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // LAMBDA: br label %[[DIST_INC:.+]] + + // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch + // LAMBDA: [[DIST_INC]]: + // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] + // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_JUMP_BACK]] + + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + + // implementation of 'parallel for' + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // LAMBDA: [[PF_EUB_TRUE]]: + // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[PF_EUB_END:.+]] + // LAMBDA-DAG: [[PF_EUB_FALSE]]: + // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: br label %[[PF_EUB_END]] + // LAMBDA-DAG: [[PF_EUB_END]]: + // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // LAMBDA: [[OMP_PF_JUMP_BACK]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[PF_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] + + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // dist_schedule: static no chunk (same sa default - no dist_schedule) + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd dist_schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( + // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + + // check EUB for distribute + // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // LAMBDA-DAG: [[EUB_TRUE]]: + // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[EUB_END:.+]] + // LAMBDA-DAG: [[EUB_FALSE]]: + // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: br label %[[EUB_END]] + // LAMBDA-DAG: [[EUB_END]]: + // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] + + // check exit condition + // LAMBDA: [[OMP_JUMP_BACK]]: + // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[DIST_BODY]]: + // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to + // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to + // check that distlb and distub are properly passed to fork_call + // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // LAMBDA: br label %[[DIST_INC:.+]] + + // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch + // LAMBDA: [[DIST_INC]]: + // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] + // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_JUMP_BACK]] + + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + + // implementation of 'parallel for' + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // LAMBDA: [[PF_EUB_TRUE]]: + // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[PF_EUB_END:.+]] + // LAMBDA-DAG: [[PF_EUB_FALSE]]: + // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: br label %[[PF_EUB_END]] + // LAMBDA-DAG: [[PF_EUB_END]]: + // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // LAMBDA: [[OMP_PF_JUMP_BACK]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[PF_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] + + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // dist_schedule: static chunk + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd dist_schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( + // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, + // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] + + // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: + // check EUB for distribute + // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // LAMBDA-DAG: [[EUB_TRUE]]: + // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[EUB_END:.+]] + // LAMBDA-DAG: [[EUB_FALSE]]: + // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // LAMBDA: br label %[[EUB_END]] + // LAMBDA-DAG: [[EUB_END]]: + // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + + // check exit condition + // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] + + // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: + // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] + + // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: + // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], + // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], + // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] + // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[DIST_INNER_LOOP_BODY]]: + // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} + // check that distlb and distub are properly passed to fork_call + // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] + + // check DistInc + // LAMBDA: [[DIST_INNER_LOOP_INC]]: + // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] + // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] + + // LAMBDA: [[DIST_INNER_LOOP_END]]: + // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] + + // LAMBDA: [[DIST_OUTER_LOOP_INC]]: + // check NextLB and NextUB + // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], + // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] + // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], + // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], + // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] + // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], + // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] + + // outer loop exit + // LAMBDA: [[DIST_OUTER_LOOP_END]]: + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + + // skip implementation of 'parallel for': using default scheduling and was tested above + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // schedule: static no chunk + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( + // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca + // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // LAMBDA: ret + + // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // LAMBDA: [[PF_EUB_TRUE]]: + // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // LAMBDA: br label %[[PF_EUB_END:.+]] + // LAMBDA-DAG: [[PF_EUB_FALSE]]: + // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA: br label %[[PF_EUB_END]] + // LAMBDA-DAG: [[PF_EUB_END]]: + // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // LAMBDA: [[OMP_PF_JUMP_BACK]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // LAMBDA: [[PF_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] + + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // schedule: static chunk + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // LAMBDA: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // check PrevEUB (using PrevUB instead of NumIt as upper bound) + // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: + // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to + // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] + // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] + // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // LAMBDA: [[PF_EUB_TRUE]]: + // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA: br label %[[PF_EUB_END:.+]] + // LAMBDA-DAG: [[PF_EUB_FALSE]]: + // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to + // LAMBDA: br label %[[PF_EUB_END]] + // LAMBDA-DAG: [[PF_EUB_END]]: + // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] + // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] + // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to + // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], + // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], + + // initialize omp.iv (IV = LB) + // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + + // outer loop: while (IV < UB) { + // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: + // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] + + // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // LAMBDA: br{{.+}} + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] + + // check NextLB and NextUB + // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: + // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], + // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] + // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], + // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] + // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: + // LAMBDA-DAG: call void @__kmpc_for_static_fini( + // LAMBDA: ret + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // schedule: dynamic no chunk + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(dynamic) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // LAMBDA: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: + // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) + // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 + // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // initialize omp.iv (IV = LB) + // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: + // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + + // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // LAMBDA: br{{.+}} + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] + + // check NextLB and NextUB + // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: + // LAMBDA: ret + [&]() { + a[i] = b[i] + c[i]; + }(); + } + + // schedule: dynamic chunk + #pragma omp target + #pragma omp teams + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(dynamic, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( + // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // LAMBDA: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: + // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) + // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 + // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // initialize omp.iv (IV = LB) + // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: + // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + + // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: + // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // LAMBDA: br{{.+}} + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: + // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] + + // check NextLB and NextUB + // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: + // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: + // LAMBDA: ret + [&]() { + a[i] = b[i] + c[i]; + }(); + } + }(); + return 0; +#else + // CHECK-LABEL: @main + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( + + // CHECK: call i{{[0-9]+}} @__tgt_target_teams( + // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( + + // CHECK: call{{.+}} [[TMAIN:@.+]]() + + // no schedule clauses + #pragma omp target + #pragma omp teams + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( + // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + + // check EUB for distribute + // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // CHECK-DAG: [[EUB_TRUE]]: + // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[EUB_END:.+]] + // CHECK-DAG: [[EUB_FALSE]]: + // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: br label %[[EUB_END]] + // CHECK-DAG: [[EUB_END]]: + // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_JUMP_BACK:.+]] + + // check exit condition + // CHECK: [[OMP_JUMP_BACK]]: + // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[DIST_BODY]]: + // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} + // check that distlb and distub are properly passed to fork_call + // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // CHECK: br label %[[DIST_INC:.+]] + + // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch + // CHECK: [[DIST_INC]]: + // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] + // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_JUMP_BACK]] + + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + + // implementation of 'parallel for' + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // CHECK: [[PF_EUB_TRUE]]: + // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[PF_EUB_END:.+]] + // CHECK-DAG: [[PF_EUB_FALSE]]: + // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: br label %[[PF_EUB_END]] + // CHECK-DAG: [[PF_EUB_END]]: + // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // CHECK: [[OMP_PF_JUMP_BACK]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[PF_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK]] + + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + } + + // dist_schedule: static no chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd dist_schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( + // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + + // check EUB for distribute + // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // CHECK-DAG: [[EUB_TRUE]]: + // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[EUB_END:.+]] + // CHECK-DAG: [[EUB_FALSE]]: + // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: br label %[[EUB_END]] + // CHECK-DAG: [[EUB_END]]: + // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_JUMP_BACK:.+]] + + // check exit condition + // CHECK: [[OMP_JUMP_BACK]]: + // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[DIST_BODY]]: + // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} + // check that distlb and distub are properly passed to fork_call + // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // CHECK: br label %[[DIST_INC:.+]] + + // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch + // CHECK: [[DIST_INC]]: + // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] + // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_JUMP_BACK]] + + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + + // implementation of 'parallel for' + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // CHECK: [[PF_EUB_TRUE]]: + // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[PF_EUB_END:.+]] + // CHECK-DAG: [[PF_EUB_FALSE]]: + // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: br label %[[PF_EUB_END]] + // CHECK-DAG: [[PF_EUB_END]]: + // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // CHECK: [[OMP_PF_JUMP_BACK]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[PF_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK]] + + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + } + + // dist_schedule: static chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd dist_schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( + // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, + // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] + + // CHECK: [[DIST_OUTER_LOOP_HEADER]]: + // check EUB for distribute + // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] + // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] + // CHECK-DAG: [[EUB_TRUE]]: + // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[EUB_END:.+]] + // CHECK-DAG: [[EUB_FALSE]]: + // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], + // CHECK: br label %[[EUB_END]] + // CHECK-DAG: [[EUB_END]]: + // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] + // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + + // initialize omp.iv + // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], + // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + + // check exit condition + // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], + // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] + // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] + + // CHECK: [[DIST_OUTER_LOOP_BODY]]: + // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] + + // CHECK: [[DIST_INNER_LOOP_HEADER]]: + // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], + // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], + // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] + // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[DIST_INNER_LOOP_BODY]]: + // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], + // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], + // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} + // check that distlb and distub are properly passed to fork_call + // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) + // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) + // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] + + // check DistInc + // CHECK: [[DIST_INNER_LOOP_INC]]: + // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], + // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] + // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] + + // CHECK: [[DIST_INNER_LOOP_END]]: + // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] + + // CHECK: [[DIST_OUTER_LOOP_INC]]: + // check NextLB and NextUB + // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], + // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], + // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] + // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], + // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], + // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], + // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] + // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], + // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] + + // outer loop exit + // CHECK: [[DIST_OUTER_LOOP_END]]: + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + + // skip implementation of 'parallel for': using default scheduling and was tested above + } + + // schedule: static no chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(static) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( + // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca + // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca + // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca + // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // CHECK: ret + + // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + + // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used + // In this case we use EUB + // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, + // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] + // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // CHECK: [[PF_EUB_TRUE]]: + // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, + // CHECK: br label %[[PF_EUB_END:.+]] + // CHECK-DAG: [[PF_EUB_FALSE]]: + // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK: br label %[[PF_EUB_END]] + // CHECK-DAG: [[PF_EUB_END]]: + // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] + // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + + // initialize omp.iv + // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + + // check exit condition + // CHECK: [[OMP_PF_JUMP_BACK]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + + // check that PrevLB and PrevUB are passed to the 'for' + // CHECK: [[PF_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: br label {{.+}} + + // check stride 1 for 'for' in 'distribute parallel for simd' + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 + // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_JUMP_BACK]] + + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + } + + // schedule: static chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(static, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // CHECK: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // check PrevEUB (using PrevUB instead of NumIt as upper bound) + // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: + // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to + // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] + // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] + // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] + // CHECK: [[PF_EUB_TRUE]]: + // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK: br label %[[PF_EUB_END:.+]] + // CHECK-DAG: [[PF_EUB_FALSE]]: + // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], + // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to + // CHECK: br label %[[PF_EUB_END]] + // CHECK-DAG: [[PF_EUB_END]]: + // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] + // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] + // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to + // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], + // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], + + // initialize omp.iv (IV = LB) + // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + + // outer loop: while (IV < UB) { + // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] + // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: + // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] + + // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // CHECK: br{{.+}} + // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // CHECK: [[OMP_PF_INNER_LOOP_INC]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] + + // check NextLB and NextUB + // CHECK: [[OMP_PF_INNER_LOOP_END]]: + // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: + // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], + // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] + // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], + // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], + // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] + // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // CHECK: [[OMP_PF_OUTER_LOOP_END]]: + // CHECK-DAG: call void @__kmpc_for_static_fini( + // CHECK: ret + } + + // schedule: dynamic no chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(dynamic) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // CHECK: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: + // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) + // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 + // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // initialize omp.iv (IV = LB) + // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: + // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + + // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // CHECK: br{{.+}} + // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // CHECK: [[OMP_PF_INNER_LOOP_INC]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] + + // check NextLB and NextUB + // CHECK: [[OMP_PF_INNER_LOOP_END]]: + // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // CHECK: [[OMP_PF_OUTER_LOOP_END]]: + // CHECK: ret + } + + // schedule: dynamic chunk + #pragma omp target + #pragma omp teams + // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) + + #pragma omp distribute parallel for simd schedule(dynamic, ch) + for (int i = 0; i < n; ++i) { + a[i] = b[i] + c[i]; + // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( + // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, + // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case + // CHECK: ret + + // 'parallel for' implementation using outer and inner loops and PrevEUB + // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) + // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + + // initialize lb and ub to PrevLB and PrevUB + // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], + // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], + // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} + // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], + // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], + // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], + // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], + // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], + // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: + // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) + // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 + // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + + // initialize omp.iv (IV = LB) + // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: + // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], + // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + + // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], + // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] + // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + + // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: + // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], + // skip body branch + // CHECK: br{{.+}} + // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + + // IV = IV + 1 and inner loop latch + // CHECK: [[OMP_PF_INNER_LOOP_INC]]: + // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], + // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 + // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], + // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] + + // check NextLB and NextUB + // CHECK: [[OMP_PF_INNER_LOOP_END]]: + // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + + // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: + // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + + // CHECK: [[OMP_PF_OUTER_LOOP_END]]: + // CHECK: ret + } + + return tmain<int>(); +#endif +} + +// check code +// CHECK: define{{.+}} [[TMAIN]]() + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_3:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_4:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_5:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_6:@.+]]( + +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_7:@.+]]( + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( +// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca +// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca +// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + +// check EUB for distribute +// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] +// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] +// CHECK-DAG: [[EUB_TRUE]]: +// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[EUB_END:.+]] +// CHECK-DAG: [[EUB_FALSE]]: +// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: br label %[[EUB_END]] +// CHECK-DAG: [[EUB_END]]: +// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] +// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + +// initialize omp.iv +// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], +// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_JUMP_BACK:.+]] + +// check exit condition +// CHECK: [[OMP_JUMP_BACK]]: +// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], +// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] +// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[DIST_BODY]]: +// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], +// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], +// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} +// check that distlb and distub are properly passed to fork_call +// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) +// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) +// CHECK: br label %[[DIST_INC:.+]] + +// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch +// CHECK: [[DIST_INC]]: +// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], +// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] +// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_JUMP_BACK]] + +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// implementation of 'parallel for' +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + +// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used +// In this case we use EUB +// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] +// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] +// CHECK: [[PF_EUB_TRUE]]: +// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[PF_EUB_END:.+]] +// CHECK-DAG: [[PF_EUB_FALSE]]: +// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: br label %[[PF_EUB_END]] +// CHECK-DAG: [[PF_EUB_END]]: +// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] +// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + +// initialize omp.iv +// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + +// check exit condition +// CHECK: [[OMP_PF_JUMP_BACK]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] +// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[PF_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: br label {{.+}} + +// check stride 1 for 'for' in 'distribute parallel for simd' +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 +// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK]] + +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( +// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca +// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca +// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, + +// check EUB for distribute +// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] +// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] +// CHECK-DAG: [[EUB_TRUE]]: +// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[EUB_END:.+]] +// CHECK-DAG: [[EUB_FALSE]]: +// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: br label %[[EUB_END]] +// CHECK-DAG: [[EUB_END]]: +// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] +// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + +// initialize omp.iv +// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], +// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_JUMP_BACK:.+]] + +// check exit condition +// CHECK: [[OMP_JUMP_BACK]]: +// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], +// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] +// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[DIST_BODY]]: +// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], +// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], +// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} +// check that distlb and distub are properly passed to fork_call +// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) +// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) +// CHECK: br label %[[DIST_INC:.+]] + +// increment by stride (distInc - 'parallel for' executes the whole chunk) and latch +// CHECK: [[DIST_INC]]: +// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], +// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] +// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_JUMP_BACK]] + +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// implementation of 'parallel for' +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + +// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used +// In this case we use EUB +// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] +// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] +// CHECK: [[PF_EUB_TRUE]]: +// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[PF_EUB_END:.+]] +// CHECK-DAG: [[PF_EUB_FALSE]]: +// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: br label %[[PF_EUB_END]] +// CHECK-DAG: [[PF_EUB_END]]: +// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] +// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + +// initialize omp.iv +// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + +// check exit condition +// CHECK: [[OMP_PF_JUMP_BACK]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] +// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[PF_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: br label {{.+}} + +// check stride 1 for 'for' in 'distribute parallel for simd' +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 +// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK]] + +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( +// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca +// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca +// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + +// unlike the previous tests, in this one we have a outer and inner loop for 'distribute' +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, +// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] + +// CHECK: [[DIST_OUTER_LOOP_HEADER]]: +// check EUB for distribute +// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] +// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] +// CHECK-DAG: [[EUB_TRUE]]: +// CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[EUB_END:.+]] +// CHECK-DAG: [[EUB_FALSE]]: +// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], +// CHECK: br label %[[EUB_END]] +// CHECK-DAG: [[EUB_END]]: +// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] +// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], + +// initialize omp.iv +// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], +// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], + +// check exit condition +// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], +// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] +// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] + +// CHECK: [[DIST_OUTER_LOOP_BODY]]: +// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] + +// CHECK: [[DIST_INNER_LOOP_HEADER]]: +// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], +// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], +// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] +// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[DIST_INNER_LOOP_BODY]]: +// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], +// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], +// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} +// check that distlb and distub are properly passed to fork_call +// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) +// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) +// CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] + +// check DistInc +// CHECK: [[DIST_INNER_LOOP_INC]]: +// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], +// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] +// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[DIST_INNER_LOOP_HEADER]] + +// CHECK: [[DIST_INNER_LOOP_END]]: +// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] + +// CHECK: [[DIST_OUTER_LOOP_INC]]: +// check NextLB and NextUB +// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], +// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], +// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] +// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], +// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], +// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], +// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] +// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], +// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] + +// outer loop exit +// CHECK: [[DIST_OUTER_LOOP_END]]: +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// skip implementation of 'parallel for': using default scheduling and was tested above + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( +// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca +// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca +// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca +// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca + +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, +// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case +// CHECK: ret + +// 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) + +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) + +// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used +// In this case we use EUB +// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, +// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] +// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] +// CHECK: [[PF_EUB_TRUE]]: +// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, +// CHECK: br label %[[PF_EUB_END:.+]] +// CHECK-DAG: [[PF_EUB_FALSE]]: +// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK: br label %[[PF_EUB_END]] +// CHECK-DAG: [[PF_EUB_END]]: +// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] +// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], + +// initialize omp.iv +// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] + +// check exit condition +// CHECK: [[OMP_PF_JUMP_BACK]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] +// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] + +// check that PrevLB and PrevUB are passed to the 'for' +// CHECK: [[PF_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: br label {{.+}} + +// check stride 1 for 'for' in 'distribute parallel for simd' +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 +// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_JUMP_BACK]] + +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, +// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case +// CHECK: ret + +// 'parallel for' implementation using outer and inner loops and PrevEUB +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + +// check PrevEUB (using PrevUB instead of NumIt as upper bound) +// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: +// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to +// CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] +// CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] +// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] +// CHECK: [[PF_EUB_TRUE]]: +// CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK: br label %[[PF_EUB_END:.+]] +// CHECK-DAG: [[PF_EUB_FALSE]]: +// CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], +// CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to +// CHECK: br label %[[PF_EUB_END]] +// CHECK-DAG: [[PF_EUB_END]]: +// CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] +// CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] +// CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to +// CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], +// CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], + +// initialize omp.iv (IV = LB) +// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], + +// outer loop: while (IV < UB) { +// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] +// CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: +// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] + +// CHECK: [[OMP_PF_INNER_FOR_HEADER]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] +// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + +// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// skip body branch +// CHECK: br{{.+}} +// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + +// IV = IV + 1 and inner loop latch +// CHECK: [[OMP_PF_INNER_LOOP_INC]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 +// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] + +// check NextLB and NextUB +// CHECK: [[OMP_PF_INNER_LOOP_END]]: +// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: +// CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], +// CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] +// CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], +// CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], +// CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], +// CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] +// CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + +// CHECK: [[OMP_PF_OUTER_LOOP_END]]: +// CHECK-DAG: call void @__kmpc_for_static_fini( +// CHECK: ret + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, +// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case +// CHECK: ret + +// 'parallel for' implementation using outer and inner loops and PrevEUB +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], +// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: +// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) +// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 +// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + +// initialize omp.iv (IV = LB) +// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: +// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + +// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] +// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + +// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// skip body branch +// CHECK: br{{.+}} +// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + +// IV = IV + 1 and inner loop latch +// CHECK: [[OMP_PF_INNER_LOOP_INC]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 +// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] + +// check NextLB and NextUB +// CHECK: [[OMP_PF_INNER_LOOP_END]]: +// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + +// CHECK: [[OMP_PF_OUTER_LOOP_END]]: +// CHECK: ret + +// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, +// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case +// CHECK: ret + +// 'parallel for' implementation using outer and inner loops and PrevEUB +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) +// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, +// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, + +// initialize lb and ub to PrevLB and PrevUB +// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], +// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], +// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} +// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], +// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], +// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], +// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], +// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], +// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: +// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) +// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 +// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] + +// initialize omp.iv (IV = LB) +// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: +// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], +// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], +// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] + +// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], +// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] +// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] + +// CHECK: [[OMP_PF_INNER_LOOP_BODY]]: +// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], +// skip body branch +// CHECK: br{{.+}} +// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] + +// IV = IV + 1 and inner loop latch +// CHECK: [[OMP_PF_INNER_LOOP_INC]]: +// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], +// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 +// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], +// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] + +// check NextLB and NextUB +// CHECK: [[OMP_PF_INNER_LOOP_END]]: +// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] + +// CHECK: [[OMP_PF_OUTER_LOOP_INC]]: +// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] + +// CHECK: [[OMP_PF_OUTER_LOOP_END]]: +// CHECK: ret + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp new file mode 100644 index 000000000000..b8d0f4ad7540 --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -0,0 +1,611 @@ +// RxUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd firstprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double* {{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + + // addr alloca's + // LAMBDA: [[G_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*, + // LAMBDA: [[G1_REF:%.+]] = alloca double*, + + // private alloca's + // LAMBDA: [[G_PRIV:%.+]] = alloca double, + // LAMBDA: [[G1_PRIV:%.+]] = alloca double, + // LAMBDA: [[TMP_PRIV:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIV:%.+]] = alloca float, + + // transfer input parameters into addr alloca's + // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], + // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]], + + // init private alloca's with addr alloca's + // g + // LAMBDA-DAG: [[G_CONV:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] + // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_CONV]], + // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]], + + // g1 + // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], + // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]], + // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]] + // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]], + + // svar + // LAMBDA-DAG: [[SVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]], + // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_REF]], + // LAMBDA-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_PRIV]], + + // sfvar + // LAMBDA-DAG: [[SFVAR_REF:%.+]] = load {{.+}}*, {{.+}}** [[SFVAR_ADDR]], + // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_REF]], + // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_PRIV]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // pass firstprivate parameters to parallel outlined function + // g + // LAMBDA-64-DAG: [[G_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_PRIV]], + // LAMBDA-64: [[G_CAST_CONV:%.+]] = bitcast {{.+}}* [[G_CAST:%.+]] to + // LAMBDA-64-DAG: store {{.+}} [[G_PRIV_VAL]], {{.+}}* [[G_CAST_CONV]], + // LAMBDA-64-DAG: [[G_PAR:%.+]] = load {{.+}}, {{.+}}* [[G_CAST]], + + // g1 + // LAMBDA-DAG: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV]], + // LAMBDA-DAG: [[G1_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_PRIV_VAL]], + // LAMBDA: [[G1_CAST_CONV:%.+]] = bitcast {{.+}}* [[G1_CAST:%.+]] to + // LAMBDA-DAG: store {{.+}} [[G1_PRIV_VAL]], {{.+}}* [[G1_CAST_CONV]], + // LAMBDA-DAG: [[G1_PAR:%.+]] = load {{.+}}, {{.+}}* [[G1_CAST]], + + // svar + // LAMBDA: [[SVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]], + // LAMBDA-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to + // LAMBDA-64-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST_CONV]], + // LAMBDA-32-DAG: store {{.+}} [[SVAR_VAL]], {{.+}}* [[SVAR_CAST:%.+]], + // LAMBDA-DAG: [[SVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]], + + // sfvar + // LAMBDA: [[SFVAR_VAL:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_PRIV]], + // LAMBDA-DAG: [[SFVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_CAST:%.+]] to + // LAMBDA-DAG: store {{.+}} [[SFVAR_VAL]], {{.+}}* [[SFVAR_CAST_CONV]], + // LAMBDA-DAG: [[SFVAR_PAR:%.+]] = load {{.+}}, {{.+}}* [[SFVAR_CAST]], + + // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PAR]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]]) + // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIV]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PAR]], {{.+}} [[SFVAR_PAR]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: ret void + + + // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, i{{[0-9]+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) + // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[G1_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]], i{{[0-9]+}} [[SFVAR_IN:%.+]]) + // skip initial params + // LAMBDA: {{.+}} = alloca{{.+}}, + // LAMBDA: {{.+}} = alloca{{.+}}, + // LAMBDA: {{.+}} = alloca{{.+}}, + // LAMBDA: {{.+}} = alloca{{.+}}, + + // addr alloca's + // LAMBDA-64: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-32: [[G_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_REF:%.+]] = alloca double*, + + // private alloca's (only for 32-bit) + // LAMBDA-32: [[G_PRIV:%.+]] = alloca double, + + // transfer input parameters into addr alloca's + // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], + // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]], + + // prepare parameters for lambda + // g + // LAMBDA-64-DAG: [[G_CONV:%.+]] = bitcast {{.+}}* [[G_ADDR]] to + // LAMBDA-32-DAG: [[G_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[G_ADDR]] + // LAMBDA-32-DAG: [[G_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[G_ADDR_REF]], + // LAMBDA-32-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]], + + // g1 + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}}* [[G1_ADDR]] to + // LAMBDA-DAG: store {{.+}}* [[G1_CONV]], {{.+}}* [[G1_REF]], + + // svar + // LAMBDA-64-DAG: [[SVAR_CONV:%.+]] = bitcast {{.+}}* [[SVAR_ADDR]] to + + // sfvar + // LAMBDA-DAG: [[SFVAR_CONV:%.+]] = bitcast {{.+}}* [[SFVAR_ADDR]] to + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA-64: store double 1.0{{.+}}, double* [[G_CONV]], + // LAMBDA-32: store double 1.0{{.+}}, double* [[G_PRIV]], + // LAMBDA: [[G1_REF_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]], + // LAMBDA: store {{.+}} 1.0{{.+}}, {{.+}}* [[G1_REF_REF]], + // LAMBDA-64: store {{.+}} 3, {{.+}}* [[SVAR_CONV]], + // LAMBDA-32: store {{.+}} 3, {{.+}}* [[SVAR_ADDR]], + // LAMBDA: store {{.+}} 4.0{{.+}}, {{.+}}* [[SFVAR_CONV]], + + // pass params to inner lambda + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA-64: store double* [[G_CONV]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA-32: store double* [[G_PRIV]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF_REF:%.+]] = load double*, double** [[G1_REF]], + // LAMBDA: store double* [[G1_REF_REF]], double** [[G1_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA-64: store i{{[0-9]+}}* [[SVAR_CONV]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA-32: store i{{[0-9]+}}* [[SVAR_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_CONV]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd firstprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return tmain<int>(); + #endif +} + +// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) + +// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]]) + +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// addr alloca's +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*, + +// skip loop alloca's +// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, + +// private alloca's +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] + +// init addr alloca's with input values +// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], +// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], +// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], + +// init private alloca's with addr alloca's +// t-var +// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]], +// CHECK-DAG: store {{.+}} [[T_VAR]], {{.+}} [[T_VAR_PRIV]], + +// vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to +// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], + +// s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to +// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], +// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] +// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] +// CHECK-DAG: [[CPY_BODY]]: +// CHECK-DAG: call void @llvm.memcpy{{.+}}( +// CHECK-DAG: [[CPY_DONE]]: + +// var +// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to +// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], + +// svar +// CHECK-DAG: [[SVAR_CONV_REF:%.+]] = load {{.+}}*, {{.+}}** [[SVAR_ADDR]], +// CHECK-DAG: [[SVAR_CONV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CONV_REF]], +// CHECK-DAG: store {{.+}} [[SVAR_CONV_VAL]], {{.+}}* [[SVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// pass private alloca's to fork +// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]], +// not dag to distinguish with S_VAR_CAST +// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to +// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]], +// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]], +// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[SVAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_PRIV]], +// CHECK-64-DAG: [[SVAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[SVAR_CAST:%.+]] to +// CHECK-64-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST_CONV]], +// CHECK-32-DAG: store {{.+}} [[SVAR_PRIV_VAL]], {{.+}}* [[SVAR_CAST:%.+]], +// CHECK-DAG: [[SVAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[SVAR_CAST]], +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], [[S_FLOAT_TY]]* [[TMP_PRIV_VAL]], i{{[0-9]+}} [[SVAR_CAST_VAL]]) +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for. +// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel' +// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for' +// in combination +// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]* {{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[SVAR_IN:%.+]]) + +// addr alloca's +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + +// skip loop alloca's +// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, + +// private alloca's +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] + +// init addr alloca's with input values +// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], +// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], +// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]], + +// init private alloca's with addr alloca's +// vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to +// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], + +// s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to +// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], +// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] +// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] +// CHECK-DAG: [[CPY_BODY]]: +// CHECK-DAG: call void @llvm.memcpy{{.+}}( +// CHECK-DAG: [[CPY_DONE]]: + +// var +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to +// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +// template tmain with S_INT_TY +// CHECK-LABEL: define{{.*}} i{{[0-9]+}} @{{.+}}tmain{{.+}}() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( +// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]]) + +// CHECK: define{{.+}} [[OFFLOAD_FUN_0]](i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_0:@.+]] to void +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) + +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// addr alloca's +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// skip loop alloca's +// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_LB:.omp.comb.lb+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_UB:.omp.comb.ub+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, + +// private alloca's +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] + +// init addr alloca's with input values +// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], +// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], +// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], + +// init private alloca's with addr alloca's +// t-var +// CHECK-DAG: [[T_VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_ADDR_REF]], +// CHECK-DAG: store {{.+}} [[T_VAR_ADDR_VAL]], {{.+}} [[T_VAR_PRIV]], + +// vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to +// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], + +// s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to +// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], +// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] +// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] +// CHECK-DAG: [[CPY_BODY]]: +// CHECK-DAG: call void @llvm.memcpy{{.+}}( +// CHECK-DAG: [[CPY_DONE]]: + +// var +// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to +// CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_REF_BCAST]], +// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// pass private alloca's to fork +// CHECK-DAG: [[T_VAR_PRIV_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_PRIV]], +// not dag to distinguish with S_VAR_CAST +// CHECK-64: [[T_VAR_CAST_CONV:%.+]] = bitcast {{.+}}* [[T_VAR_CAST:%.+]] to +// CHECK-64-DAG: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST_CONV]], +// CHECK-32: store {{.+}} [[T_VAR_PRIV_VAL]], {{.+}} [[T_VAR_CAST:%.+]], +// CHECK-DAG: [[T_VAR_CAST_VAL:%.+]] = load {{.+}}, {{.+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[TMP_PRIV_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} [[T_VAR_CAST_VAL]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], [[S_INT_TY]]* [[TMP_PRIV_VAL]]) +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +// By OpenMP specifications, 'firstprivate' applies to both distribute and parallel for. +// However, the support for 'firstprivate' of 'parallel' is only used when 'parallel' +// is found alone. Therefore we only have one 'firstprivate' support for 'parallel for' +// in combination +// CHECK: define internal void [[OMP_PARFOR_OUTLINED_0]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]* {{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_INT_TY]]]* {{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]* {{.+}} [[VAR_IN:%.+]]) + +// addr alloca's +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, + +// skip loop alloca's +// CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_LB:.omp.lb+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_UB:.omp.ub+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_ST:.omp.stride+]] = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:.omp.is_last+]] = alloca i{{[0-9]+}}, + +// private alloca's +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] + +// init addr alloca's with input values +// CHECK-DAG: store {{.+}} [[VEC_IN]], {{.+}} [[VEC_ADDR]], +// CHECK-DAG: store {{.+}} [[T_VAR_IN]], {{.+}}* [[T_VAR_ADDR]], +// CHECK-DAG: store {{.+}} [[S_ARR_IN]], {{.+}} [[S_ARR_ADDR]], +// CHECK-DAG: store {{.+}} [[VAR_IN]], {{.+}} [[VAR_ADDR]], + +// init private alloca's with addr alloca's +// vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast {{.+}} [[VEC_PRIV]] to +// CHECK-DAG: [[VEC_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VEC_ADDR_VAL]] to +// CHECK-DAG: call void @llvm.memcpy{{.+}}({{.+}}* [[VEC_PRIV_BCAST]], {{.+}}* [[VEC_ADDR_BCAST]], + +// s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load {{.+}}*, {{.+}}** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_BGN:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[S_ARR_ADDR_VAL]] to +// CHECK-DAG: [[S_ARR_BGN_GEP:%.+]] = getelementptr {{.+}}, {{.+}}* [[S_ARR_BGN]], +// CHECK-DAG: [[S_ARR_EMPTY:%.+]] = icmp {{.+}} [[S_ARR_BGN]], [[S_ARR_BGN_GEP]] +// CHECK-DAG: br {{.+}} [[S_ARR_EMPTY]], label %[[CPY_DONE:.+]], label %[[CPY_BODY:.+]] +// CHECK-DAG: [[CPY_BODY]]: +// CHECK-DAG: call void @llvm.memcpy{{.+}}( +// CHECK-DAG: [[CPY_DONE]]: + +// var +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to +// CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to +// CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[VAR_ADDR_BCAST]], +// CHECK-DAG: store {{.+}}* [[VAR_PRIV]], {{.+}}** [[TMP_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp index 07d30e48312b..646407643b25 100644 --- a/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_firstprivate_messages.cpp @@ -42,7 +42,7 @@ public: }; class S5 { int a; - S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}} public: S5() : a(0) {} @@ -50,7 +50,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -150,9 +150,10 @@ int foomain(int argc, char **argv) { #pragma omp distribute parallel for simd firstprivate(i) for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); #pragma omp parallel private(i) @@ -314,14 +315,16 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute parallel for simd lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK +#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); #pragma omp parallel diff --git a/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp new file mode 100644 index 000000000000..b55038720134 --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -0,0 +1,192 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; + +// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test +void gtid_test() { +#pragma omp target +#pragma omp teams +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +#pragma omp distribute parallel for simd + for(int i = 0 ; i < 100; i++) {} + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (parallel: false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_1:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @{{.+}}gtid_test + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret + gtid_test(); + } +} + + +template <typename T> +int tmain(T Arg) { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (true) + for(int i = 0 ; i < 100; i++) { + fn1(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (false) + for(int i = 0 ; i < 100; i++) { + fn2(); + } +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (parallel: Arg) + for(int i = 0 ; i < 100; i++) { + fn3(); + } + return 0; +} + +// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() +int main() { +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( +// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (true) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn4 + // CHECK: call void @__kmpc_for_static_fini( + + fn4(); + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_3:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn5 + // CHECK: call void @__kmpc_for_static_fini( + fn5(); + } + +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd if (Arg) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_2]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_4:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn6 + // CHECK: call void @__kmpc_for_static_fini( + fn6(); + } + + return tmain(Arg); +} + +// CHECK-LABEL: define {{.+}} @{{.+}}tmain + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn1 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// CHECK: call void [[T_OUTLINE_FUN_2:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn2 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// call void [[T_OUTLINE_FUN_3:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn3 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp new file mode 100644 index 000000000000..89ef33abd64d --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -0,0 +1,671 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams +#pragma omp distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams +#pragma omp distribute parallel for simd lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // LAMBDA: [[TMP_G1:%.+]] = alloca double*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + + // init addr alloca's + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]], + // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: [[G1_PAR:%.+]] = load{{.+}}, {{.+}} [[TMP_G1_PRIVATE]], + // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]]) + // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[G_PRIVATE]], {{.+}} [[G1_PAR]], {{.+}} [[SVAR_PRIVATE]], {{.+}} [[SFVAR_PRIVATE]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // linear counter + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_COUNTER_BLOCK:.+]], label %[[OMP_COUNTER_DONE:.+]] + // LAMBDA: [[OMP_COUNTER_BLOCK]]: + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* % + // LAMBDA: br label %[[OMP_COUNTER_DONE]] + // LAMBDA: [[OMP_COUNTER_DONE]]: + + // lastprivate + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // outlined function for 'parallel for' + // LAMBDA-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[G_IN:%.+]], {{.+}} [[G1_IN:%.+]], {{.+}} [[SVAR_IN:%.+]], {{.+}} [[SFVAR_IN:%.+]]) + + // addr alloca's + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + + // private alloca's + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + + // init addr alloca's + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + + // loop body + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // lastprivate + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams +#pragma omp distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], + +// call constructor for s_arr +// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]], +// CHECK: br label %[[S_ARR_CST_LOOP:.+]] +// CHECK: [[S_ARR_CST_LOOP]]: +// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} +// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]]) +// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], +// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] +// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] +// CHECK: [[S_ARR_CST_END]]: +// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], +// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], + +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]], +// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]]) +// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]], {{.+}} [[S_VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// outlined function for 'parallel for' +// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]]) +// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]], {{.+}} [[SVAR_IN:%.+]]) + +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], + +// call constructor for s_arr +// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BGN]], +// CHECK: br label %[[S_ARR_CST_LOOP:.+]] +// CHECK: [[S_ARR_CST_LOOP]]: +// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} +// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_CTOR]]) +// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], +// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] +// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] +// CHECK: [[S_ARR_CST_END]]: +// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( + +// loop body +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], + +// load content of local address variables +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR_REF]], [[S_INT_TY]]** [[TMP]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: [[TMP_PRIV_VAL:%.+]] = load {{.+}}, {{.+}} [[TMP_PRIV]], +// CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]]) +// CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to void ({{.+}})*), {{.+}}, {{.+}}, {{.+}} [[VEC_PRIV]], {{.+}} [[T_VAR_PRIV]], {{.+}} [[S_ARR_PRIV]], {{.+}} [[TMP_PRIV_VAL]]) + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* +// CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL_BCAST]],{{.+}}) +// CHECK: ret void + +// outlined function for 'parallel for' +// CHECK-64: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]]) +// CHECK-32: define{{.+}} void [[OMP_PARFOR_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, {{.+}} [[VEC_IN:%.+]], {{.+}} [[T_VAR_IN:%.+]], {{.+}} [[S_ARR_IN:%.+]], {{.+}} [[VAR_IN:%.+]]) + +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], + +// call constructor for s_arr +// CHECK: [[S_ARR_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK: [[S_ARR_END:%.+]] = getelementptr {{.+}} [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BGN]], +// CHECK: br label %[[S_ARR_CST_LOOP:.+]] +// CHECK: [[S_ARR_CST_LOOP]]: +// CHECK: [[S_ARR_CTOR:%.+]] = phi {{.+}} +// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CTOR]]) +// CHECK: [[S_ARR_NEXT:%.+]] = getelementptr {{.+}} [[S_ARR_CTOR]], +// CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]] +// CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]] +// CHECK: [[S_ARR_CST_END]]: +// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]], +// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( + +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif + diff --git a/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp index 109fde0577c4..73450d4f1181 100644 --- a/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -313,14 +313,16 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp distribute parallel for simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // OK +#pragma omp distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp index 632ef066c0fa..66fd94b9e0f2 100644 --- a/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_linear_messages.cpp @@ -18,41 +18,49 @@ void test_linear_colons() { int B = 0; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(::z) @@ -63,6 +71,7 @@ void test_linear_colons() #pragma omp distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(B::ib,B:C1+C2) @@ -148,11 +157,13 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -163,6 +174,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc : 5) @@ -183,6 +195,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(e, g) @@ -193,6 +206,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(i) @@ -202,28 +216,13 @@ template<class I, class C> int foomain(I argc, C **argv) { { int v = 0; int i; + int k; #pragma omp target #pragma omp teams - #pragma omp distribute parallel for simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } + #pragma omp distribute parallel for simd linear(k:i) + for (k = 0; k < argc; ++k) { i = k; v += i; } } -#pragma omp target -#pragma omp teams -#pragma omp parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -262,11 +261,13 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -277,6 +278,7 @@ int main(int argc, char **argv) { #pragma omp distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear (argc) @@ -314,24 +316,14 @@ int main(int argc, char **argv) { #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; + for (i = 0; i < argc; ++i) ++i; #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } + for (i = 0; i < argc; ++i) { ++i; i += 4; } } -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp index 1fedb3c02d79..d137d47ca2a4 100644 --- a/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -790,9 +790,10 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute parallel for simd lastprivate(s) firstprivate(s) +#pragma omp distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/distribute_parallel_for_simd_misc_messages.c b/test/OpenMP/distribute_parallel_for_simd_misc_messages.c index 01c079e3dac1..4d071621fcb8 100644 --- a/test/OpenMP/distribute_parallel_for_simd_misc_messages.c +++ b/test/OpenMP/distribute_parallel_for_simd_misc_messages.c @@ -68,7 +68,7 @@ void test_non_identifiers() { #pragma omp target #pragma omp teams // expected-warning@+1 {{extra tokens at the end of '#pragma omp distribute parallel for simd' are ignored}} -#pragma omp distribute parallel for simd linear(x); +#pragma omp distribute parallel for simd firstprivate(x); for (i = 0; i < 16; ++i) ; @@ -548,89 +548,6 @@ void test_linear() { #pragma omp distribute parallel for simd linear(x, y, z) for (i = 0; i < 16; ++i) ; - - int x, y; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected expression}} -#pragma omp distribute parallel for simd linear(x :) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute parallel for simd linear(x :, ) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(x : 1) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for simd linear(x : 2 * 2) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute parallel for simd linear(x : 1, y) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute parallel for simd linear(x : 1, y, z : 1) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be linear}} -#pragma omp distribute parallel for simd linear(x) linear(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as private}} -// expected-error@+1 {{private variable cannot be linear}} -#pragma omp distribute parallel for simd private(x) linear(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be private}} -#pragma omp distribute parallel for simd linear(x) private(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-warning@+1 {{zero linear step (x and other variables in clause should probably be const)}} -#pragma omp distribute parallel for simd linear(x, y : 0) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be lastprivate}} -#pragma omp distribute parallel for simd linear(x) lastprivate(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as lastprivate}} -// expected-error@+1 {{lastprivate variable cannot be linear}} -#pragma omp distribute parallel for simd lastprivate(x) linear(x) - for (i = 0; i < 16; ++i) - ; } void test_aligned() { @@ -934,16 +851,19 @@ void test_firstprivate() { ; int x, y, z; +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd lastprivate(x) firstprivate(x) for (i = 0; i < 16; ++i) ; +// expected-error@+3 2 {{lastprivate variable cannot be firstprivate}} expected-note@+3 2 {{defined as lastprivate}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd lastprivate(x, y) firstprivate(x, y) for (i = 0; i < 16; ++i) ; +// expected-error@+3 3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 3 {{defined as lastprivate}} #pragma omp target #pragma omp teams #pragma omp distribute parallel for simd lastprivate(x, y, z) firstprivate(x, y, z) diff --git a/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp new file mode 100644 index 000000000000..5804a2a72b71 --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -0,0 +1,121 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T, int C> +int tmain() { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads(C) + for (int i = 0; i < 100; i++) + foo(); +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd num_threads(T(23)) + for (int i = 0; i < 100; i++) + foo(); + return 0; +} + +int main() { + S s(0); + char a = s; +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]() +// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]() +#pragma omp target +#pragma omp teams + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) +#pragma omp distribute parallel for simd num_threads(2) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } +#pragma omp target +#pragma omp teams + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) +#pragma omp distribute parallel for simd num_threads(a) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, + // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], + // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], + // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } + return a + tmain<char, 5>() + tmain<S, 1>(); +} + +// tmain 5 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]]( + +// tmain 1 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( + +// CHECK: define internal void [[T_OFFLOADING_FUN_0]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( +// CHECK-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) +// CHECK-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp new file mode 100644 index 000000000000..8ffe521bfcbc --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp @@ -0,0 +1,297 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd private(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]() + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd private(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, + // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]], + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED:@.+]] to {{.+}}, + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED]]( + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, + + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]], + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams + #pragma omp distribute parallel for simd private(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_0:@.+]]( + +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN_0]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_0:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED_0]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-NOT: alloca [[S_FLOAT_TY]], +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// this is the ctor loop +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_0:@.+]] to {{.+}}, +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +// By OpenMP specifications, private applies to both distribute and parallel for. +// However, the support for 'private' of 'parallel' is only used when 'parallel' +// is found alone. Therefore we only have one 'private' support for 'parallel for' +// in combination +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_0]]( +// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_FLOAT_TY]], +// CHECK-NOT: alloca [[S_FLOAT_TY]], +// CHECK: [[S_VAR_PRIV:%svar+]] = alloca i{{[0-9]+}}, +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// this is the ctor loop +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_DESTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +// template tmain with S_INT_TY +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]]( +// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: ret + +// CHECK: ret + +// CHECK: define internal void [[OFFLOAD_FUN_1]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK-NOT: alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK-NOT: alloca [[S_INT_TY]], +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]( +// CHECK: [[T_VAR_PRIV:%t_var+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%vec+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%s_arr+]] = alloca [2 x [[S_INT_TY]]], +// CHECK-NOT: alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%var+]] = alloca [[S_INT_TY]], +// CHECK-NOT: alloca [[S_INT_TY]], +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// this is the ctor loop +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( + +// call destructors: var.. +// CHECK-DAG: call {{.+}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]]) + +// ..and s_arr +// CHECK: {{.+}}: +// CHECK: [[S_ARR_EL_PAST:%.+]] = phi [[S_INT_TY]]* +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = getelementptr {{.+}}, {{.+}} [[S_ARR_EL_PAST]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_DESTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) + +// CHECK: ret void + +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp new file mode 100644 index 000000000000..7f0b631aee7f --- /dev/null +++ b/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -0,0 +1,93 @@ +// add -fopenmp-targets + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T> +T tmain() { +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(master) + for(int i = 0; i < 1000; i++) {} + return T(); +} + +int main() { + // CHECK-LABEL: @main +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(spread) + for(int i = 0; i < 1000; i++) {} +#pragma omp target +#pragma omp teams +#pragma omp distribute parallel for simd proc_bind(close) + for(int i = 0; i < 1000; i++) {} + return tmain<int>(); +} + +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL1:@.+]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL2:@.+]]() +// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]() +// CHECK: ret i32 [[CALL_RET]] + +// CHECK: define{{.+}} void [[OFFL1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[OFFL2]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[TMAIN]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL3:@.+]]() + +// CHECK: define{{.+}} [[OFFL3]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void +#endif diff --git a/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp index 6ad41d72bf78..94e4541b901b 100644 --- a/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp +++ b/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp @@ -27,9 +27,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/distribute_private_messages.cpp b/test/OpenMP/distribute_private_messages.cpp index 518b64d9e9ca..0aa1e4796975 100644 --- a/test/OpenMP/distribute_private_messages.cpp +++ b/test/OpenMP/distribute_private_messages.cpp @@ -87,7 +87,7 @@ int main(int argc, char **argv) { #pragma omp teams { int i; // expected-note {{predetermined as private}} - #pragma omp distribute firstprivate(i), private(i) // expected-error {{private variable in '#pragma omp teams' cannot be firstprivate in '#pragma omp distribute'}} + #pragma omp distribute firstprivate(i) // expected-error {{firstprivate variable must be shared}} for (int k = 0; k < argc; ++k) ++k; } #pragma omp target diff --git a/test/OpenMP/distribute_simd_ast_print.cpp b/test/OpenMP/distribute_simd_ast_print.cpp index 6d2e916292f5..3246ccb46ae3 100644 --- a/test/OpenMP/distribute_simd_ast_print.cpp +++ b/test/OpenMP/distribute_simd_ast_print.cpp @@ -84,14 +84,14 @@ T tmain(T argc) { #pragma omp target #pragma omp teams -#pragma omp distribute simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) reduction(+ : h) dist_schedule(static,N) +#pragma omp distribute simd private(argc, b), firstprivate(c, d), lastprivate(f) collapse(N) reduction(+ : h) dist_schedule(static,N) for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) for (int k = 0; k < 10; ++k) for (int m = 0; m < 10; ++m) for (int n = 0; n < 10; ++n) a++; -// CHECK: #pragma omp distribute simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) reduction(+: h) dist_schedule(static, N) +// CHECK: #pragma omp distribute simd private(argc,b) firstprivate(c,d) lastprivate(f) collapse(N) reduction(+: h) dist_schedule(static, N) // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: for (int j = 0; j < 2; ++j) // CHECK-NEXT: for (int k = 0; k < 10; ++k) @@ -129,14 +129,15 @@ int main(int argc, char **argv) { // CHECK-NEXT: for (int j = 0; j < 10; ++j) // CHECK-NEXT: a++; + int i; #pragma omp target #pragma omp teams -#pragma omp distribute simd aligned(x:8) linear(h:2) safelen(8) simdlen(8) - for (int i = 0; i < 100; i++) +#pragma omp distribute simd aligned(x:8) linear(i:2) safelen(8) simdlen(8) + for (i = 0; i < 100; i++) for (int j = 0; j < 200; j++) a += h + x[j]; -// CHECK: #pragma omp distribute simd aligned(x: 8) linear(h: 2) safelen(8) simdlen(8) -// CHECK-NEXT: for (int i = 0; i < 100; i++) +// CHECK: #pragma omp distribute simd aligned(x: 8) linear(i: 2) safelen(8) simdlen(8) +// CHECK-NEXT: for (i = 0; i < 100; i++) // CHECK-NEXT: for (int j = 0; j < 200; j++) // CHECK-NEXT: a += h + x[j]; diff --git a/test/OpenMP/distribute_simd_codegen.cpp b/test/OpenMP/distribute_simd_codegen.cpp new file mode 100644 index 000000000000..d8f18c3e431b --- /dev/null +++ b/test/OpenMP/distribute_simd_codegen.cpp @@ -0,0 +1,269 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix HCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK + +// Test target codegen - host bc file has to be created first. (no significant differences with host version of target region) +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void without_schedule_clause(float *a, float *b, float *c, float *d) { + #pragma omp target + #pragma omp teams + #pragma omp distribute simd simdlen(8) aligned(a) + for (int i = 33; i < 32000000; i += 7) { + a[i] = b[i] * c[i] * d[i]; + } +} + +// CHECK: define {{.*}}void @{{.+}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) +// CHECK: [[TID_ADDR:%.+]] = alloca i32* +// CHECK: [[IV:%.+iv]] = alloca i32 +// CHECK: [[LB:%.+lb]] = alloca i32 +// CHECK: [[UB:%.+ub]] = alloca i32 +// CHECK: [[ST:%.+stride]] = alloca i32 +// CHECK: [[LAST:%.+last]] = alloca i32 +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] +// CHECK-DAG: call void @llvm.assume( +// CHECK-DAG: store i32 0, i32* [[LB]] +// CHECK-DAG: store i32 4571423, i32* [[UB]] +// CHECK-DAG: store i32 1, i32* [[ST]] +// CHECK-DAG: store i32 0, i32* [[LAST]] +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] +// CHECK-DAG: [[BBCT]]: +// CHECK-DAG: br label %[[BBCE:.+]] +// CHECK-DAG: [[BBCF]]: +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: br label %[[BBCE]] +// CHECK: [[BBCE]]: +// CHECK: [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ] +// CHECK: store i32 [[SELUB]], i32* [[UB]] +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] +// CHECK: store i32 [[LBV0]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR:.+]] +// CHECK: [[BBINNFOR]]: +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] +// CHECK: [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]] +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]] +// CHECK: [[BBINNBODY]]: +// CHECK: {{.+}} = load i32, i32* [[IV]] +// ... loop body ... +// CHECK: br label %[[BBBODYCONT:.+]] +// CHECK: [[BBBODYCONT]]: +// CHECK: br label %[[BBINNINC:.+]] +// CHECK: [[BBINNINC]]: +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] +// CHECK: [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1 +// CHECK: store i32 [[IVINC]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR]] +// CHECK: [[BBINNEND]]: +// CHECK: br label %[[LPEXIT:.+]] +// CHECK: [[LPEXIT]]: +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) +// CHECK: ret void + + +// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void static_not_chunked(float *a, float *b, float *c, float *d) { + #pragma omp target + #pragma omp teams + #pragma omp distribute simd dist_schedule(static) safelen(32) + for (int i = 32000000; i > 33; i += -7) { + a[i] = b[i] * c[i] * d[i]; + } +} + +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) +// CHECK: [[TID_ADDR:%.+]] = alloca i32* +// CHECK: [[IV:%.+iv]] = alloca i32 +// CHECK: [[LB:%.+lb]] = alloca i32 +// CHECK: [[UB:%.+ub]] = alloca i32 +// CHECK: [[ST:%.+stride]] = alloca i32 +// CHECK: [[LAST:%.+last]] = alloca i32 +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] +// CHECK-DAG: store i32 0, i32* [[LB]] +// CHECK-DAG: store i32 4571423, i32* [[UB]] +// CHECK-DAG: store i32 1, i32* [[ST]] +// CHECK-DAG: store i32 0, i32* [[LAST]] +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] +// CHECK-DAG: [[BBCT]]: +// CHECK-DAG: br label %[[BBCE:.+]] +// CHECK-DAG: [[BBCF]]: +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: br label %[[BBCE]] +// CHECK: [[BBCE]]: +// CHECK: [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ] +// CHECK: store i32 [[SELUB]], i32* [[UB]] +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] +// CHECK: store i32 [[LBV0]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR:.+]] +// CHECK: [[BBINNFOR]]: +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] +// CHECK: [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]] +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]] +// CHECK: [[BBINNBODY]]: +// CHECK: {{.+}} = load i32, i32* [[IV]] +// ... loop body ... +// CHECK: br label %[[BBBODYCONT:.+]] +// CHECK: [[BBBODYCONT]]: +// CHECK: br label %[[BBINNINC:.+]] +// CHECK: [[BBINNINC]]: +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] +// CHECK: [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1 +// CHECK: store i32 [[IVINC]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR]] +// CHECK: [[BBINNEND]]: +// CHECK: br label %[[LPEXIT:.+]] +// CHECK: [[LPEXIT]]: +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) +// CHECK: ret void + + +// CHECK-LABEL: define {{.*void}} @{{.*}}static_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) +void static_chunked(float *a, float *b, float *c, float *d) { + #pragma omp target + #pragma omp teams +#pragma omp distribute simd dist_schedule(static, 5) + for (unsigned i = 131071; i <= 2147483647; i += 127) { + a[i] = b[i] * c[i] * d[i]; + } +} + +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) +// CHECK: [[TID_ADDR:%.+]] = alloca i32* +// CHECK: [[IV:%.+iv]] = alloca i32 +// CHECK: [[LB:%.+lb]] = alloca i32 +// CHECK: [[UB:%.+ub]] = alloca i32 +// CHECK: [[ST:%.+stride]] = alloca i32 +// CHECK: [[LAST:%.+last]] = alloca i32 +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] +// CHECK-DAG: store i32 0, i32* [[LB]] +// CHECK-DAG: store i32 16908288, i32* [[UB]] +// CHECK-DAG: store i32 1, i32* [[ST]] +// CHECK-DAG: store i32 0, i32* [[LAST]] +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5) +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288 +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] +// CHECK-DAG: [[BBCT]]: +// CHECK-DAG: br label %[[BBCE:.+]] +// CHECK-DAG: [[BBCF]]: +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] +// CHECK-DAG: br label %[[BBCE]] +// CHECK: [[BBCE]]: +// CHECK: [[SELUB:%.+]] = phi i32 [ 16908288, %[[BBCT]] ], [ [[UBV1]], %[[BBCF]] ] +// CHECK: store i32 [[SELUB]], i32* [[UB]] +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] +// CHECK: store i32 [[LBV0]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR:.+]] +// CHECK: [[BBINNFOR]]: +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] +// CHECK: [[IVLEUB:%.+]] = icmp ule i32 [[IVVAL0]], [[UBV2]] +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label %[[BBINNEND:.+]] +// CHECK: [[BBINNBODY]]: +// CHECK: {{.+}} = load i32, i32* [[IV]] +// ... loop body ... +// CHECK: br label %[[BBBODYCONT:.+]] +// CHECK: [[BBBODYCONT]]: +// CHECK: br label %[[BBINNINC:.+]] +// CHECK: [[BBINNINC]]: +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] +// CHECK: [[IVINC:%.+]] = add i32 [[IVVAL1]], 1 +// CHECK: store i32 [[IVINC]], i32* [[IV]] +// CHECK: br label %[[BBINNFOR]] +// CHECK: [[BBINNEND]]: +// CHECK: br label %[[LPEXIT:.+]] +// CHECK: [[LPEXIT]]: +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]]) +// CHECK: ret void + +// CHECK-LABEL: test_precond +void test_precond() { + char a = 0; char i; + #pragma omp target + #pragma omp teams + #pragma omp distribute simd linear(i) + for(i = a; i < 10; ++i); +} + +// a is passed as a parameter to the outlined functions +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], i8* dereferenceable({{[0-9]+}}) [[APARM:%.+]]) +// CHECK: store i8* [[APARM]], i8** [[APTRADDR:%.+]] +// ..many loads of %0.. +// CHECK: [[A2:%.+]] = load i8*, i8** [[APTRADDR]] +// CHECK: [[AVAL0:%.+]] = load i8, i8* [[A2]] +// CHECK: store i8 [[AVAL0]], i8* [[CAP_EXPR:%.+]], +// CHECK: [[AVAL1:%.+]] = load i8, i8* [[CAP_EXPR]] +// CHECK: load i8, i8* [[CAP_EXPR]] +// CHECK: [[AVAL2:%.+]] = load i8, i8* [[CAP_EXPR]] +// CHECK: [[ACONV:%.+]] = sext i8 [[AVAL2]] to i32 +// CHECK: [[ACMP:%.+]] = icmp slt i32 [[ACONV]], 10 +// CHECK: br i1 [[ACMP]], label %[[PRECOND_THEN:.+]], label %[[PRECOND_END:.+]] +// CHECK: [[PRECOND_THEN]] +// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: call void @__kmpc_for_static_fini +// CHECK: [[PRECOND_END]] + +// no templates for now, as these require special handling in target regions and/or declare target + +// HCHECK-LABEL: fint +// HCHECK: call {{.*}}i32 {{.+}}ftemplate +// HCHECK: ret i32 + +// HCHECK: load i16, i16* +// HCHECK: store i16 % +// HCHECK: call i32 @__tgt_target_teams( +// HCHECK: call void @__kmpc_for_static_init_4( +template <typename T> +T ftemplate() { + short aa = 0; + +#pragma omp target +#pragma omp teams +#pragma omp distribute simd dist_schedule(static, aa) + for (int i = 0; i < 100; i++) { + } + return T(); +} + +int fint(void) { return ftemplate<int>(); } + +#endif + +// CHECK: !{!"llvm.loop.vectorize.width", i32 8} +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !{!"llvm.loop.vectorize.width", i32 32} diff --git a/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/distribute_simd_firstprivate_codegen.cpp new file mode 100644 index 000000000000..5da9c5dc3b1a --- /dev/null +++ b/test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -0,0 +1,380 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams +#pragma omp distribute simd firstprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams +#pragma omp distribute simd firstprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.*}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // Private alloca's for conversion + // LAMBDA: [[G_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*, + // LAMBDA: [[G1_REF:%.+]] = alloca double*, + + // Actual private variables to be used in the body (tmp is used for the reference type) + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + + // Store input parameter addresses into private alloca's for conversion + // LAMBDA: store double* [[G_IN]], double** [[G_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_ADDR]], + + // LAMBDA-DAG: [[G_ADDR_VAL:%.+]] = load double*, double** [[G_ADDR]], + // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_ADDR]], + // LAMBDA-DAG: [[SVAR_ADDR_VAL:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], + // LAMBDA-DAG: [[SFVAR_ADDR_VAL:%.+]] = load float*, float** [[SFVAR_ADDR]], + // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF]], + + // LAMBDA-DAG: [[G_VAL:%.+]] = load{{.+}} double, double* [[G_ADDR_VAL]], + // LAMBDA-DAG: store double [[G_VAL]], double* [[G_PRIVATE]], + // LAMBDA-DAG: [[G1_VAL_REF:%.+]] = load double*, double** [[G1_REF]], + // LAMBDA-DAG: [[G1_VAL:%.+]] = load{{.+}} double, double* [[G1_VAL_REF]], + // LAMBDA-DAG: store double [[G1_VAL]], double* [[G1_PRIVATE]], + // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]], + // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR_VAL]], + // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load float, float* [[SFVAR_ADDR_VAL]], + // LAMBDA-DAG: store float [[SFVAR_VAL]], float* [[SFVAR_PRIVATE]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + g += 1; + g1 += 1; + svar += 3; + sfvar += 4.0; + // LAMBDA-DAG: [[G_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA-DAG: [[G_NEXT:%.+]] = fadd double [[G_VAL]], 1.{{.+}} + // LAMBDA-DAG: store double [[G_NEXT]], double* [[G_PRIVATE]], + // LAMBDA-DAG: [[TMP_VAL1:%.+]] = load double*, double** [[TMP_PRIVATE]], + // LAMBDA-DAG: [[TMP_VAL_VAL1:%.+]] = load{{.*}} double, double* [[TMP_VAL1]], + // LAMBDA-DAG: [[TMP_ADD:%.+]] = fadd double [[TMP_VAL_VAL1]], 1.{{.+}} + // LAMBDA-DAG: store{{.*}} double [[TMP_ADD]], double* [[TMP_VAL1]], + // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA-DAG: [[SVAR_ADD:%.+]] = add{{.*}} i{{[0-9]+}} [[SVAR_VAL]], 3 + // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_ADD]], i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA-DAG: [[SFVAR_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA-DAG: [[SFVAR_CONV_VAL1:%.+]] = fpext float [[SFVAR_VAL]] to double + // LAMBDA-DAG: [[SFVAR_ADD:%.+]] = fadd double [[SFVAR_CONV_VAL1]], 4.{{.+}} + // LAMBDA-DAG: [[SFVAR_CONV_VAL2:%.+]] = fptrunc double [[SFVAR_ADD]] to float + // LAMBDA-DAG: store float [[SFVAR_CONV_VAL2:%.+]], float* [[SFVAR_PRIVATE]], + + // call inner lambda (use refs to private alloca's) + // LAMBDA: [[GEP_0:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[GEP_0]], + // LAMBDA: [[GEP_1:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[TMP_PAR:%.+]] = load double*, double** [[TMP_PRIVATE]], + // LAMBDA: store double* [[TMP_PAR]], double** [[GEP_1]], + // LAMBDA: [[GEP_2:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[GEP_2]], + // LAMBDA: [[GEP_3:%.+]] = getelementptr{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[GEP_3]], + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{.+}}) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g += 2; + g1 += 2; + svar += 4; + sfvar += 8.0; + // LAMBDA-DAG: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA-DAG: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA-DAG: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]], + // LAMBDA-DAG: [[G_REF_VAL:%.+]] = load double, double* [[G_REF]], + // LAMBDA-DAG: [[G_REF_ADD:%.+]] = fadd double [[G_REF_VAL]], 2.{{.+}} + // LAMBDA-DAG: store double [[G_REF_ADD]], double* [[G_REF]] + + // LAMBDA-DAG: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA-DAG: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA-DAG: [[G1_REF_VAL:%.+]] = load double, double* [[G1_REF]], + // LAMBDA-DAG: [[G1_ADD:%.+]] = fadd double [[G1_REF_VAL]], 2.{{.+}} + // LAMBDA-DAG: store double [[G1_ADD]], double* [[G1_REF]], + + // LAMBDA-DAG: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA-DAG: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA-DAG: [[SVAR_REF_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA-DAG: [[SVAR_ADD:%.+]] = add{{.*}} i{{[0-9]+}} [[SVAR_REF_VAL]], 4 + // LAMBDA-DAG: store i{{[0-9]+}} [[SVAR_ADD]], i{{[0-9]+}}* [[SVAR_REF]] + + // LAMBDA-DAG: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA-DAG: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA-DAG: [[SFVAR_REF_VAL:%.+]] = load float, float* [[SFVAR_REF]] + // LAMBDA-DAG: [[SFVAR_REF_CONV:%.+]] = fpext float [[SFVAR_REF_VAL]] to double + // LAMBDA-DAG: [[SFVAR_ADD:%.+]] = fadd double [[SFVAR_REF_CONV]], 8.{{.+}} + // LAMBDA-DAG: [[SFVAR_ADD_CONV:%.+]] = fptrunc double [[SFVAR_ADD]] to float + // LAMBDA-DAG: store float [[SFVAR_ADD_CONV]], float* [[SFVAR_REF]], + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams + #pragma omp distribute simd firstprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]]( +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.*}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]]) + +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*, + +// discard omp loop variables +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// init t_var +// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV_VAL_REF]], +// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], + +// init vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) + +// init s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_VAL]] to [[S_FLOAT_TY]]* +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]]{{.+}} +// CHECK-DAG: [[S_ARR_PRIV_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BGN]]{{.+}} +// CHECK-DAG: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BGN]], [[S_ARR_PRIV_NEXT]] +// CHECK-DAG: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_CPY_DONE:.+]], label %[[S_ARR_CPY_BODY:.+]] + +// CHECK-DAG: [[S_ARR_CPY_BODY]]: +// CHECK-DAG: [[S_ARR_SRC_PAST:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BCAST]],{{.+}} ], [ [[S_ARR_SRC:%.+]],{{.+}} ] +// CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] +// CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} +// CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} +// CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} +// CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] +// CHECK-DAG: br i1 [[S_ARR_CPY_FIN]], label %[[S_ARR_CPY_DONE]], label %[[S_ARR_CPY_BODY]] +// CHECK-DAG: [[S_ARR_CPY_DONE]]: + +// init var +// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}} +// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}} +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]], + +// init svar +// CHECK-DAG: [[SVAR_CONV_VAL_REF:%.+]] = load{{.+}},{{.+}} [[SVAR_ADDR]], +// CHECK-DAG: [[SVAR_CONV_VAL:%.+]] = load{{.+}},{{.+}} [[SVAR_CONV_VAL_REF]], +// CHECK-DAG: store{{.+}} [[SVAR_CONV_VAL]],{{.+}} [[SVAR_PRIV]], + +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// Template +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]]( +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i{{[0-9]+}}]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[OMP_OUTLINED_1:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.*}} [[VEC_IN:%.+]], [2 x [[S_INT_TY]]]*{{.*}} [[S_ARR_IN:%.+]], [[S_INT_TY]]*{{.*}} [[VAR_IN:%.+]]) + +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// discard omp loop variables +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, +// CHECK: {{.*}} = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK-DAG: [[TMP_PRIV:%.+]] = alloca [[S_INT_TY]]*, + +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN]], [[S_INT_TY]]** [[VAR_ADDR]], + +// init t_var +// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK-DAG: [[T_VAR_ADDR_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_ADDR_CONV_VAL_REF]], +// CHECK-DAG: store i{{[0-9]+}} [[T_VAR_ADDR_CONV_VAL]], i{{[0-9]+}}* [[T_VAR_PRIV]], + +// init vec +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[VEC_ADDR_VAL_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i{{[0-9]+}}* +// CHECK-DAG: [[VEC_PRIV_BCAST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i{{[0-9]+}}* +// CHECK-DAG: call void @llvm.memcpy.{{.*}}(i{{[0-9]+}}* [[VEC_PRIV_BCAST]], i{{[0-9]+}}* [[VEC_ADDR_VAL_BCAST]],{{.+}}) + +// init s_arr +// CHECK-DAG: [[S_ARR_ADDR_VAL:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_VAL]] to [[S_INT_TY]]* +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.+}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]]{{.+}} +// CHECK-DAG: [[S_ARR_PRIV_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BGN]]{{.+}} +// CHECK-DAG: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BGN]], [[S_ARR_PRIV_NEXT]] +// CHECK-DAG: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_CPY_DONE:.+]], label %[[S_ARR_CPY_BODY:.+]] + +// CHECK-DAG: [[S_ARR_CPY_BODY]]: +// CHECK-DAG: [[S_ARR_SRC_PAST:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BCAST]],{{.+}} ], [ [[S_ARR_SRC:%.+]],{{.+}} ] +// CHECK-DAG: [[S_ARR_DST_PAST:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]],{{.+}} ], [ [[S_ARR_DST:%.+]],{{.+}} ] +// CHECK-DAG: [[S_ARR_SRC_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_SRC_PAST]] to{{.+}} +// CHECK-DAG: [[S_ARR_DST_BCAST:%.+]] = bitcast{{.+}} [[S_ARR_DST_PAST]] to{{.+}} +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[S_ARR_DST_BCAST]], {{.+}}* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr{{.+}} +// CHECK-DAG: [[S_ARR_DST]] = getelementptr{{.+}} +// CHECK-DAG: [[S_ARR_CPY_FIN:%.+]] = icmp{{.+}} [[S_ARR_DST]], [[S_ARR_PRIV_NEXT]] +// CHECK-DAG: br i1 [[S_ARR_CPY_FIN]], label %[[S_ARR_CPY_DONE]], label %[[S_ARR_CPY_BODY]] +// CHECK-DAG: [[S_ARR_CPY_DONE]]: + +// init var +// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]], +// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], +// CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}} +// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}} +// CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* [[VAR_PRIV_BCAST]], {{.+}}* [[TMP_BCAST]],{{.+}}) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]], + +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 1, i{{[0-9]+}}* %.omp{{.+}}, +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* %.omp{{.+}}, + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/distribute_simd_firstprivate_messages.cpp b/test/OpenMP/distribute_simd_firstprivate_messages.cpp index b9267a3037f0..f0c293b78d80 100644 --- a/test/OpenMP/distribute_simd_firstprivate_messages.cpp +++ b/test/OpenMP/distribute_simd_firstprivate_messages.cpp @@ -42,7 +42,7 @@ public: }; class S5 { int a; - S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} // expected-note 2 {{implicitly declared private here}} public: S5() : a(0) {} @@ -50,7 +50,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -150,9 +150,10 @@ int foomain(int argc, char **argv) { #pragma omp distribute simd firstprivate(i) for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute simd lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); #pragma omp parallel private(i) @@ -314,14 +315,16 @@ int main(int argc, char **argv) { #pragma omp distribute simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}} +#pragma omp distribute simd lastprivate(g) firstprivate(g) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute simd lastprivate(n) firstprivate(n) // OK +#pragma omp distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); #pragma omp parallel diff --git a/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/distribute_simd_lastprivate_codegen.cpp new file mode 100644 index 000000000000..df842a009879 --- /dev/null +++ b/test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -0,0 +1,393 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams +#pragma omp distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams +#pragma omp distribute simd lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // LAMBDA: [[TMP_G1:%.+]] = alloca double*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]], + // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // linear counter + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_COUNTER_BLOCK:.+]], label %[[OMP_COUNTER_DONE:.+]] + // LAMBDA: [[OMP_COUNTER_BLOCK]]: + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* % + // LAMBDA: br label %[[OMP_COUNTER_DONE]] + // LAMBDA: [[OMP_COUNTER_DONE]]: + + + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams +#pragma omp distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]], +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/distribute_simd_lastprivate_messages.cpp b/test/OpenMP/distribute_simd_lastprivate_messages.cpp index 0f96cb4c3ec8..82ce15e55048 100644 --- a/test/OpenMP/distribute_simd_lastprivate_messages.cpp +++ b/test/OpenMP/distribute_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -313,14 +313,16 @@ int main(int argc, char **argv) { #pragma omp distribute simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{firstprivate variable cannot be lastprivate}} expected-note@+3 {{defined as firstprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp distribute simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+3 {{lastprivate variable cannot be firstprivate}} expected-note@+3 {{defined as lastprivate}} #pragma omp target #pragma omp teams -#pragma omp distribute simd lastprivate(n) firstprivate(n) // OK +#pragma omp distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/distribute_simd_linear_messages.cpp b/test/OpenMP/distribute_simd_linear_messages.cpp index c60e0a247071..e4d71fee8250 100644 --- a/test/OpenMP/distribute_simd_linear_messages.cpp +++ b/test/OpenMP/distribute_simd_linear_messages.cpp @@ -18,41 +18,49 @@ void test_linear_colons() { int B = 0; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(::z) @@ -63,6 +71,7 @@ void test_linear_colons() #pragma omp distribute simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(B::ib,B:C1+C2) @@ -148,11 +157,13 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -163,6 +174,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc : 5) @@ -183,6 +195,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(e, g) @@ -193,37 +206,12 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear(i) for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target - #pragma omp teams - #pragma omp distribute simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target -#pragma omp teams -#pragma omp parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -262,11 +250,13 @@ int main(int argc, char **argv) { #pragma omp distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} @@ -277,6 +267,7 @@ int main(int argc, char **argv) { #pragma omp distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams #pragma omp distribute simd linear (argc) @@ -310,28 +301,18 @@ int main(int argc, char **argv) { #pragma omp parallel { - int i; + int k; #pragma omp target #pragma omp teams - #pragma omp distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; + #pragma omp distribute simd linear(k) + for (k = 0; k < argc; ++k) ++k; #pragma omp target #pragma omp teams - #pragma omp distribute simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } + #pragma omp distribute simd linear(k : 4) + for (k = 0; k < argc; k+=4) { } } -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/distribute_simd_misc_messages.c b/test/OpenMP/distribute_simd_misc_messages.c index 5fc2cb64cc62..f643ed1d5b8e 100644 --- a/test/OpenMP/distribute_simd_misc_messages.c +++ b/test/OpenMP/distribute_simd_misc_messages.c @@ -561,89 +561,6 @@ void test_linear() { #pragma omp distribute simd linear(x, y, z) for (i = 0; i < 16; ++i) ; - - int x, y; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected expression}} -#pragma omp distribute simd linear(x :) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute simd linear(x :, ) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(x : 1) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -#pragma omp distribute simd linear(x : 2 * 2) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute simd linear(x : 1, y) - for (i = 0; i < 16; ++i) - ; -#pragma omp target -#pragma omp teams -// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} -#pragma omp distribute simd linear(x : 1, y, z : 1) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be linear}} -#pragma omp distribute simd linear(x) linear(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as private}} -// expected-error@+1 {{private variable cannot be linear}} -#pragma omp distribute simd private(x) linear(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be private}} -#pragma omp distribute simd linear(x) private(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-warning@+1 {{zero linear step (x and other variables in clause should probably be const)}} -#pragma omp distribute simd linear(x, y : 0) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as linear}} -// expected-error@+1 {{linear variable cannot be lastprivate}} -#pragma omp distribute simd linear(x) lastprivate(x) - for (i = 0; i < 16; ++i) - ; - -#pragma omp target -#pragma omp teams -// expected-note@+2 {{defined as lastprivate}} -// expected-error@+1 {{lastprivate variable cannot be linear}} -#pragma omp distribute simd lastprivate(x) linear(x) - for (i = 0; i < 16; ++i) - ; } void test_aligned() { @@ -1083,26 +1000,26 @@ void test_loop_messages() { } void linear_modifiers(int argc) { - int f; + int k; #pragma omp target #pragma omp teams -#pragma omp distribute simd linear(f) - for (int k = 0; k < argc; ++k) ++k; +#pragma omp distribute simd linear(k) + for (k = 0; k < argc; ++k) ++k; #pragma omp target #pragma omp teams -#pragma omp distribute simd linear(val(f)) - for (int k = 0; k < argc; ++k) ++k; +#pragma omp distribute simd linear(val(k)) + for (k = 0; k < argc; ++k) ++k; #pragma omp target #pragma omp teams -#pragma omp distribute simd linear(uval(f)) // expected-error {{expected 'val' modifier}} - for (int k = 0; k < argc; ++k) ++k; +#pragma omp distribute simd linear(uval(k)) // expected-error {{expected 'val' modifier}} + for (k = 0; k < argc; ++k) ++k; #pragma omp target #pragma omp teams -#pragma omp distribute simd linear(ref(f)) // expected-error {{expected 'val' modifier}} - for (int k = 0; k < argc; ++k) ++k; +#pragma omp distribute simd linear(ref(k)) // expected-error {{expected 'val' modifier}} + for (k = 0; k < argc; ++k) ++k; #pragma omp target #pragma omp teams -#pragma omp distribute simd linear(foo(f)) // expected-error {{expected 'val' modifier}} - for (int k = 0; k < argc; ++k) ++k; +#pragma omp distribute simd linear(foo(k)) // expected-error {{expected 'val' modifier}} + for (k = 0; k < argc; ++k) ++k; } diff --git a/test/OpenMP/distribute_simd_private_codegen.cpp b/test/OpenMP/distribute_simd_private_codegen.cpp new file mode 100644 index 000000000000..ede807a0fb63 --- /dev/null +++ b/test/OpenMP/distribute_simd_private_codegen.cpp @@ -0,0 +1,208 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams +#pragma omp distribute simd private(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]() + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams +#pragma omp distribute simd private(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, + // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams +#pragma omp distribute simd private(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + #pragma omp target + #pragma omp teams +#pragma omp distribute simd + for (i = 0; i < 2; ++i) { + ; + } + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]]( +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-NOT: alloca [[S_FLOAT_TY]], +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]* +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]]( +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK-NOT: alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK-NOT: alloca [[S_INT_TY]], +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: {{.+}}: +// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]]) +// CHECK-NOT: [[T_VAR_PRIV]] +// CHECK-NOT: [[VEC_PRIV]] +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]]) +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/distribute_simd_reduction_codegen.cpp b/test/OpenMP/distribute_simd_reduction_codegen.cpp new file mode 100644 index 000000000000..8485d8d27d52 --- /dev/null +++ b/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -0,0 +1,204 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams +#pragma omp distribute simd reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams +#pragma omp distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: alloca i{{.+}}, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[LAST_ITER:%.+]] = load i32, i32* % + // LAMBDA: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0 + // LAMBDA: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]] + // LAMBDA: [[THEN]] + // LAMBDA: store i32 2, i32* % + // LAMBDA: br label %[[DONE]] + // LAMBDA: [[DONE]] + // LAMBDA: [[SIVAR_ORIG_VAL:%.+]] = load i32, i32* [[SIVAR_REF]], + // LAMBDA: [[SIVAR_PRIV_VAL:%.+]] = load i32, i32* [[SIVAR_PRIV]], + // LAMBDA: [[ADD:%.+]] = add nsw i32 [[SIVAR_ORIG_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store i32 [[ADD]], i32* [[SIVAR_REF]], + // LAMBDA: ret void + + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams +#pragma omp distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[LAST_ITER:%.+]] = load i32, i32* % +// CHECK: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0 +// CHECK: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]] +// CHECK: [[THEN]] +// CHECK: store i32 2, i32* % +// CHECK: br label %[[DONE]] +// CHECK: [[DONE]] +// CHECK: [[SIVAR_ORIG_VAL:%.+]] = load i32, i32* [[SIVAR_REF]], +// CHECK: [[SIVAR_PRIV_VAL:%.+]] = load i32, i32* [[SIVAR_PRIV]], +// CHECK: [[ADD:%.+]] = add nsw i32 [[SIVAR_ORIG_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store i32 [[ADD]], i32* [[SIVAR_REF]], +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: alloca i{{.+}}, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[LAST_ITER:%.+]] = load i32, i32* % +// CHECK: [[IS_LAST:%.+]] = icmp ne i32 [[LAST_ITER]], 0 +// CHECK: br i1 [[IS_LAST]], label %[[THEN:.+]], label %[[DONE:.+]] +// CHECK: [[THEN]] +// CHECK: store i32 2, i32* % +// CHECK: br label %[[DONE]] +// CHECK: [[DONE]] +// CHECK: [[TVAR_ORIG_VAL:%.+]] = load i32, i32* [[TVAR_REF]], +// CHECK: [[TVAR_PRIV_VAL:%.+]] = load i32, i32* [[TVAR_PRIV]], +// CHECK: [[ADD:%.+]] = add nsw i32 [[TVAR_ORIG_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store i32 [[ADD]], i32* [[TVAR_REF]], +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/distribute_simd_reduction_messages.cpp b/test/OpenMP/distribute_simd_reduction_messages.cpp index fca3e85a7104..ddedcb0892ba 100644 --- a/test/OpenMP/distribute_simd_reduction_messages.cpp +++ b/test/OpenMP/distribute_simd_reduction_messages.cpp @@ -27,9 +27,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/dump.cpp b/test/OpenMP/dump.cpp index 378b53ce5bf1..5ec202cc3040 100644 --- a/test/OpenMP/dump.cpp +++ b/test/OpenMP/dump.cpp @@ -52,11 +52,11 @@ struct S { // CHECK-NEXT: | |-OMPScheduleClause {{.+}} <col:61, col:79> // CHECK-NEXT: | | `-ImplicitCastExpr {{.+}} <col:78> 'int' <LValueToRValue> // CHECK-NEXT: | | `-DeclRefExpr {{.+}} <col:78> 'int' lvalue OMPCapturedExpr {{.+}} '.capture_expr.' 'int' -// CHECK-NEXT: | |-CapturedStmt {{.+}} <line:40:5, <invalid sloc>> +// CHECK-NEXT: | |-CapturedStmt {{.+}} <line:40:5, line:41:9> // CHECK-NEXT: | | |-CapturedDecl {{.+}} <<invalid sloc>> <invalid sloc> -// CHECK-NEXT: | | | |-ForStmt {{.+}} <col:5, <invalid sloc>> -// CHECK: | | | | `-UnaryOperator {{.+}} <line:41:7, <invalid sloc>> 'int' lvalue prefix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.+}} <<invalid sloc>> 'int' lvalue OMPCapturedExpr {{.+}} 'a' 'int &' +// CHECK-NEXT: | | | |-ForStmt {{.+}} <line:40:5, line:41:9> +// CHECK: | | | | `-UnaryOperator {{.+}} <line:41:7, col:9> 'int' lvalue prefix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr {{.+}} <col:9> 'int' lvalue OMPCapturedExpr {{.+}} 'a' 'int &' #pragma omp declare simd #pragma omp declare simd inbranch diff --git a/test/OpenMP/for_codegen.cpp b/test/OpenMP/for_codegen.cpp index 466139cfb2fc..1f5dd7ddd0dd 100644 --- a/test/OpenMP/for_codegen.cpp +++ b/test/OpenMP/for_codegen.cpp @@ -11,15 +11,26 @@ // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* // CHECK-DAG: [[I:@.+]] = global i8 1, // CHECK-DAG: [[J:@.+]] = global i8 2, // CHECK-DAG: [[K:@.+]] = global i8 3, +// CHECK-LABEL: loop_with_counter_collapse +void loop_with_counter_collapse() { + // CHECK: call void @__kmpc_for_static_init_8(%ident_t* @ + // CHECK: call void @__kmpc_for_static_fini(%ident_t* @ + #pragma omp for collapse(2) + for (int i = 0; i < 4; i++) { + for (int j = i; j < 4; j++) { + } + } +} // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) #pragma omp for nowait -// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) // UB = min(UB, GlobalUB) // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423 @@ -51,7 +62,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK-NOT: __kmpc_barrier // CHECK: ret void } @@ -60,7 +71,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) #pragma omp for schedule(static) -// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) // UB = min(UB, GlobalUB) // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423 @@ -92,7 +103,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]]) // CHECK: ret void } @@ -101,7 +112,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { void static_chunked(float *a, float *b, float *c, float *d) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) #pragma omp for schedule(monotonic: static, 5) -// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5) +// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5) // UB = min(UB, GlobalUB) // CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288 @@ -152,7 +163,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]] // CHECK: [[O_LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]]) // CHECK: ret void } @@ -329,8 +340,8 @@ void runtime(float *a, float *b, float *c, float *d) { // CHECK-LABEL: test_precond void test_precond() { // CHECK: [[A_ADDR:%.+]] = alloca i8, - // CHECK: [[CAP:%.+]] = alloca i8, // CHECK: [[I_ADDR:%.+]] = alloca i8, + // CHECK: [[CAP:%.+]] = alloca i8, char a = 0; // CHECK: store i8 0, // CHECK: store i32 @@ -375,7 +386,9 @@ void parallel_for(float *a) { char i = 1, j = 2, k = 3; // CHECK-LABEL: for_with_global_lcv void for_with_global_lcv() { +// CHECK: alloca i8, // CHECK: [[I_ADDR:%.+]] = alloca i8, +// CHECK: alloca i8, // CHECK: [[J_ADDR:%.+]] = alloca i8, // CHECK: call void @__kmpc_for_static_init_4( diff --git a/test/OpenMP/for_firstprivate_codegen.cpp b/test/OpenMP/for_firstprivate_codegen.cpp index 1e1d404b48db..88f54103ad69 100644 --- a/test/OpenMP/for_firstprivate_codegen.cpp +++ b/test/OpenMP/for_firstprivate_codegen.cpp @@ -81,8 +81,10 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIVATE_REF:%.+]] = alloca i{{[0-9]+}}*, // LAMBDA: [[SIVAR2_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: store i{{[0-9]+}}* [[SIVAR_REF]], i{{[0-9]+}}** %{{.+}}, @@ -91,20 +93,26 @@ int main() { // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}, i{{[0-9]+}}* [[G]] // LAMBDA: store i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]] + // LAMBDA: store i{{[0-9]+}}* [[G1_PRIVATE_ADDR]], i{{[0-9]+}}** [[G1_PRIVATE_REF]], // LAMBDA: [[SIVAR2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR_REF]] // LAMBDA: store i{{[0-9]+}} [[SIVAR2_VAL]], i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]] // LAMBDA-NOT: call void @__kmpc_barrier( g = 1; - g1 = 1; - sivar = 2; + g1 = 2; + sivar = 3; // LAMBDA: call void @__kmpc_for_static_init_4( // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], - // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]], + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PRIVATE_REF]], + // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]], // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] - // LAMBDA: [[SIVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PRIVATE_REF]], + // LAMBDA: store i{{[0-9]+}}* [[G1_PRIVATE_ADDR]], i{{[0-9]+}}** [[G1_PRIVATE_ADDR_REF]] + // LAMBDA: [[SIVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: store i{{[0-9]+}}* [[SIVAR2_PRIVATE_ADDR]], i{{[0-9]+}}** [[SIVAR_PRIVATE_ADDR_REF]] // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) // LAMBDA: call void @__kmpc_for_static_fini( @@ -112,17 +120,20 @@ int main() { [&]() { // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - g = 2; - g1 = 2; - sivar = 4; + g = 4; + g1 = 5; + sivar = 6; // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] - // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] - // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 5, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] - // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: store i{{[0-9]+}} 6, i{{[0-9]+}}* [[SIVAR_REF]] }(); } }(); @@ -144,6 +155,7 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, + // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[G1_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[SIVAR2_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -208,6 +220,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], @@ -262,38 +275,30 @@ int main() { // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: [[TVAR:%.+]] = alloca i32, -// CHECK: [[TVAR_CAST:%.+]] = alloca i64, // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: [[TVAR_VAL:%.+]] = load i32, i32* [[TVAR]], -// CHECK: [[TVAR_CONV:%.+]] = bitcast i64* [[TVAR_CAST]] to i32* -// CHECK: store i32 [[TVAR_VAL]], i32* [[TVAR_CONV]], -// CHECK: [[PVT_CASTVAL:%[^,]+]] = load i64, i64* [[TVAR_CAST]], -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i64 [[PVT_CASTVAL]], +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*), i32* [[TVAR]], // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // -// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 {{.*}}%{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) +// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) // Skip temp vars for loop -// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: %{{.+}} = bitcast i64* [[T_VAR_PRIV]] to i32* -// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % +// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** % // CHECK: [[VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % -// firstprivate t_var(t_var) -// CHECK-NOT: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]], - // firstprivate vec(vec) // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8* diff --git a/test/OpenMP/for_lastprivate_codegen.cpp b/test/OpenMP/for_lastprivate_codegen.cpp index f0fba042c58a..30a17eaf581a 100644 --- a/test/OpenMP/for_lastprivate_codegen.cpp +++ b/test/OpenMP/for_lastprivate_codegen.cpp @@ -213,6 +213,7 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, @@ -246,6 +247,7 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, align 128 // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIVATE_REF:%.+]] = alloca i{{[0-9]+}}*, // LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[SIVAR_PRIVATE_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}, @@ -254,10 +256,15 @@ int main() { // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PRIVATE_REF]], + // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G1_PRIVATE_ADDR]], // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]], // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]] - // LAMBDA: [[SIVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PRIVATE_REF]], + // LAMBDA: store i{{[0-9]+}}* [[G1_PRIVATE_ADDR]], i{{[0-9]+}}** [[G1_PRIVATE_ADDR_REF]] + // LAMBDA: [[SIVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: store i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SIVAR_PRIVATE_ADDR_REF]] // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) @@ -291,7 +298,10 @@ int main() { // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] - // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] }(); @@ -398,6 +408,7 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, @@ -468,6 +479,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], @@ -602,6 +614,7 @@ int main() { // CHECK: ret void // CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) +// CHECK: alloca i8, // CHECK: [[CNT_PRIV:%.+]] = alloca i8, // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]] @@ -664,6 +677,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, diff --git a/test/OpenMP/for_lastprivate_messages.cpp b/test/OpenMP/for_lastprivate_messages.cpp index 79912b6efbca..13ef35e925a7 100644 --- a/test/OpenMP/for_lastprivate_messages.cpp +++ b/test/OpenMP/for_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/for_linear_codegen.cpp b/test/OpenMP/for_linear_codegen.cpp index 16a67c0b91d2..54cdf8a4cb33 100644 --- a/test/OpenMP/for_linear_codegen.cpp +++ b/test/OpenMP/for_linear_codegen.cpp @@ -157,6 +157,7 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, @@ -181,6 +182,7 @@ int main() { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, @@ -233,6 +235,7 @@ int main() { for (int i = 0; i < 2; ++i) { // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // BLOCKS: alloca i{{[0-9]+}}, + // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, @@ -307,6 +310,7 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, +// BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, @@ -351,13 +355,13 @@ int main() { // CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, i64* dereferenceable(8) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca float*, // CHECK: [[LVAR_START:%.+]] = alloca i64, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_PRIV:%.+]] = alloca float*, // CHECK: [[LVAR_PRIV:%.+]] = alloca i64, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] @@ -419,6 +423,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, @@ -446,13 +451,13 @@ int main() { // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca i32*, // CHECK: [[LVAR_START:%.+]] = alloca i32, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_PRIV:%.+]] = alloca i32*, // CHECK: [[LVAR_PRIV:%.+]] = alloca i32, // CHECK: [[LVAR_PRIV_REF:%.+]] = alloca i32*, diff --git a/test/OpenMP/for_private_codegen.cpp b/test/OpenMP/for_private_codegen.cpp index 5bad8d00688d..f1416d75b64b 100644 --- a/test/OpenMP/for_private_codegen.cpp +++ b/test/OpenMP/for_private_codegen.cpp @@ -52,6 +52,8 @@ int main() { for (int i = 0; i < 2; ++i) { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE_REF:%.+]] = alloca double*, // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float, g = 1; @@ -60,13 +62,18 @@ int main() { sfvar = 4.0; // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]], + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load double*, double** [[G1_PRIVATE_REF]], + // LAMBDA: store volatile double 1.0{{.+}}, double* [[G1_PRIVATE_ADDR]], // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]], // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]] - // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = load double*, double** [[G1_PRIVATE_REF]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[G1_PRIVATE_ADDR_REF]] + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] - // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]] // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( @@ -81,10 +88,13 @@ int main() { // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[G1_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]] + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] - // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] }(); diff --git a/test/OpenMP/for_reduction_codegen.cpp b/test/OpenMP/for_reduction_codegen.cpp index d5afae990bf4..cef457333062 100644 --- a/test/OpenMP/for_reduction_codegen.cpp +++ b/test/OpenMP/for_reduction_codegen.cpp @@ -27,7 +27,7 @@ struct S { // CHECK-DAG: [[REDUCTION_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 18, i32 0, i32 0, i8* // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer -template <typename T> +template <typename T, int length> T tmain() { T t; S<T> test; @@ -36,6 +36,7 @@ T tmain() { S<T> s_arr[] = {1, 2}; S<T> &var = test; S<T> var1; + S<T> arr[length]; #pragma omp parallel #pragma omp for reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) nowait for (int i = 0; i < 2; ++i) { @@ -48,6 +49,12 @@ T tmain() { vec[i] = t_var; s_arr[i] = var; } +#pragma omp parallel +#pragma omp for reduction(+ : arr[1:length-2]) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } return T(); } @@ -180,12 +187,12 @@ int main() { S<float> test; float t_var = 0, t_var1; int vec[] = {1, 2}; - S<float> s_arr[] = {1, 2}; + S<float> s_arr[] = {1, 2, 3, 4}; S<float> &var = test; S<float> var1, arrs[10][4]; S<float> **var2 = foo(); - S<float> vvar2[2]; - S<float> (&var3)[2] = s_arr; + S<float> vvar2[5]; + S<float> (&var3)[4] = s_arr; #pragma omp parallel #pragma omp for reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1) for (int i = 0; i < 2; ++i) { @@ -200,11 +207,28 @@ int main() { #pragma omp for reduction(+:arr) reduction(&:arrs) for (int i = 0; i < 10; ++i) ++arr[1][i]; + // arr is a VLA, but the array section has constant length so we can generate a constant sized array! +#pragma omp parallel +#pragma omp for reduction(+:arr[1][0:2]) + for (int i = 0; i < 10; ++i) + ++arr[1][i]; #pragma omp parallel #pragma omp for reduction(& : var2[0 : 5][1 : 6]) for (int i = 0; i < 10; ++i) ; #pragma omp parallel +#pragma omp for reduction(& : var2[1][1 : 6]) + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp for reduction(& : var2[1 : 1][1 : 6]) + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel +#pragma omp for reduction(& : var2[1 : 1][1]) + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel #pragma omp for reduction(& : vvar2[0 : 5]) for (int i = 0; i < 10; ++i) ; @@ -213,28 +237,43 @@ int main() { for (int i = 0; i < 10; ++i) ; #pragma omp parallel +#pragma omp for reduction(& : var3[ : 2]) + for (int i = 0; i < 10; ++i) + ; + // TODO: The compiler should also be able to generate a constant sized array in this case! +#pragma omp parallel +#pragma omp for reduction(& : var3[2 : ]) + for (int i = 0; i < 10; ++i) + ; +#pragma omp parallel #pragma omp for reduction(& : var3) for (int i = 0; i < 10; ++i) ; - return tmain<int>(); + return tmain<int, 42>(); #endif } // CHECK: define {{.*}}i{{[0-9]+}} @main() // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [2 x i32]*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK1:@.+]] to void // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK2:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK3:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 3, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*)* [[MAIN_MICROTASK3:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK4:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK5:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK6:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK7:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [5 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK8:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK9:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK10:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK11:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK12:@.+]] to void +// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT_42:@.+]]() // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret // -// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(4) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}}) // CHECK: [[T_VAR_PRIV:%.+]] = alloca float, // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]], @@ -491,7 +530,7 @@ int main() { // CHECK: store float [[UP]], float* [[T_VAR1_LHS]], // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}}) // Reduction list for runtime. // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], @@ -692,7 +731,7 @@ int main() { // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(160) %{{.+}}) // CHECK: [[ARRS_PRIV:%.+]] = alloca [10 x [4 x [[S_FLOAT_TY]]]], @@ -882,7 +921,35 @@ int main() { // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK3]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* {{.*}} %{{.+}}) + +// CHECK: [[VLA1_ORIG_ADDR:%.+]] = alloca i64 +// CHECK: [[VLA2_ORIG_ADDR:%.+]] = alloca i64 +// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca i32*, +// CHECK: [[ARR_PRIV:%.+]] = alloca [1 x [2 x i32]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// CHECK: [[VLA1:%.+]] = load i64, i64* [[VLA1_ORIG_ADDR]] +// CHECK: [[VLA2:%.+]] = load i64, i64* [[VLA2_ORIG_ADDR]] +// CHECK: [[ARR_ORIG:%.+]] = load i32*, i32** [[ARR_ORIG_ADDR]], + +// CHECK: [[LOW_OFFSET:%.+]] = mul nsw i64 1, [[VLA2]] +// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds i32, i32* [[ARR_ORIG]], i64 [[LOW_OFFSET]] +// CHECK: [[LOW:%.+]] = getelementptr inbounds i32, i32* [[ARRIDX]], i64 0 + +// CHECK: [[START:%.+]] = ptrtoint i32* [[ARR_ORIG]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint i32* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [1 x [2 x i32]]* [[ARR_PRIV]] to i32* +// CHECK: [[ARR_PRIV:%.+]] = getelementptr i32, i32* [[ARR_PRIV_PTR]], i64 [[OFFSET]] + +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}}) // CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***, @@ -910,44 +977,186 @@ int main() { // CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]] // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}}) -// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***, +// CHECK: [[VAR2_PRIV:%.+]] = alloca [1 x [6 x [[S_FLOAT_TY]]]], // Reduction list for runtime. -// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*], +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[VVAR2_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]], +// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]], -// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 -// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 -// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 -// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] -// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) -// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 -// CHECK: call i8* @llvm.stacksave() -// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]], +// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], + +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) -// CHECK: [[PSEUDO_VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV]], i64 [[OFFSET]] -// CHECK: [[VVAR2_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VVAR2_PRIV]] to [2 x [[S_FLOAT_TY]]]* +// CHECK: [[VAR2_PRIV_PTR:%.+]] = bitcast [1 x [6 x [[S_FLOAT_TY]]]]* [[VAR2_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** % +// CHECK: store [[S_FLOAT_TY]]* [[VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]] // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}}) -// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***, +// CHECK: [[VAR2_PRIV:%.+]] = alloca [1 x [6 x [[S_FLOAT_TY]]]], // Reduction list for runtime. -// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*], +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]], + +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]], +// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], + +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], +// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[VAR2_PRIV_PTR:%.+]] = bitcast [1 x [6 x [[S_FLOAT_TY]]]]* [[VAR2_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** % +// CHECK: store [[S_FLOAT_TY]]* [[VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]] +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK7]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[S_FLOAT_TY]]*** dereferenceable(8) %{{.+}}) + +// CHECK: [[VAR2_ORIG_ADDR:%.+]] = alloca [[S_FLOAT_TY]]***, +// CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// CHECK: [[VAR2_ORIG:%.+]] = load [[S_FLOAT_TY]]***, [[S_FLOAT_TY]]**** [[VAR2_ORIG_ADDR]], + +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ARRIDX:%.+]] = getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[ARRIDX]], +// CHECK: [[LOW:%.+]] = getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[LD]], i64 1 +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], + +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], +// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[PSEUDO_VAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR2_PRIV]], i64 [[OFFSET]] +// CHECK: store [[S_FLOAT_TY]]** [[REF:.+]], [[S_FLOAT_TY]]*** % +// CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]] +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK8]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [5 x [[S_FLOAT_TY]]]* dereferenceable(20) %{{.+}}) + +// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [5 x [[S_FLOAT_TY]]]*, +// CHECK: [[VVAR2_PRIV:%.+]] = alloca [5 x [[S_FLOAT_TY]]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], +// CHECK: [[VVAR2_ORIG:%.+]] = load [5 x [[S_FLOAT_TY]]]*, [5 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]], + +// CHECK: [[LOW:%.+]] = getelementptr inbounds [5 x [[S_FLOAT_TY]]], [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 +// CHECK: [[ORIG_START:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[VVAR2_PRIV_PTR:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK9]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}}) + +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1 +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]* + +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % + +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK10]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}}) + +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 0 +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) +// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]* + +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % + +// CHECK: ret void + +// CHECK: define internal void [[MAIN_MICROTASK11]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}}) + +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW:%.+]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -955,44 +1164,47 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64) // CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV]], i64 [[OFFSET]] -// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [2 x [[S_FLOAT_TY]]]* +// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]* -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** % +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(8) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK12]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(16) %{{.+}}) -// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_PRIV:%.+]] = alloca [4 x [[S_FLOAT_TY]]], // Reduction list for runtime. // CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 -// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2 +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 +// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4 -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** % -// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % +// CHECK: bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: ret void -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT_42]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*)* [[TMAIN_MICROTASK:@.+]] to void +// Not interested in this one: +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [42 x [[S_INT_TY]]]*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK2:@.+]] to void // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // @@ -1002,6 +1214,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], @@ -1226,5 +1439,28 @@ int main() { // CHECK: store i{{[0-9]+}} [[UP]], i{{[0-9]+}}* [[T_VAR1_LHS]], // CHECK: ret void +// CHECK: define internal void [[TMAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [42 x [[S_INT_TY]]]* dereferenceable(168) %{{.*}}, [2 x i32]* dereferenceable(8) %{{.*}}, i32* dereferenceable(4) %{{.*}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.*}}, [[S_INT_TY]]* dereferenceable(4) %{{.*}}) + +// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca [42 x [[S_INT_TY]]]*, +// CHECK: [[ARR_PRIV:%.+]] = alloca [40 x [[S_INT_TY]]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], + +// CHECK: [[ARR_ORIG:%.+]] = load [42 x [[S_INT_TY]]]*, [42 x [[S_INT_TY]]]** [[ARR_ORIG_ADDR]], +// CHECK: [[LOW:%.+]] = getelementptr inbounds [42 x [[S_INT_TY]]], [42 x [[S_INT_TY]]]* [[ARR_ORIG]], i64 0, i64 1 +// CHECK: [[ORIG_START:%.+]] = bitcast [42 x [[S_INT_TY]]]* [[ARR_ORIG]] to [[S_INT_TY]]* +// CHECK: [[START:%.+]] = ptrtoint [[S_INT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_INT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint (i32* getelementptr (i32, i32* null, i32 1) to i64) +// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [40 x [[S_INT_TY]]]* [[ARR_PRIV]] to [[S_INT_TY]]* +// CHECK: [[PSEUDO_ARR_PRIV:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[ARR_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: [[ARR_PRIV:%.+]] = bitcast [[S_INT_TY]]* [[PSEUDO_ARR_PRIV]] to [42 x [[S_INT_TY]]]* + +// CHECK: ret void + #endif diff --git a/test/OpenMP/for_reduction_codegen_UDR.cpp b/test/OpenMP/for_reduction_codegen_UDR.cpp index 95b04f44d468..05a46a6eb7ff 100644 --- a/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -40,7 +40,7 @@ void init_plus(BaseS1&, const BaseS1&); // CHECK-DAG: [[REDUCTION_LOCK:@.+]] = common global [8 x i32] zeroinitializer #pragma omp declare reduction(operator&& : int : omp_out = 111 & omp_in) -template <typename T> +template <typename T, int length> T tmain() { T t; S<T> test; @@ -49,6 +49,7 @@ T tmain() { S<T> s_arr[] = {1, 2}; S<T> &var = test; S<T> var1; + S<T> arr[length]; #pragma omp declare reduction(operator& : T : omp_out = 15 + omp_in) #pragma omp declare reduction(operator+ : T : omp_out = 1513 + omp_in) initializer(omp_priv = 321) #pragma omp declare reduction(min : T : omp_out = 47 - omp_in) initializer(omp_priv = 432 / omp_orig) @@ -66,6 +67,12 @@ T tmain() { vec[i] = t_var; s_arr[i] = var; } +#pragma omp parallel +#pragma omp for reduction(+ : arr[1:length-2]) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } return T(); } @@ -78,12 +85,12 @@ int main() { S<float> test; float t_var = 0, t_var1; int vec[] = {1, 2}; - S<float> s_arr[] = {1, 2}; + S<float> s_arr[] = {1, 2, 3, 4}; S<float> &var = test; S<float> var1, arrs[10][4]; S<float> **var2 = foo(); - S<float> vvar2[2]; - S<float>(&var3)[2] = s_arr; + S<float> vvar2[5]; + S<float>(&var3)[4] = s_arr; #pragma omp declare reduction(operator+ : int : omp_out = 555 * omp_in) initializer(omp_priv = 888) #pragma omp parallel #pragma omp for reduction(+ : t_var) reduction(& : var) reduction(&& : var1) reduction(min : t_var1) @@ -115,24 +122,24 @@ int main() { #pragma omp for reduction(& : var3) for (int i = 0; i < 10; ++i) ; - return tmain<int>(); + return tmain<int, 42>(); } // CHECK: define {{.*}}i{{[0-9]+}} @main() // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*, [2 x i32]*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK:@.+]] to void // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [2 x i32]*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK1:@.+]] to void // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, i64, i32*, [10 x [4 x [[S_FLOAT_TY]]]]*)* [[MAIN_MICROTASK2:@.+]] to void // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[S_FLOAT_TY]]***)* [[MAIN_MICROTASK3:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void -// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]() +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [5 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK4:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK5:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [4 x [[S_FLOAT_TY]]]*)* [[MAIN_MICROTASK6:@.+]] to void +// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT_42:@.+]]() // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret // -// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float* dereferenceable(4) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, [[S_FLOAT_TY]]* dereferenceable(12) %{{.+}}, float* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %vec, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}}) // CHECK: [[T_VAR_PRIV:%.+]] = alloca float, // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_FLOAT_TY]], @@ -300,7 +307,7 @@ int main() { // CHECK: fadd float 5.550000e+02, % // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}}) // Reduction list for runtime. // CHECK: [[RED_LIST:%.+]] = alloca [4 x i8*], @@ -496,7 +503,7 @@ int main() { // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}} %{{.+}}, [10 x [4 x [[S_FLOAT_TY]]]]* dereferenceable(480) %{{.+}}) // CHECK: [[ARRS_PRIV:%.+]] = alloca [10 x [4 x [[S_FLOAT_TY]]]], @@ -711,89 +718,87 @@ int main() { // CHECK: store [[S_FLOAT_TY]]* [[PSEUDO_VAR2_PRIV]], [[S_FLOAT_TY]]** [[REF]] // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK4]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [5 x [[S_FLOAT_TY]]]* dereferenceable(60) %{{.+}}) -// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VVAR2_ORIG_ADDR:%.+]] = alloca [5 x [[S_FLOAT_TY]]]*, +// CHECK: [[VVAR2_PRIV:%.+]] = alloca [5 x [[S_FLOAT_TY]]], // Reduction list for runtime. -// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*], +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[VVAR2_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]], +// CHECK: [[VVAR2_ORIG:%.+]] = load [5 x [[S_FLOAT_TY]]]*, [5 x [[S_FLOAT_TY]]]** [[VVAR2_ORIG_ADDR]], -// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 -// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 -// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 -// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] -// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64) -// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 -// CHECK: call i8* @llvm.stacksave() -// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[LOW:%.+]] = getelementptr inbounds [5 x [[S_FLOAT_TY]]], [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 +// CHECK: [[ORIG_START:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64) -// CHECK: [[PSEUDO_VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV]], i64 [[OFFSET]] -// CHECK: [[VVAR2_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VVAR2_PRIV]] to [2 x [[S_FLOAT_TY]]]* +// CHECK: [[VVAR2_PRIV_PTR:%.+]] = bitcast [5 x [[S_FLOAT_TY]]]* [[VVAR2_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[VVAR2_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VVAR2_PRIV_PTR]], i64 [[OFFSET]] // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK5]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}}) -// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], // Reduction list for runtime. -// CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*], +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], -// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 -// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW:%.+]] to i64 -// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] -// CHECK: [[DIF:%.+]] = sdiv exact i64 [[BYTE_DIF]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64) -// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 -// CHECK: call i8* @llvm.stacksave() -// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[LOW:%.+]] = getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1 +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], + +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] // CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_FLOAT_TY]]* getelementptr ([[S_FLOAT_TY]], [[S_FLOAT_TY]]* null, i32 1) to i64) -// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV]], i64 [[OFFSET]] -// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [2 x [[S_FLOAT_TY]]]* +// CHECK: [[VAR3_PRIV_PTR:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[PSEUDO_VAR3_PRIV:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[VAR3_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: [[VAR3_PRIV:%.+]] = bitcast [[S_FLOAT_TY]]* [[PSEUDO_VAR3_PRIV]] to [4 x [[S_FLOAT_TY]]]* -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** % +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % // CHECK: ret void -// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x [[S_FLOAT_TY]]]* dereferenceable(24) %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK6]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [4 x [[S_FLOAT_TY]]]* dereferenceable(48) %{{.+}}) -// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, -// CHECK: [[VAR3_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR3_ORIG_ADDR:%.+]] = alloca [4 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR3_PRIV:%.+]] = alloca [4 x [[S_FLOAT_TY]]], // Reduction list for runtime. // CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 -// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* -// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2 +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], +// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: getelementptr inbounds [4 x [[S_FLOAT_TY]]], [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 +// CHECK: bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* +// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4 -// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** % +// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** % // CHECK: ret void -// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT_42]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [[S_INT_TY]]*, [[S_INT_TY]]*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*)* [[TMAIN_MICROTASK:@.+]] to void +// Not interested in this one: +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [42 x [[S_INT_TY]]]*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK2:@.+]] to void +// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // @@ -803,6 +808,7 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK: [[VAR1_PRIV:%.+]] = alloca [[S_INT_TY]], @@ -964,5 +970,28 @@ int main() { // CHECK: sub nsw i32 47, % // CHECK: ret void +// CHECK: define internal void [[TMAIN_MICROTASK2]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [42 x [[S_INT_TY]]]* dereferenceable(504) %{{.*}}, [2 x i32]* dereferenceable(8) %{{.*}}, i32* dereferenceable(4) %{{.*}}, [2 x [[S_INT_TY]]]* dereferenceable(24) %{{.*}}, [[S_INT_TY]]* dereferenceable(12) %{{.*}}) + +// CHECK: [[ARR_ORIG_ADDR:%.+]] = alloca [42 x [[S_INT_TY]]]*, +// CHECK: [[ARR_PRIV:%.+]] = alloca [40 x [[S_INT_TY]]], + +// Reduction list for runtime. +// CHECK: [[RED_LIST:%.+]] = alloca [1 x i8*], + +// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], + +// CHECK: [[ARR_ORIG:%.+]] = load [42 x [[S_INT_TY]]]*, [42 x [[S_INT_TY]]]** [[ARR_ORIG_ADDR]], +// CHECK: [[LOW:%.+]] = getelementptr inbounds [42 x [[S_INT_TY]]], [42 x [[S_INT_TY]]]* [[ARR_ORIG]], i64 0, i64 1 +// CHECK: [[ORIG_START:%.+]] = bitcast [42 x [[S_INT_TY]]]* [[ARR_ORIG]] to [[S_INT_TY]]* +// CHECK: [[START:%.+]] = ptrtoint [[S_INT_TY]]* [[ORIG_START]] to i64 +// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_INT_TY]]* [[LOW]] to i64 +// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] +// CHECK: [[OFFSET:%.+]] = sdiv exact i64 [[OFFSET_BYTES]], ptrtoint ([[S_INT_TY]]* getelementptr ([[S_INT_TY]], [[S_INT_TY]]* null, i32 1) to i64) +// CHECK: [[ARR_PRIV_PTR:%.+]] = bitcast [40 x [[S_INT_TY]]]* [[ARR_PRIV]] to [[S_INT_TY]]* +// CHECK: [[PSEUDO_ARR_PRIV:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[ARR_PRIV_PTR]], i64 [[OFFSET]] +// CHECK: [[ARR_PRIV:%.+]] = bitcast [[S_INT_TY]]* [[PSEUDO_ARR_PRIV]] to [42 x [[S_INT_TY]]]* + +// CHECK: ret void + #endif diff --git a/test/OpenMP/for_reduction_messages.cpp b/test/OpenMP/for_reduction_messages.cpp index bb70ecc2b23e..d4f4a795fe2c 100644 --- a/test/OpenMP/for_reduction_messages.cpp +++ b/test/OpenMP/for_reduction_messages.cpp @@ -26,9 +26,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/for_simd_lastprivate_messages.cpp b/test/OpenMP/for_simd_lastprivate_messages.cpp index d48c07d92b39..a176a2d70eb9 100644 --- a/test/OpenMP/for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/for_simd_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: S2 &operator =(const S2&); const S2 &operator =(const S2&) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/for_simd_reduction_messages.cpp b/test/OpenMP/for_simd_reduction_messages.cpp index 485070e758ae..a77ab754eff9 100644 --- a/test/OpenMP/for_simd_reduction_messages.cpp +++ b/test/OpenMP/for_simd_reduction_messages.cpp @@ -26,9 +26,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/is_initial_device.c b/test/OpenMP/is_initial_device.c new file mode 100644 index 000000000000..0521f0bd5393 --- /dev/null +++ b/test/OpenMP/is_initial_device.c @@ -0,0 +1,36 @@ +// REQUIRES: powerpc-registered-target + +// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown \ +// RUN: -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x ir -triple powerpc64le-unknown-unknown -emit-llvm \ +// RUN: %t-ppc-host.bc -o - | FileCheck %s -check-prefixes HOST,OUTLINED +// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -emit-llvm -fopenmp-is-device \ +// RUN: %s -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefixes DEVICE,OUTLINED + +// expected-no-diagnostics +int check() { + int host = omp_is_initial_device(); + int device; +#pragma omp target map(tofrom: device) + { + device = omp_is_initial_device(); + } + + return host + device; +} + +// The host should get a value of 1: +// HOST: define{{.*}} @check() +// HOST: [[HOST:%.*]] = alloca i32 +// HOST: store i32 1, i32* [[HOST]] + +// OUTLINED: define{{.*}} @{{.*}}omp_offloading{{.*}}(i32*{{.*}} [[DEVICE_ARGUMENT:%.*]]) +// OUTLINED: [[DEVICE_ADDR_STORAGE:%.*]] = alloca i32* +// OUTLINED: store i32* [[DEVICE_ARGUMENT]], i32** [[DEVICE_ADDR_STORAGE]] +// OUTLINED: [[DEVICE_ADDR:%.*]] = load i32*, i32** [[DEVICE_ADDR_STORAGE]] + +// The outlined function that is called as fallback also runs on the host: +// HOST: store i32 1, i32* [[DEVICE_ADDR]] + +// The device should get a value of 0: +// DEVICE: store i32 0, i32* [[DEVICE_ADDR]] diff --git a/test/OpenMP/nvptx_data_sharing.cpp b/test/OpenMP/nvptx_data_sharing.cpp new file mode 100644 index 000000000000..65215cd2c4f9 --- /dev/null +++ b/test/OpenMP/nvptx_data_sharing.cpp @@ -0,0 +1,57 @@ +// Test device data sharing codegen. +///==========================================================================/// + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1 + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void test_ds(){ + #pragma omp target + { + int a = 10; + #pragma omp parallel + { + a = 1000; + } + } +} + +/// ========= In the worker function ========= /// + +// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() [[ATTR1:#.*]] { +// CK1: [[SHAREDARGS:%.+]] = alloca i8** +// CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]]) +// CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]] +// CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]]) + +/// ========= In the kernel function ========= /// + +// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() [[ATTR2:#.*]] { +// CK1: [[SHAREDARGS1:%.+]] = alloca i8** +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1) +// CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] +// CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]] +// CK1: [[SHAREDVAR:%.+]] = bitcast i32* {{.*}} to i8* +// CK1: store i8* [[SHAREDVAR]], i8** [[SHARGSTMP2]] + +/// ========= In the data sharing wrapper function ========= /// + +// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) [[ATTR1]] { +// CK1: [[SHAREDARGS2:%.+]] = alloca i8** +// CK1: store i8** %2, i8*** [[SHAREDARGS2]] +// CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] +// CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]] +// CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32** +// CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]] +// CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]]) + +/// ========= Attributes ========= /// + +// CK1-NOT: attributes [[ATTR1]] = { {{.*}}"has-nvptx-shared-depot"{{.*}} } +// CK1: attributes [[ATTR2]] = { {{.*}}"has-nvptx-shared-depot"{{.*}} } + +#endif diff --git a/test/OpenMP/nvptx_parallel_codegen.cpp b/test/OpenMP/nvptx_parallel_codegen.cpp index 224f24569669..ba889bf4975f 100644 --- a/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/test/OpenMP/nvptx_parallel_codegen.cpp @@ -78,7 +78,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -92,20 +92,20 @@ int bar(int n){ // // CHECK: [[EXEC_PARALLEL]] // CHECK: [[WF1:%.+]] = load i8*, i8** [[OMP_WORK_FN]], - // CHECK: [[WM1:%.+]] = icmp eq i8* [[WF1]], bitcast (void (i32*, i32*)* [[PARALLEL_FN1:@.+]] to i8*) + // CHECK: [[WM1:%.+]] = icmp eq i8* [[WF1]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN1:@.+]]_wrapper to i8*) // CHECK: br i1 [[WM1]], label {{%?}}[[EXEC_PFN1:.+]], label {{%?}}[[CHECK_NEXT1:.+]] // // CHECK: [[EXEC_PFN1]] - // CHECK: call void [[PARALLEL_FN1]]( + // CHECK: call void [[PARALLEL_FN1]]_wrapper( // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]] // // CHECK: [[CHECK_NEXT1]] // CHECK: [[WF2:%.+]] = load i8*, i8** [[OMP_WORK_FN]], - // CHECK: [[WM2:%.+]] = icmp eq i8* [[WF2]], bitcast (void (i32*, i32*)* [[PARALLEL_FN2:@.+]] to i8*) + // CHECK: [[WM2:%.+]] = icmp eq i8* [[WF2]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN2:@.+]]_wrapper to i8*) // CHECK: br i1 [[WM2]], label {{%?}}[[EXEC_PFN2:.+]], label {{%?}}[[CHECK_NEXT2:.+]] // // CHECK: [[EXEC_PFN2]] - // CHECK: call void [[PARALLEL_FN2]]( + // CHECK: call void [[PARALLEL_FN2]]_wrapper( // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]] // // CHECK: [[CHECK_NEXT2]] @@ -152,13 +152,13 @@ int bar(int n){ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN1]] to i8*)) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN1]]_wrapper to i8*), // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @__kmpc_serialized_parallel( // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]]( // CHECK: call void @__kmpc_end_serialized_parallel( - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN2]] to i8*)) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN2]]_wrapper to i8*), // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @llvm.nvvm.barrier0() // CHECK-64-DAG: load i32, i32* [[REF_A]] @@ -166,7 +166,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -203,7 +203,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -217,11 +217,11 @@ int bar(int n){ // // CHECK: [[EXEC_PARALLEL]] // CHECK: [[WF:%.+]] = load i8*, i8** [[OMP_WORK_FN]], - // CHECK: [[WM:%.+]] = icmp eq i8* [[WF]], bitcast (void (i32*, i32*)* [[PARALLEL_FN4:@.+]] to i8*) + // CHECK: [[WM:%.+]] = icmp eq i8* [[WF]], bitcast (void (i16, i32, i8**)* [[PARALLEL_FN4:@.+]]_wrapper to i8*) // CHECK: br i1 [[WM]], label {{%?}}[[EXEC_PFN:.+]], label {{%?}}[[CHECK_NEXT:.+]] // // CHECK: [[EXEC_PFN]] - // CHECK: call void [[PARALLEL_FN4]]( + // CHECK: call void [[PARALLEL_FN4]]_wrapper( // CHECK: br label {{%?}}[[TERM_PARALLEL:.+]] // // CHECK: [[CHECK_NEXT]] @@ -283,7 +283,7 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i32*, i32*)* [[PARALLEL_FN4]] to i8*)) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32, i8**)* [[PARALLEL_FN4]]_wrapper to i8*), // CHECK: call void @llvm.nvvm.barrier0() // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[IF_END:.+]] @@ -303,7 +303,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // diff --git a/test/OpenMP/nvptx_param_translate.c b/test/OpenMP/nvptx_param_translate.c new file mode 100644 index 000000000000..ec123ce3811b --- /dev/null +++ b/test/OpenMP/nvptx_param_translate.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** % +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* % +int main() { + double a, b; + +#pragma omp target map(tofrom \ + : a) map(to \ + : b) + { +#pragma omp taskgroup +#pragma omp task shared(a) + a = b; + } + return 0; +} diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp index d7ce7dc7d106..23b40e10c4c8 100644 --- a/test/OpenMP/nvptx_target_codegen.cpp +++ b/test/OpenMP/nvptx_target_codegen.cpp @@ -91,7 +91,7 @@ int foo(int n) { // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -168,7 +168,7 @@ int foo(int n) { // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -278,7 +278,7 @@ int foo(int n) { // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -441,7 +441,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -531,7 +531,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -616,7 +616,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // diff --git a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index 5dcff8e54842..f750be03e052 100644 --- a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -1,15 +1,14 @@ - // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER -template<typename tx, typename ty> -struct TT{ +template <typename tx, typename ty> +struct TT { tx X; ty Y; }; @@ -23,29 +22,32 @@ int foo(int n, double *ptr) { float b[10]; double c[5][10]; TT<long long, char> d; - - #pragma omp target firstprivate(a) + +#pragma omp target firstprivate(a) map(tofrom \ + : b) { + b[a] = a; } - - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]]) + + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}([10 x float] addrspace(1)* noalias [[B_IN:%.+]], i{{[0-9]+}} [[A_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, + // TCHECK-64: call void @llvm.dbg.declare(metadata [10 x float] addrspace(1)** %{{.+}}, metadata !{{[0-9]+}}, metadata !DIExpression()) // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], - // TCHECK: ret void + // TCHECK: ret void -#pragma omp target firstprivate(aa,b,c,d) +#pragma omp target firstprivate(aa, b, c, d) { aa += 1; b[2] = 1.0; c[1][2] = 1.0; d.X = 1; - d.Y = 1; + d.Y = 1; } - + // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], [10 x float]*{{.*}} [[B_IN:%.+]], [5 x [10 x double]]*{{.*}} [[C_IN:%.+]], [[TT]]*{{.*}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, @@ -58,10 +60,12 @@ int foo(int n, double *ptr) { // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]], - // TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]], + // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** % // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], + // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** % // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], + // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** % // firstprivate(aa): a_priv = a_in @@ -74,16 +78,15 @@ int foo(int n, double *ptr) { // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) - + // firstprivate(d) // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) - // TCHECK: load i16, i16* [[CONV_A2ADDR]], + // TCHECK: load i16, i16* [[A2_ADDR]], - - #pragma omp target firstprivate(ptr) +#pragma omp target firstprivate(ptr) { ptr[0]++; } @@ -98,13 +101,12 @@ int foo(int n, double *ptr) { return a; } - -template<typename tx> +template <typename tx> tx ftemplate(int n) { tx a = 0; tx b[10]; -#pragma omp target firstprivate(a,b) +#pragma omp target firstprivate(a, b) { a += 1; b[2] += 1; @@ -113,13 +115,12 @@ tx ftemplate(int n) { return a; } -static -int fstatic(int n) { +static int fstatic(int n) { int a = 0; char aaa = 0; int b[10]; -#pragma omp target firstprivate(a,aaa,b) +#pragma omp target firstprivate(a, aaa, b) { a += 1; aaa += 1; @@ -129,7 +130,7 @@ int fstatic(int n) { return a; } -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, @@ -138,9 +139,8 @@ int fstatic(int n) { // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a): a_priv = a_in @@ -158,8 +158,8 @@ int fstatic(int n) { struct S1 { double a; - int r1(int n){ - int b = n+1; + int r1(int n) { + int b = n + 1; #pragma omp target firstprivate(b) { @@ -169,7 +169,7 @@ struct S1 { return (int)b; } - // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) + // TCHECK: define internal void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, @@ -185,9 +185,7 @@ struct S1 { // TCHECK: ret void }; - - -int bar(int n, double *ptr){ +int bar(int n, double *ptr) { int a = 0; a += foo(n, ptr); S1 S; @@ -200,15 +198,15 @@ int bar(int n, double *ptr){ // template -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a) // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* diff --git a/test/OpenMP/nvptx_target_teams_codegen.cpp b/test/OpenMP/nvptx_target_teams_codegen.cpp index e823eabbb10f..b79fd185d77a 100644 --- a/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -60,7 +60,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -121,11 +121,13 @@ int bar(int n){ // CHECK: [[ACV:%.+]] = load i[[SZ]], i[[SZ]]* [[AC]], align // CHECK: store i[[SZ]] [[ACV]], i[[SZ]]* [[A_ADDR_T:%.+]], align // CHECK: [[CONV2:%.+]] = bitcast i[[SZ]]* [[A_ADDR_T]] to i8* - // CHECK: store i8 49, i8* [[CONV2]], align + // CHECK: [[LD_CONV2:%.+]] = load i8, i8* [[CONV2]], + // CHECK: store i8 [[LD_CONV2]], i8* [[A_PRIV:%[^,]+]], + // CHECK: store i8 49, i8* [[A_PRIV]], align // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // @@ -146,7 +148,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @llvm.nvvm.barrier0() - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i8*** %shared_args) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -207,11 +209,13 @@ int bar(int n){ // CHECK: [[ACV:%.+]] = load i[[SZ]], i[[SZ]]* [[AC]], align // CHECK: store i[[SZ]] [[ACV]], i[[SZ]]* [[AA_ADDR_T:%.+]], align // CHECK: [[CONV2:%.+]] = bitcast i[[SZ]]* [[AA_ADDR_T]] to i16* - // CHECK: store i16 1, i16* [[CONV2]], align + // CHECK: [[LD_CONV2:%.+]] = load i16, i16* [[CONV2]], + // CHECK: store i16 [[LD_CONV2]], i16* [[A_PRIV:%[^,]+]], + // CHECK: store i16 1, i16* [[A_PRIV]], align // CHECK: br label {{%?}}[[TERMINATE:.+]] // // CHECK: [[TERMINATE]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // CHECK: call void @llvm.nvvm.barrier0() // CHECK: br label {{%?}}[[EXIT]] // diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp index ae129ebfae4d..d77231807fb4 100644 --- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -84,7 +84,7 @@ int bar(int n){ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function @@ -360,7 +360,7 @@ int bar(int n){ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function @@ -776,7 +776,7 @@ int bar(int n){ // CHECK: br label %[[EXIT]] // // CHECK: [[EXIT]] - // CHECK: call void @__kmpc_kernel_deinit() + // CHECK: call void @__kmpc_kernel_deinit( // // Reduction function diff --git a/test/OpenMP/openmp_offload_codegen.cpp b/test/OpenMP/openmp_offload_codegen.cpp new file mode 100644 index 000000000000..f89bccf3b552 --- /dev/null +++ b/test/OpenMP/openmp_offload_codegen.cpp @@ -0,0 +1,36 @@ +// Test device for mapping codegen. +///==========================================================================/// + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm %s -o - 2>&1 \ +// RUN: | FileCheck -check-prefix=CK1 %s + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK1-DEVICE + +// expected-no-diagnostics + +#ifdef CK1 + +void target_maps_parallel_integer(int a){ + int ParamToKernel = a; +#pragma omp target map(tofrom: ParamToKernel) + { + ParamToKernel += 1; + } +} + +// CK1-DEVICE: {{.*}}void @__omp_offloading_{{.*}}(i32* dereferenceable(4){{.*}} + +// CK1: {{.*}}void {{.*}}target_maps_parallel_integer{{.*}} { + +// CK1: [[GEPOBP:%.+]] = getelementptr inbounds {{.*}} +// CK1: [[GEPOBPBIT:%.+]] = bitcast i8** [[GEPOBP]] +// CK1: store i32* %ParamToKernel, i32** [[GEPOBPBIT]] +// CK1: [[GEPOP:%.+]] = getelementptr inbounds {{.*}} +// CK1: [[GEPOPBIT:%.+]] = bitcast i8** [[GEPOP]] +// CK1: store i32* %ParamToKernel, i32** [[GEPOPBIT]] +// CK1: [[GEPOBPARG:%.+]] = getelementptr inbounds {{.*}} +// CK1: [[GEPOPARG:%.+]] = getelementptr inbounds {{.*}} +// CK1: call {{.*}}tgt_target({{.*}}i8** [[GEPOBPARG]], i8** [[GEPOPARG]] + +#endif
\ No newline at end of file diff --git a/test/OpenMP/openmp_win_codegen.cpp b/test/OpenMP/openmp_win_codegen.cpp new file mode 100644 index 000000000000..45b2c185cb8f --- /dev/null +++ b/test/OpenMP/openmp_win_codegen.cpp @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-windows-msvc18.0.0 -std=c++11 -fms-compatibility-version=18 -fms-extensions -emit-llvm %s -fexceptions -fcxx-exceptions -o - -O1 | FileCheck %s +// REQUIRES: x86-registered-target +// expected-no-diagnostics + +void foo(); +void bar(); + +struct Test { + static void main() { + int failed = 0; + int j = 2; + +#pragma omp parallel + { + int local_j = 3; +#pragma omp single copyprivate(local_j) + { + local_j = 4; + } + + // Assure reports a data race, but value written to "j" + // should always be the same. + j = local_j; + } + + } +}; + +// CHECK-LABEL: @main +int main() { + // CHECK: call void @{{.+}}main + Test::main(); + // CHECK: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* {{.*}}@0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*)) +#pragma omp parallel + { + try { + foo(); + } catch (int t) { +#pragma omp critical + { + bar(); + }; + } + }; + // CHECK: ret i32 0 + return 0; +} + +// CHECK: define internal void [[OUTLINED]]( +// CHECK: [[GID:%.+]] = {{.*}}call i32 @__kmpc_global_thread_num(%ident_t* {{.*}}@0) +// CHECK: invoke void @{{.+}}foo +// CHECK: catchswitch within +// CHECK: catchpad within +// CHECK: call void @__kmpc_critical(%ident_t* {{.*}}@0, i32 [[GID]], +// CHECK: invoke void @{{.+}}bar +// CHECK: call void @__kmpc_end_critical(%ident_t* {{.*}}@0, i32 [[GID]], +// CHECK: catchret from +// CHECK: cleanuppad within +// CHECK: call void @__kmpc_end_critical(%ident_t* {{.*}}@0, i32 [[GID]], +// CHECK: cleanupret from + diff --git a/test/OpenMP/ordered_codegen.cpp b/test/OpenMP/ordered_codegen.cpp index bd7c07051b04..af6a8722438d 100644 --- a/test/OpenMP/ordered_codegen.cpp +++ b/test/OpenMP/ordered_codegen.cpp @@ -216,7 +216,7 @@ float f[10]; // CHECK-LABEL: foo_simd void foo_simd(int low, int up) { // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access ! - // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access ! + // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}), !llvm.mem.parallel_loop_access ! #pragma omp simd for (int i = low; i < up; ++i) { f[i] = 0.0; @@ -224,7 +224,7 @@ void foo_simd(int low, int up) { f[i] = 1.0; } // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}} - // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}} + // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #pragma omp for simd ordered for (int i = low; i < up; ++i) { f[i] = 0.0; diff --git a/test/OpenMP/parallel_ast_print.cpp b/test/OpenMP/parallel_ast_print.cpp index 1e24c6e059bd..09141db5841f 100644 --- a/test/OpenMP/parallel_ast_print.cpp +++ b/test/OpenMP/parallel_ast_print.cpp @@ -135,6 +135,9 @@ struct S { // CHECK-NEXT: #pragma omp threadprivate(S<long>::TS) // CHECK-NEXT: } +int thrp; +#pragma omp threadprivate(thrp) + template <typename T, int C> T tmain(T argc, T *argv) { T b = argc, c, d, e, f, g; @@ -143,7 +146,7 @@ T tmain(T argc, T *argv) { T arr[C][10], arr1[C]; #pragma omp parallel a=2; -#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S<T>::TS) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) +#pragma omp parallel default(none), private(argc,b) firstprivate(argv) shared (d) if (parallel:argc > 0) num_threads(C) copyin(S<T>::TS, thrp) proc_bind(master) reduction(+:c, arr1[argc]) reduction(max:e, arr[:C][0:10]) foo(); #pragma omp parallel if (C) num_threads(s) proc_bind(close) reduction(^:e, f, arr[0:C][:argc]) reduction(&& : g) foo(); @@ -157,7 +160,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: T arr[C][10], arr1[C]; // CHECK-NEXT: #pragma omp parallel // CHECK-NEXT: a = 2; -// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S<T>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) +// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(C) copyin(S<T>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:C][0:10]) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp parallel if(C) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:C][:argc]) reduction(&&: g) // CHECK-NEXT: foo() @@ -168,7 +171,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: int arr[5][10], arr1[5]; // CHECK-NEXT: #pragma omp parallel // CHECK-NEXT: a = 2; -// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S<int>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) +// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(5) copyin(S<int>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:5][0:10]) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp parallel if(5) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:5][:argc]) reduction(&&: g) // CHECK-NEXT: foo() @@ -179,7 +182,7 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: long arr[1][10], arr1[1]; // CHECK-NEXT: #pragma omp parallel // CHECK-NEXT: a = 2; -// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S<long>::TS) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) +// CHECK-NEXT: #pragma omp parallel default(none) private(argc,b) firstprivate(argv) shared(d) if(parallel: argc > 0) num_threads(1) copyin(S<long>::TS,thrp) proc_bind(master) reduction(+: c,arr1[argc]) reduction(max: e,arr[:1][0:10]) // CHECK-NEXT: foo() // CHECK-NEXT: #pragma omp parallel if(1) num_threads(s) proc_bind(close) reduction(^: e,f,arr[0:1][:argc]) reduction(&&: g) // CHECK-NEXT: foo() diff --git a/test/OpenMP/parallel_codegen.cpp b/test/OpenMP/parallel_codegen.cpp index 23b323778b47..77e8efded68b 100644 --- a/test/OpenMP/parallel_codegen.cpp +++ b/test/OpenMP/parallel_codegen.cpp @@ -53,7 +53,7 @@ int main (int argc, char **argv) { // CHECK-DEBUG: ret i32 // CHECK-DEBUG-NEXT: } -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]]) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] @@ -64,7 +64,7 @@ int main (int argc, char **argv) { // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]]) +// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK-DEBUG: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK-DEBUG: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] @@ -80,7 +80,7 @@ int main (int argc, char **argv) { // CHECK-DAG: declare {{.*}}void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...) // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc) // CHECK-DEBUG-DAG: declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...) -// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* [[VLA_ADDR:%[^)]+]]) +// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] // CHECK: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) @@ -109,7 +109,7 @@ int main (int argc, char **argv) { // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) +// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) // CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] @@ -120,7 +120,9 @@ int main (int argc, char **argv) { // CHECK-DEBUG-NEXT: } // CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) -// CHECK-DEBUG: define linkonce_odr void [[FOO1]](i8** %argc) +// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) +// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc) +// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg // CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind // CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind diff --git a/test/OpenMP/parallel_firstprivate_messages.cpp b/test/OpenMP/parallel_firstprivate_messages.cpp index fc0eb4c7d358..b2cb4ff131ef 100644 --- a/test/OpenMP/parallel_firstprivate_messages.cpp +++ b/test/OpenMP/parallel_firstprivate_messages.cpp @@ -56,7 +56,7 @@ using A::x; } int main(int argc, char **argv) { - const int d = 5; + const int d = 5; // expected-note {{variable 'd' declared const here}} const int da[5] = { 0 }; S4 e(4); S5 g(5); @@ -72,6 +72,8 @@ int main(int argc, char **argv) { #pragma omp parallel firstprivate (argc) #pragma omp parallel firstprivate (S1) // expected-error {{'S1' does not refer to a value}} #pragma omp parallel firstprivate (a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} + #pragma omp parallel firstprivate (d) + d = 5; // expected-error {{cannot assign to variable 'd' with const-qualified type}} #pragma omp parallel firstprivate (argv[1]) // expected-error {{expected variable name}} #pragma omp parallel firstprivate(ba) #pragma omp parallel firstprivate(ca) diff --git a/test/OpenMP/parallel_for_codegen.cpp b/test/OpenMP/parallel_for_codegen.cpp index 1324ee67b615..1773619bce20 100644 --- a/test/OpenMP/parallel_for_codegen.cpp +++ b/test/OpenMP/parallel_for_codegen.cpp @@ -8,6 +8,7 @@ #define HEADER // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 514, i32 0, i32 0, i8* // CHECK-LABEL: with_var_schedule void with_var_schedule() { @@ -19,8 +20,8 @@ void with_var_schedule() { // CHECK: [[CHUNK:%.+]] = load i8*, i8** % // CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]], // CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64 -// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]]) -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) #pragma omp parallel for schedule(static, char(a)) for (unsigned long long i = 1; i < 2; ++i) { } @@ -32,9 +33,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%.+]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) // UB = min(UB, GlobalUB) // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423 @@ -65,7 +64,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK: ret void } @@ -75,9 +74,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%.+]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1) // UB = min(UB, GlobalUB) // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423 @@ -108,7 +105,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label %{{.+}} } // CHECK: [[LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK: ret void } @@ -118,9 +115,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5) +// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%.+]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5) // UB = min(UB, GlobalUB) // CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]] // CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288 @@ -170,7 +165,7 @@ void static_chunked(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]] // CHECK: [[O_LOOP1_END]] -// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]]) // CHECK: ret void } @@ -180,9 +175,7 @@ void dynamic1(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1) +// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 35, i64 0, i64 16908287, i64 1, i64 1) // // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 @@ -224,9 +217,7 @@ void guided7(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7) +// CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 36, i64 0, i64 16908287, i64 1, i64 7) // // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 @@ -270,12 +261,8 @@ void test_auto(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i32* dereferenceable(4) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1) +// CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 38, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1) // -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]] @@ -318,9 +305,7 @@ void runtime(float *a, float *b, float *c, float *d) { // CHECK: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* [[OMP_PARALLEL_FUNC:@.+]] to void (i32*, i32*, ...)*), // CHECK: define internal void [[OMP_PARALLEL_FUNC]](i32* noalias [[GTID_PARAM_ADDR:%.+]], i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}, float** dereferenceable(8) %{{.+}}) // CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1) +// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID:%.+]], i32 37, i32 0, i32 199, i32 1, i32 1) // // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]]) // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0 diff --git a/test/OpenMP/parallel_for_lastprivate_messages.cpp b/test/OpenMP/parallel_for_lastprivate_messages.cpp index ccfe2ea407b1..134100bc1753 100644 --- a/test/OpenMP/parallel_for_lastprivate_messages.cpp +++ b/test/OpenMP/parallel_for_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: S2 &operator=(const S2 &); const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/parallel_for_linear_codegen.cpp b/test/OpenMP/parallel_for_linear_codegen.cpp index d0968d7cd146..2c3cfddbd571 100644 --- a/test/OpenMP/parallel_for_linear_codegen.cpp +++ b/test/OpenMP/parallel_for_linear_codegen.cpp @@ -49,6 +49,7 @@ int main() { for (int i = 0; i < 2; ++i) { // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, @@ -96,6 +97,7 @@ int main() { for (int i = 0; i < 2; ++i) { // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) // BLOCKS: alloca i{{[0-9]+}}, + // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, @@ -155,13 +157,13 @@ int main() { // CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, i64* dereferenceable(8) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca float*, // CHECK: [[LVAR_START:%.+]] = alloca i64, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_PRIV:%.+]] = alloca float*, // CHECK: [[LVAR_PRIV:%.+]] = alloca i64, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] @@ -205,13 +207,13 @@ int main() { // // CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca i32*, // CHECK: [[LVAR_START:%.+]] = alloca i32, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_PRIV:%.+]] = alloca i32*, // CHECK: [[LVAR_PRIV:%.+]] = alloca i32, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] diff --git a/test/OpenMP/parallel_for_reduction_messages.cpp b/test/OpenMP/parallel_for_reduction_messages.cpp index 57ab1fac9c11..6499ef593a3d 100644 --- a/test/OpenMP/parallel_for_reduction_messages.cpp +++ b/test/OpenMP/parallel_for_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { @@ -61,6 +61,12 @@ class S5 { public: S5(int v) : a(v) {} + void foo() { +#pragma omp parallel private(a) // expected-note {{defined as private}} +#pragma omp for reduction(+:a) // expected-error {{reduction variable must be shared}} + for (int i = 0; i < 10; ++i) + ::foo(); + } }; class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} #if __cplusplus >= 201103L // C++11 or later diff --git a/test/OpenMP/parallel_for_simd_codegen.cpp b/test/OpenMP/parallel_for_simd_codegen.cpp index 06d96353ee9b..369ea17844e9 100644 --- a/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/test/OpenMP/parallel_for_simd_codegen.cpp @@ -114,6 +114,7 @@ void simple(float *a, float *b, float *c, float *d) { int lin = 12; #pragma omp parallel for simd linear(lin : get_val()), linear(g_ptr) +// CHECK: alloca i32, // Init linear private var. // CHECK: [[LIN_VAR:%.+]] = load i32*, i32** % // CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]] diff --git a/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp index bd1a6d505572..8670e55c6e2b 100644 --- a/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/parallel_for_simd_lastprivate_messages.cpp @@ -17,9 +17,9 @@ public: S2(S2 &s2) : a(s2.a) {} const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/parallel_for_simd_reduction_messages.cpp b/test/OpenMP/parallel_for_simd_reduction_messages.cpp index 60a947dd5f84..8ff246e35f6a 100644 --- a/test/OpenMP/parallel_for_simd_reduction_messages.cpp +++ b/test/OpenMP/parallel_for_simd_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/parallel_reduction_messages.cpp b/test/OpenMP/parallel_reduction_messages.cpp index 947353fc2dd4..96f4b58ec8a4 100644 --- a/test/OpenMP/parallel_reduction_messages.cpp +++ b/test/OpenMP/parallel_reduction_messages.cpp @@ -24,9 +24,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/parallel_sections_codegen.cpp b/test/OpenMP/parallel_sections_codegen.cpp index 6fc687d0c503..a26147303611 100644 --- a/test/OpenMP/parallel_sections_codegen.cpp +++ b/test/OpenMP/parallel_sections_codegen.cpp @@ -28,9 +28,7 @@ int main() { { // CHECK: store i32 0, i32* [[LB_PTR:%.+]], // CHECK: store i32 1, i32* [[UB_PTR:%.+]], -// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]], -// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]], -// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID:%.+]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) // <<UB = min(UB, GlobalUB);>> // CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] // CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1 diff --git a/test/OpenMP/parallel_sections_lastprivate_messages.cpp b/test/OpenMP/parallel_sections_lastprivate_messages.cpp index af3c5e2a575b..eccbfc5e7e02 100644 --- a/test/OpenMP/parallel_sections_lastprivate_messages.cpp +++ b/test/OpenMP/parallel_sections_lastprivate_messages.cpp @@ -17,9 +17,9 @@ public: S2(S2 &s2) : a(s2.a) {} const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/parallel_sections_reduction_messages.cpp b/test/OpenMP/parallel_sections_reduction_messages.cpp index 05cc7f0f6c5f..26b3a5355c19 100644 --- a/test/OpenMP/parallel_sections_reduction_messages.cpp +++ b/test/OpenMP/parallel_sections_reduction_messages.cpp @@ -26,9 +26,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/sections_codegen.cpp b/test/OpenMP/sections_codegen.cpp index 65747e7b912f..94ded37db95d 100644 --- a/test/OpenMP/sections_codegen.cpp +++ b/test/OpenMP/sections_codegen.cpp @@ -4,7 +4,8 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* // CHECK-LABEL: foo void foo() {}; // CHECK-LABEL: bar @@ -29,7 +30,7 @@ int main() { { // CHECK: store i32 0, i32* [[LB_PTR:%.+]], // CHECK: store i32 1, i32* [[UB_PTR:%.+]], -// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) +// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* [[SECTIONS_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1) // <<UB = min(UB, GlobalUB);>> // CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]] // CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1 @@ -69,7 +70,7 @@ int main() { // CHECK-NEXT: br label %[[INNER_FOR_COND]] // CHECK: [[INNER_LOOP_END]] } -// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]]) +// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* [[SECTIONS_LOC]], i32 [[GTID]]) // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SECTIONS_LOC]], #pragma omp sections nowait { diff --git a/test/OpenMP/sections_firstprivate_codegen.cpp b/test/OpenMP/sections_firstprivate_codegen.cpp index a5426f8bf956..227bb076374f 100644 --- a/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/test/OpenMP/sections_firstprivate_codegen.cpp @@ -256,11 +256,7 @@ int main() { // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* %{{.+}}, -// CHECK: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST:%.+]] to i32* -// CHECK: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK: [[T_VARPVT:%.+]] = load i64, i64* [[T_VARCAST]], -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void {{.*}}i64 [[T_VARPVT]], +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // @@ -271,14 +267,13 @@ int main() { // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, // CHECK: alloca i{{[0-9]+}}, -// CHECK: alloca i{{[0-9]+}}, // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % +// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** % // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % diff --git a/test/OpenMP/sections_lastprivate_messages.cpp b/test/OpenMP/sections_lastprivate_messages.cpp index 5f6b420cb8ea..01bb13de5510 100644 --- a/test/OpenMP/sections_lastprivate_messages.cpp +++ b/test/OpenMP/sections_lastprivate_messages.cpp @@ -17,9 +17,9 @@ public: S2(S2 &s2) : a(s2.a) {} const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/sections_reduction_messages.cpp b/test/OpenMP/sections_reduction_messages.cpp index f13c5b3f2862..58b22a5a61d0 100644 --- a/test/OpenMP/sections_reduction_messages.cpp +++ b/test/OpenMP/sections_reduction_messages.cpp @@ -27,9 +27,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/simd_lastprivate_messages.cpp b/test/OpenMP/simd_lastprivate_messages.cpp index 16223db71ad5..349d6a5b904d 100644 --- a/test/OpenMP/simd_lastprivate_messages.cpp +++ b/test/OpenMP/simd_lastprivate_messages.cpp @@ -17,9 +17,9 @@ public: S2(S2 &s2) : a(s2.a) {} const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/simd_reduction_messages.cpp b/test/OpenMP/simd_reduction_messages.cpp index 1e1a233ec499..bdf429d1967c 100644 --- a/test/OpenMP/simd_reduction_messages.cpp +++ b/test/OpenMP/simd_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/single_firstprivate_codegen.cpp b/test/OpenMP/single_firstprivate_codegen.cpp index 9f059e970470..4a91bc2d5bc5 100644 --- a/test/OpenMP/single_firstprivate_codegen.cpp +++ b/test/OpenMP/single_firstprivate_codegen.cpp @@ -222,24 +222,18 @@ int main() { // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR:%.+]], -// CHECK: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST:%.+]] to i32* -// CHECK: store i32 [[T_VARVAL]], i32* [[T_VARCONV]], -// CHECK: [[T_VARPVT:%.+]] = load i64, i64* [[T_VARCAST]], -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i64, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void {{.*}}i64 [[T_VARPVT:%.+]], +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void // CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // -// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i64 {{.*}}%{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) -// CHECK: [[T_VAR_ARG:%.+]] = alloca i{{[0-9]+}}, +// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, [2 x i32]* dereferenceable(8) %{{.+}}, [2 x [[S_INT_TY]]]* dereferenceable(8) %{{.+}}, [[S_INT_TY]]* dereferenceable(4) %{{.+}}) // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]], -// CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** % -// CHECK: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_ARG]] to i32* +// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** % // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** % // CHECK: [[S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** % // CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** % diff --git a/test/OpenMP/target_codegen.cpp b/test/OpenMP/target_codegen.cpp index c457045c17c7..20bc96b84cbf 100644 --- a/test/OpenMP/target_codegen.cpp +++ b/test/OpenMP/target_codegen.cpp @@ -34,16 +34,18 @@ // code, only 6 will have mapped arguments, and only 4 have all-constant map // sizes. -// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 0, i[[SZ]] 4] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 288] +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 35, i32 288, i32 35, i32 35, i32 288, i32 288, i32 35, i32 35] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 35] -// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 @@ -59,6 +61,7 @@ // TCHECK: @{{.+}} = constant [[ENTTY]] // TCHECK: @{{.+}} = constant [[ENTTY]] // TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = {{.*}}constant [[ENTTY]] // TCHECK-NOT: @{{.+}} = constant [[ENTTY]] // Check if offloading descriptor is created. @@ -79,6 +82,9 @@ struct TT{ ty Y; }; +int global; +extern int global; + // CHECK: define {{.*}}[[FOO:@.+]]( int foo(int n) { int a = 0; @@ -88,31 +94,60 @@ int foo(int n) { double c[5][10]; double cn[5][n]; TT<long long, char> d; + static long *plocal; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[ADD:%.+]] = add nsw i32 + // CHECK: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64 + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT0:@.+]]() // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target + #pragma omp target device(global + a) { } - // CHECK: store i32 0, i32* [[RHV:%.+]], align 4 - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-DAG: [[ADD:%.+]] = add nsw i32 + // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64 + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]** + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]** + // CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]] + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]] + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target device(global + a) nowait + { + static int local1; + *plocal = global; + local1 = global; + } + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) - #pragma omp target if(0) + #pragma omp target if(0) firstprivate(global) { - a += 1; + global += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0)) // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -122,9 +157,7 @@ int foo(int n) { // CHECK-DAG: store i[[SZ]] [[BP0:%[^,]+]], i[[SZ]]* [[CBPADDR0]] // CHECK-DAG: store i[[SZ]] [[P0:%[^,]+]], i[[SZ]]* [[CPADDR0]] - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) @@ -138,7 +171,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -155,21 +188,18 @@ int foo(int n) { // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] // CHECK-DAG: store i[[SZ]] [[P1:%[^,]+]], i[[SZ]]* [[CPADDR1]] - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFELSE]] - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFEND]] - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + + // CHECK: [[IFEND]] #pragma omp target if(n>10) { a += 1; @@ -191,9 +221,9 @@ int foo(int n) { // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 - // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[TRY]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -282,15 +312,18 @@ int foo(int n) { // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8]] // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* [[SADDR8]] - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT4]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + + // CHECK: [[IFEND]] #pragma omp target if(n>20) { a += 1; @@ -464,9 +497,9 @@ int bar(int n){ // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 -// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[TRY]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -507,9 +540,9 @@ int bar(int n){ // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* [[SADDR1]] // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to [[S1]]** -// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to [[S1]]** +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to double** // CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR0]] -// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR0]] +// CHECK-DAG: store double* %{{.+}}, double** [[CPADDR0]] // CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* [[SADDR0]] // CHECK-DAG: [[CBPADDR4:%.+]] = bitcast i8** [[BPADDR4]] to i16** @@ -518,15 +551,18 @@ int bar(int n){ // CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4]] // CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* [[SADDR4]] -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 -// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT7]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] + +// CHECK: [[IFEND]] // // CHECK: define {{.*}}[[FSTATIC]] @@ -534,7 +570,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -566,21 +602,18 @@ int bar(int n){ // CHECK-DAG: store [10 x i32]* [[VAL3:%[^,]+]], [10 x i32]** [[CBPADDR3]] // CHECK-DAG: store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]] -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] + +// CHECK: [[IFEND]] // // CHECK: define {{.*}}[[FTEMPLATE]] @@ -588,7 +621,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0)) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -613,22 +646,18 @@ int bar(int n){ // CHECK-DAG: store [10 x i32]* [[VAL2:%[^,]+]], [10 x i32]** [[CBPADDR2]] // CHECK-DAG: store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]] -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // Check that the offloading functions are emitted and that the arguments are diff --git a/test/OpenMP/target_codegen_registration.cpp b/test/OpenMP/target_codegen_registration.cpp index 064c15b8d8f1..5684b2573b6e 100644 --- a/test/OpenMP/target_codegen_registration.cpp +++ b/test/OpenMP/target_codegen_registration.cpp @@ -63,40 +63,40 @@ // CHECK-DAG: {{@.+}} = private constant i8 0 // TCHECK-NOT: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-NTARGET-NOT: private constant i8 0 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i diff --git a/test/OpenMP/target_data_codegen.cpp b/test/OpenMP/target_data_codegen.cpp index 7e5e267d13b6..2c9526de73e1 100644 --- a/test/OpenMP/target_data_codegen.cpp +++ b/test/OpenMP/target_data_codegen.cpp @@ -22,15 +22,15 @@ ST<int> gb; double gc[100]; // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800] -// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] -// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33] -// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 37] +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 37] // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] -// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] // CK1-LABEL: _Z3fooi void foo(int arg) { @@ -38,8 +38,9 @@ void foo(int arg) { float lb[arg]; // Region 00 - // CK1-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) - // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 + // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -52,8 +53,9 @@ void foo(int arg) { // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 - // CK1-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) - // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 + // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] #pragma omp target data if(1+3-5) device(arg) map(from: gc) @@ -67,7 +69,7 @@ void foo(int arg) { // Region 02 // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK1: [[IFTHEN]] - // CK1-DAG: call void @__tgt_target_data_begin(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -86,7 +88,7 @@ void foo(int arg) { // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK1: [[IFTHEN]] - // CK1-DAG: call void @__tgt_target_data_end(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] // CK1: br label %[[IFEND:[^,]+]] @@ -97,7 +99,7 @@ void foo(int arg) { {++arg;} // Region 03 - // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -113,7 +115,7 @@ void foo(int arg) { // CK1-DAG: [[CSVAL0]] = mul nuw i[[sz]] %{{[^,]+}}, 4 // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 - // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S]] @@ -124,7 +126,7 @@ void foo(int arg) { {++arg;} // Region 04 - // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -147,7 +149,7 @@ void foo(int arg) { // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 - // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] #pragma omp target data map(to: gb.b[:3]) @@ -178,7 +180,7 @@ struct ST { }; // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24] -// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 37, i32 21] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 37, i64 21] // CK2-LABEL: _Z3bari int bar(int arg){ @@ -189,8 +191,9 @@ int bar(int arg){ // Region 00 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK2: [[IFTHEN]] -// CK2-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) -// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -222,8 +225,9 @@ int bar(int arg){ // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK2: [[IFTHEN]] -// CK2-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) -// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P]] // CK2: br label %[[IFEND:[^,]+]] diff --git a/test/OpenMP/target_data_use_device_ptr_codegen.cpp b/test/OpenMP/target_data_use_device_ptr_codegen.cpp index 5e275565170e..36c42e32ea3f 100644 --- a/test/OpenMP/target_data_use_device_ptr_codegen.cpp +++ b/test/OpenMP/target_data_use_device_ptr_codegen.cpp @@ -14,18 +14,18 @@ double *g; // CK1: @g = global double* -// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i32] [i32 99] -// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i32] [{{i32 35, i32 99|i32 99, i32 35}}] -// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99] -// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i32] [i32 99, i32 99] -// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35] -// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35] +// CK1: [[MTYPE00:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE01:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE03:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE04:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE05:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE06:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE07:@.+]] = {{.*}}constant [1 x i64] [i64 99] +// CK1: [[MTYPE08:@.+]] = {{.*}}constant [2 x i64] [{{i64 35, i64 99|i64 99, i64 35}}] +// CK1: [[MTYPE09:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] +// CK1: [[MTYPE10:@.+]] = {{.*}}constant [2 x i64] [i64 99, i64 99] +// CK1: [[MTYPE11:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35] +// CK1: [[MTYPE12:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35] // CK1-LABEL: @_Z3foo template<typename T> @@ -330,10 +330,10 @@ void bar(float *&a, int *&b) { #ifdef CK2 // CK2: [[ST:%.+]] = type { double*, double** } -// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i32] [i32 35, i32 83] -// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i32] [i32 32, i32 19, i32 83] -// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i32] [i32 96, i32 35] -// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i32] [i32 96, i32 32, i32 19, i32 83] +// CK2: [[MTYPE00:@.+]] = {{.*}}constant [2 x i64] [i64 35, i64 83] +// CK2: [[MTYPE01:@.+]] = {{.*}}constant [3 x i64] [i64 32, i64 19, i64 83] +// CK2: [[MTYPE02:@.+]] = {{.*}}constant [2 x i64] [i64 96, i64 35] +// CK2: [[MTYPE03:@.+]] = {{.*}}constant [4 x i64] [i64 96, i64 32, i64 19, i64 83] template <typename T> struct ST { diff --git a/test/OpenMP/target_depend_messages.cpp b/test/OpenMP/target_depend_messages.cpp index 48bd94180c1e..3eb3af132d8f 100644 --- a/test/OpenMP/target_depend_messages.cpp +++ b/test/OpenMP/target_depend_messages.cpp @@ -36,21 +36,21 @@ int main(int argc, char **argv, char *env[]) { foo(); #pragma omp target depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target depend(in : argv[1][1] = '2') foo(); - #pragma omp target depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target depend (in : argv[0]) foo(); #pragma omp target depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target depend (in : main) foo(); - #pragma omp target depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); diff --git a/test/OpenMP/target_enter_data_codegen.cpp b/test/OpenMP/target_enter_data_codegen.cpp index 683cd976d395..08b6c7c2702e 100644 --- a/test/OpenMP/target_enter_data_codegen.cpp +++ b/test/OpenMP/target_enter_data_codegen.cpp @@ -22,15 +22,15 @@ ST<int> gb; double gc[100]; // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800] -// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 32] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 32] // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] -// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33] -// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 37] +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 37] // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] -// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] // CK1-LABEL: _Z3fooi void foo(int arg) { @@ -38,8 +38,9 @@ void foo(int arg) { float lb[arg]; // Region 00 - // CK1-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) - // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: call void @__tgt_target_data_begin_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 + // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -52,7 +53,7 @@ void foo(int arg) { // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 // CK1-NOT: __tgt_target_data_end - #pragma omp target enter data if(1+3-5) device(arg) map(alloc: gc) + #pragma omp target enter data if(1+3-5) device(arg) map(alloc: gc) nowait {++arg;} // Region 01 @@ -63,7 +64,7 @@ void foo(int arg) { // Region 02 // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK1: [[IFTHEN]] - // CK1-DAG: call void @__tgt_target_data_begin(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -87,7 +88,7 @@ void foo(int arg) { {++arg;} // Region 03 - // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -110,7 +111,7 @@ void foo(int arg) { {++arg;} // Region 04 - // CK1-DAG: call void @__tgt_target_data_begin(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -161,7 +162,7 @@ struct ST { }; // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24] -// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 37, i32 21] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 37, i64 21] // CK2-LABEL: _Z3bari int bar(int arg){ @@ -172,8 +173,9 @@ int bar(int arg){ // Region 00 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK2: [[IFTHEN]] -// CK2-DAG: call void @__tgt_target_data_begin(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) -// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: call void @__tgt_target_data_begin(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_enter_data_depend_messages.cpp b/test/OpenMP/target_enter_data_depend_messages.cpp index 4aa122355ba5..77e7039e8e20 100644 --- a/test/OpenMP/target_enter_data_depend_messages.cpp +++ b/test/OpenMP/target_enter_data_depend_messages.cpp @@ -38,21 +38,21 @@ int tmain(T argc, S **argv, R *env[]) { foo(); #pragma omp target enter data map(to: i) depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target enter data map(to: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target enter data map(to: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target enter data map(to: i) depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target enter data map(to: i) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend(in : argv[1][1] = '2') // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); - #pragma omp target enter data map(to: i) depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target enter data map(to: i) depend (in : argv[0]) foo(); #pragma omp target enter data map(to: i) depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target enter data map(to: i) depend (in : tmain) // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend (in : tmain) foo(); - #pragma omp target enter data map(to: i) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target enter data map(to: i) depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); @@ -113,21 +113,21 @@ int main(int argc, char **argv, char *env[]) { foo(); #pragma omp target enter data map(to: i) depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target enter data map(to: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target enter data map(to: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target enter data map(to: i) depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target enter data map(to: i) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend(in : argv[1][1] = '2') foo(); - #pragma omp target enter data map(to: i) depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target enter data map(to: i) depend (in : argv[0]) foo(); #pragma omp target enter data map(to: i) depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target enter data map(to: i) depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend (in : main) foo(); - #pragma omp target enter data map(to: i) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target enter data map(to: i) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target enter data map(to: i) depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); diff --git a/test/OpenMP/target_exit_data_codegen.cpp b/test/OpenMP/target_exit_data_codegen.cpp index a82c1c6c4aac..9359e3be451b 100644 --- a/test/OpenMP/target_exit_data_codegen.cpp +++ b/test/OpenMP/target_exit_data_codegen.cpp @@ -22,15 +22,15 @@ ST<int> gb; double gc[100]; // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800] -// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] -// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 32] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 32] -// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 38] +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 38] // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] -// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 32, i32 16] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 16] // CK1-LABEL: _Z3fooi void foo(int arg) { @@ -39,8 +39,9 @@ void foo(int arg) { // Region 00 // CK1-NOT: __tgt_target_data_begin - // CK1-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) - // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: call void @__tgt_target_data_end_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 + // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -52,7 +53,7 @@ void foo(int arg) { // CK1-DAG: store [100 x double]* @gc, [100 x double]** [[PC0]] // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 - #pragma omp target exit data if(1+3-5) device(arg) map(from: gc) + #pragma omp target exit data if(1+3-5) device(arg) map(from: gc) nowait {++arg;} // Region 01 @@ -64,7 +65,7 @@ void foo(int arg) { // CK1-NOT: __tgt_target_data_begin // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK1: [[IFTHEN]] - // CK1-DAG: call void @__tgt_target_data_end(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -88,7 +89,7 @@ void foo(int arg) { // Region 03 // CK1-NOT: __tgt_target_data_begin - // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -111,7 +112,7 @@ void foo(int arg) { // Region 04 // CK1-NOT: __tgt_target_data_begin - // CK1-DAG: call void @__tgt_target_data_end(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -161,7 +162,7 @@ struct ST { }; // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24] -// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 36, i32 20] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 36, i64 20] // CK2-LABEL: _Z3bari int bar(int arg){ @@ -173,8 +174,9 @@ int bar(int arg){ // CK2-NOT: __tgt_target_data_begin // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK2: [[IFTHEN]] -// CK2-DAG: call void @__tgt_target_data_end(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) -// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: call void @__tgt_target_data_end(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_exit_data_depend_messages.cpp b/test/OpenMP/target_exit_data_depend_messages.cpp index cdefbeed08bd..ce8d237f75e9 100644 --- a/test/OpenMP/target_exit_data_depend_messages.cpp +++ b/test/OpenMP/target_exit_data_depend_messages.cpp @@ -38,21 +38,21 @@ int tmain(T argc, S **argv, R *env[]) { foo(); #pragma omp target exit data map(from: i) depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target exit data map(from: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target exit data map(from: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target exit data map(from: i) depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target exit data map(from: i) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend(in : argv[1][1] = '2') // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); - #pragma omp target exit data map(from: i) depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target exit data map(from: i) depend (in : argv[0]) foo(); #pragma omp target exit data map(from: i) depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target exit data map(from: i) depend (in : tmain) // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend (in : tmain) foo(); - #pragma omp target exit data map(from: i) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target exit data map(from: i) depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); @@ -113,21 +113,21 @@ int main(int argc, char **argv, char *env[]) { foo(); #pragma omp target exit data map(from: i) depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target exit data map(from: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target exit data map(from: i) depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target exit data map(from: i) depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target exit data map(from: i) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend(in : argv[1][1] = '2') foo(); - #pragma omp target exit data map(from: i) depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target exit data map(from: i) depend (in : argv[0]) foo(); #pragma omp target exit data map(from: i) depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target exit data map(from: i) depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend (in : main) foo(); - #pragma omp target exit data map(from: i) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target exit data map(from: i) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target exit data map(from: i) depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); diff --git a/test/OpenMP/target_firstprivate_codegen.cpp b/test/OpenMP/target_firstprivate_codegen.cpp index 1fb0ef9e848c..c2909c6f18b8 100644 --- a/test/OpenMP/target_firstprivate_codegen.cpp +++ b/test/OpenMP/target_firstprivate_codegen.cpp @@ -33,15 +33,15 @@ struct TT{ // TCHECK: [[S1:%.+]] = type { double } // CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 4] -// CHECK: [[MAPT:@.+]] = private unnamed_addr constant [1 x i32] [i32 288] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 161, i32 288, i32 161, i32 161, i32 288, i32 288, i32 161, i32 161] +// CHECK: [[MAPT:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 161, i64 288, i64 161, i64 161, i64 288, i64 288, i64 161, i64 161] // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] zeroinitializer -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [1 x i32] [i32 32] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 161] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [1 x i64] [i64 32] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 161] // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 161] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 161] // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 161] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 161] // CHECK: define {{.*}}[[FOO:@.+]]( @@ -68,7 +68,6 @@ int foo(int n, double *ptr) { // CHECK: [[C:%.+]] = alloca [5 x [10 x double]], // CHECK: [[D:%.+]] = alloca [[TT]], // CHECK: [[ACAST:%.+]] = alloca i{{[0-9]+}}, - // CHECK: {{.+}} = alloca i{{[0-9]+}}, // CHECK: [[BASE_PTR_ARR:%.+]] = alloca [1 x i8*], // CHECK: [[PTR_ARR:%.+]] = alloca [1 x i8*], // CHECK: [[A2CAST:%.+]] = alloca i{{[0-9]+}}, @@ -101,7 +100,7 @@ int foo(int n, double *ptr) { // CHECK: store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR2]], // CHECK: [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT]], i32 0, i32 0)) + // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG]], i8** [[PTR_GEP_ARG]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT]], i32 0, i32 0)) // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -220,11 +219,11 @@ int foo(int n, double *ptr) { // CHECK: [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 9, i8** [[BASE_PTR_GEP_ARG2]], i8** [[PTR_GEP_ARG2]], i[[SZ]]* [[SIZES_ARG2]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT2]], i32 0, i32 0)) // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} [[BN_SZ:%.+]], float* [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} [[CN_SZ1:%.+]], i{{[0-9]+}} [[CN_SZ2:%.+]], double* [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} [[BN_SZ:%.+]], float* {{.+}} [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} [[CN_SZ1:%.+]], i{{[0-9]+}} [[CN_SZ2:%.+]], double* {{.+}} [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -311,7 +310,7 @@ int foo(int n, double *ptr) { // CHECK: [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CHECK: [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target(i32 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT3]], i32 0, i32 0)) + // CHECK: {{.+}} = call i32 @__tgt_target(i64 -1, {{.+}}, i32 1, i8** [[BASE_PTR_GEP_ARG3]], i8** [[PTR_GEP_ARG3]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT3]], i32 0, i32 0)) // TCHECK: define void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]]) // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, @@ -447,7 +446,7 @@ struct S1 { // CHECK: store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]], // only check that we use the map types stored in the global variable - // CHECK: call i32 @__tgt_target(i32 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT4]], i32 0, i32 0)) + // CHECK: call i32 @__tgt_target(i64 -1, {{.+}}, i32 5, i8** {{.+}}, i8** {{.+}}, i{{[0-9]+}}* {{.+}}, i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT4]], i32 0, i32 0)) // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]], i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, @@ -520,7 +519,7 @@ struct S1 { // CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], // only check that the right sizes and map types are used - // CHECK: call i32 @__tgt_target(i32 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0)) + // CHECK: call i32 @__tgt_target(i64 -1, {{.+}}, i32 3, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0)) }; @@ -558,7 +557,7 @@ int bar(int n, double *ptr){ // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]** // CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], -// CHECK: call i32 @__tgt_target(i32 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT6]], i32 0, i32 0)) +// CHECK: call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0)) // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) diff --git a/test/OpenMP/target_is_device_ptr_codegen.cpp b/test/OpenMP/target_is_device_ptr_codegen.cpp index 1a54aa18c00b..1491f41a787f 100644 --- a/test/OpenMP/target_is_device_ptr_codegen.cpp +++ b/test/OpenMP/target_is_device_ptr_codegen.cpp @@ -15,25 +15,25 @@ double *g; // CK1: @g = global double* // CK1: [[SIZES00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}] -// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES01:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES03:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES04:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES05:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1: [[SIZES06:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}] -// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i32] [i32 288, i32 288] +// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i64] [i64 288, i64 288] // CK1-LABEL: @_Z3foo template<typename T> @@ -41,7 +41,7 @@ void foo(float *&lr, T *&tr) { float *l; T *t; - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES00]]{{.+}}, {{.+}}[[TYPES00]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -58,7 +58,7 @@ void foo(float *&lr, T *&tr) { ++g; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES01]]{{.+}}, {{.+}}[[TYPES01]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -75,7 +75,7 @@ void foo(float *&lr, T *&tr) { ++l; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES02]]{{.+}}, {{.+}}[[TYPES02]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -92,7 +92,7 @@ void foo(float *&lr, T *&tr) { ++t; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES03]]{{.+}}, {{.+}}[[TYPES03]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -110,7 +110,7 @@ void foo(float *&lr, T *&tr) { ++lr; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES04]]{{.+}}, {{.+}}[[TYPES04]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -128,7 +128,7 @@ void foo(float *&lr, T *&tr) { ++tr; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES05]]{{.+}}, {{.+}}[[TYPES05]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -146,7 +146,7 @@ void foo(float *&lr, T *&tr) { ++tr; } - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES06]]{{.+}}, {{.+}}[[TYPES06]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -191,13 +191,13 @@ void bar(float *&a, int *&b) { // CK2: [[ST:%.+]] = type { double*, double** } // CK2: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}] -// CK2: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 33] // CK2: [[SIZE01:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}] -// CK2: [[MTYPE01:@.+]] = {{.+}}constant [2 x i32] [i32 32, i32 17] +// CK2: [[MTYPE01:@.+]] = {{.+}}constant [2 x i64] [i64 32, i64 17] // CK2: [[SIZE02:@.+]] = {{.+}}constant [3 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}, i[[sz]] {{8|4}}] -// CK2: [[MTYPE02:@.+]] = {{.+}}constant [3 x i32] [i32 33, i32 0, i32 17] +// CK2: [[MTYPE02:@.+]] = {{.+}}constant [3 x i64] [i64 33, i64 0, i64 17] template <typename T> struct ST { @@ -209,7 +209,7 @@ struct ST { void foo(double *&arg) { int *la = 0; - // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -225,7 +225,7 @@ struct ST { a++; } - // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -249,7 +249,7 @@ struct ST { b++; } - // CK2-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK2-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_map_codegen.cpp b/test/OpenMP/target_map_codegen.cpp index 69e80bb9505b..34b239504462 100644 --- a/test/OpenMP/target_map_codegen.cpp +++ b/test/OpenMP/target_map_codegen.cpp @@ -15,15 +15,33 @@ // RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 #ifdef CK1 +class B { +public: + static double VAR; + B() { + } + + static void modify(int &res) { +#pragma omp target map(tofrom \ + : res) + { + res = B::VAR; + } + } +}; +double B::VAR = 1.0; + // CK1-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK1-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK1-LABEL: implicit_maps_integer void implicit_maps_integer (int a){ + // CK1: call void{{.*}}modify + B::modify(a); int i = a; - // CK1-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK1-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -62,15 +80,15 @@ void implicit_maps_integer (int a){ // CK2: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK2: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK2: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK2: [[SIZES2:@.+]] = {{.+}}constant [1 x i[[sz]]] zeroinitializer // Map types: OMP_MAP_IS_PTR = 32 -// CK2: [[TYPES2:@.+]] = {{.+}}constant [1 x i32] [i32 32] +// CK2: [[TYPES2:@.+]] = {{.+}}constant [1 x i64] [i64 32] // CK2-LABEL: implicit_maps_reference void implicit_maps_reference (int a, int *b){ int &i = a; - // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -90,7 +108,7 @@ void implicit_maps_reference (int a, int *b){ } int *&p = b; - // CK2-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}) + // CK2-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}) // CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK2-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -141,12 +159,12 @@ void implicit_maps_reference (int a, int *b){ // CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK3-LABEL: implicit_maps_parameter void implicit_maps_parameter (int a){ - // CK3-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK3-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -185,7 +203,7 @@ void implicit_maps_parameter (int a){ // CK4-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK4-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK4-LABEL: implicit_maps_nested_integer void implicit_maps_nested_integer (int a){ @@ -198,7 +216,7 @@ void implicit_maps_nested_integer (int a){ // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}}) #pragma omp parallel { - // CK4-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK4-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -241,7 +259,7 @@ void implicit_maps_nested_integer (int a){ // CK5-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK5-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK5-LABEL: implicit_maps_nested_integer_and_enum void implicit_maps_nested_integer_and_enum (int a){ @@ -252,7 +270,7 @@ void implicit_maps_nested_integer_and_enum (int a){ // Using an enum should not change the mapping information. int i = a; - // CK5-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK5-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK5-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK5-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK5-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -292,12 +310,12 @@ void implicit_maps_nested_integer_and_enum (int a){ // CK6-DAG: [[GBL:@Gi]] = global i32 0 // CK6-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK6-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK6-LABEL: implicit_maps_host_global int Gi; void implicit_maps_host_global (int a){ - // CK6-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK6-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK6-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK6-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK6-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -341,15 +359,15 @@ void implicit_maps_host_global (int a){ // CK7-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] -// Map types: OMP_MAP_TO | OMP_MAP_IS_FIRST = 33 -// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK7-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] +// Map types: OMP_MAP_TO | OMP_MAP_PRIVATE_PTR | OMP_MAP_FIRST_REF = 161 +// CK7-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 161] // CK7-LABEL: implicit_maps_double void implicit_maps_double (int a){ double d = (double)a; - // CK7-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK7-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK7-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK7-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK7-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -400,13 +418,13 @@ void implicit_maps_double (int a){ // CK8-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: OMP_MAP_PRIVATE_VAL | OMP_MAP_IS_FIRST = 288 -// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK8-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] // CK8-LABEL: implicit_maps_float void implicit_maps_float (int a){ float f = (float)a; - // CK8-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK8-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK8-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK8-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK8-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -443,14 +461,14 @@ void implicit_maps_float (int a){ #ifdef CK9 // CK9-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16] -// Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 -// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 35] +// Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547 +// CK9-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547] // CK9-LABEL: implicit_maps_array void implicit_maps_array (int a){ double darr[2] = {(double)a, (double)a}; - // CK9-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK9-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK9-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK9-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK9-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -485,13 +503,13 @@ void implicit_maps_array (int a){ // CK10-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] zeroinitializer // Map types: OMP_MAP_IS_FIRST = 32 -// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 32] +// CK10-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 32] // CK10-LABEL: implicit_maps_pointer void implicit_maps_pointer (){ double *ddyn; - // CK10-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK10-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK10-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK10-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK10-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -526,14 +544,14 @@ void implicit_maps_pointer (){ #ifdef CK11 // CK11-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 16] -// Map types: OMP_MAP_TO + OMP_MAP_IS_FIRST = 33 -// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547 +// CK11-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547] // CK11-LABEL: implicit_maps_double_complex void implicit_maps_double_complex (int a){ double _Complex dc = (double)a; - // CK11-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK11-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK11-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK11-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK11-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -544,7 +562,7 @@ void implicit_maps_double_complex (int a){ // CK11-DAG: store { double, double }* [[PTR]], { double, double }** [[CP1]] // CK11: call void [[KERNEL:@.+]]({ double, double }* [[PTR]]) - #pragma omp target + #pragma omp target defaultmap(tofrom:scalar) { dc *= dc; } @@ -570,15 +588,15 @@ void implicit_maps_double_complex (int a){ // CK12-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 8] // Map types: OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] -// Map types: OMP_MAP_TO + OMP_MAP_IS_FIRST = 33 -// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK12-64-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] +// Map types: OMP_MAP_TO | OMP_MAP_PRIVATE_PTR | OMP_MAP_FIRST_REF = 161 +// CK12-32-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 161] // CK12-LABEL: implicit_maps_float_complex void implicit_maps_float_complex (int a){ float _Complex fc = (float)a; - // CK12-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK12-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK12-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK12-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK12-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -630,14 +648,14 @@ void implicit_maps_float_complex (int a){ // Map types: // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 (vla size) // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 (vla size) -// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 -// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i32] [i32 288, i32 288, i32 35] +// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_IMPICIT_MAP = 547 +// CK13-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i64] [i64 288, i64 288, i64 547] // CK13-LABEL: implicit_maps_variable_length_array void implicit_maps_variable_length_array (int a){ double vla[2][a]; - // CK13-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}) + // CK13-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i[[sz:64|32]]* [[SGEP:%[^,]+]], {{.+}}[[TYPES]]{{.+}}) // CK13-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK13-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK13-DAG: [[SGEP]] = getelementptr inbounds {{.+}}[[SS:%[^,]+]], i32 0, i32 0 @@ -699,11 +717,12 @@ void implicit_maps_variable_length_array (int a){ #ifdef CK14 // CK14-DAG: [[ST:%.+]] = type { i32, double } -// CK14-DAG: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}, i{{64|32}} 4] +// CK14-DAG: [[SIZES:@.+]] = {{.+}}constant [3 x i[[sz:64|32]]] [i{{64|32}} 4, i{{64|32}} 8, i{{64|32}} 4] // Map types: -// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 +// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_IMPLICIT_MAP = 547 +// - OMP_MAP_TO + OMP_MAP_FROM + OMP_IMPLICIT_MAP = 515 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 35, i32 288] +// CK14-DAG: [[TYPES:@.+]] = {{.+}}constant [3 x i64] [i64 547, i64 515, i64 288] class SSS { public: @@ -726,19 +745,26 @@ void implicit_maps_class (int a){ SSS sss(a, (double)a); // CK14: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) - // CK14-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK14-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 3, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK14-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK14-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK14-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 // CK14-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 // CK14-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]** - // CK14-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]** + // CK14-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** // CK14-DAG: store [[ST]]* [[DECL:%.+]], [[ST]]** [[CBP0]] - // CK14-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CP0]] + // CK14-DAG: store i32* %{{.+}}, i32** [[CP0]] // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 + // CK14-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]** + // CK14-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double** + // CK14-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]] + // CK14-DAG: store double* %{{.+}}, double** [[CP1]] + + // CK14-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2 + // CK14-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2 // CK14-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[sz]]* // CK14-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[sz]]* // CK14-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP1]] @@ -773,17 +799,17 @@ void implicit_maps_class (int a){ #ifdef CK15 // CK15: [[ST:%.+]] = type { i32, double, i32* } -// CK15: [[SIZES:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4] +// CK15: [[SIZES:@.+]] = {{.+}}constant [5 x i[[sz:64|32]]] [i{{64|32}} 4, i{{64|32}} 8, i{{64|32}} {{8|4}}, i{{64|32}} 4, i{{64|32}} 4] // Map types: -// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 +// - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK15: [[TYPES:@.+]] = {{.+}}constant [2 x i32] [i32 35, i32 288] +// CK15: [[TYPES:@.+]] = {{.+}}constant [5 x i64] [i64 547, i64 515, i64 512, i64 531, i64 288] -// CK15: [[SIZES2:@.+]] = {{.+}}constant [2 x i[[sz]]] [i{{64|32}} {{24|16}}, i{{64|32}} 4] +// CK15: [[SIZES2:@.+]] = {{.+}}constant [5 x i[[sz]]] [i{{64|32}} 4, i{{64|32}} 8, i{{64|32}} {{8|4}}, i{{64|32}} 4, i{{64|32}} 4] // Map types: // - OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK15: [[TYPES2:@.+]] = {{.+}}constant [2 x i32] [i32 35, i32 288] +// CK15: [[TYPES2:@.+]] = {{.+}}constant [5 x i64] [i64 547, i64 515, i64 512, i64 531, i64 288] template<int x> class SSST { @@ -818,23 +844,44 @@ void implicit_maps_templated_class (int a){ SSST<123> ssst(a, (double)a, a); // CK15: define {{.*}}void @{{.+}}foo{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) - // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 // CK15-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]** - // CK15-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]** + // CK15-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** // CK15-DAG: store [[ST]]* [[DECL:%.+]], [[ST]]** [[CBP0]] - // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CP0]] + // CK15-DAG: store i32* %{{.+}}, i32** [[CP0]] // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 - // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[sz]]* - // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[sz]]* - // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP1]] - // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP1]] + // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]** + // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double** + // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]] + // CK15-DAG: store double* %{{.+}}, double** [[CP1]] + + // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2 + // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2 + // CK15-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]** + // CK15-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32*** + // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP2]] + // CK15-DAG: store i32** %{{.+}}, i32*** [[CP2]] + + // CK15-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 3 + // CK15-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 3 + // CK15-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i32*** + // CK15-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i32** + // CK15-DAG: store i32** %{{.+}}, i32*** [[CBP3]] + // CK15-DAG: store i32* %{{.+}}, i32** [[CP3]] + + // CK15-DAG: [[BP4:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 4 + // CK15-DAG: [[P4:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 4 + // CK15-DAG: [[CBP4:%.+]] = bitcast i8** [[BP4]] to i[[sz]]* + // CK15-DAG: [[CP4:%.+]] = bitcast i8** [[P4]] to i[[sz]]* + // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP4]] + // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP4]] // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]], @@ -843,23 +890,44 @@ void implicit_maps_templated_class (int a){ ssst.foo(456); // CK15: define {{.*}}void @{{.+}}bar{{.+}}([[ST]]* {{[^,]+}}, i32 {{[^,]+}}) - // CK15-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 2, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}) + // CK15-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 5, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES2]]{{.+}}, {{.+}}[[TYPES2]]{{.+}}) // CK15-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK15-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK15-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 // CK15-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 // CK15-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[ST]]** - // CK15-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[ST]]** - // CK15-DAG: store [[ST]]* [[DECL:%.+]], [[ST]]** [[CBP0]] - // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CP0]] + // CK15-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP0]] + // CK15-DAG: store i32* %{{.+}}, i32** [[CP0]] // CK15-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1 // CK15-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1 - // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[sz]]* - // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[sz]]* - // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP1]] - // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP1]] + // CK15-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[ST]]** + // CK15-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to double** + // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP1]] + // CK15-DAG: store double* %{{.+}}, double** [[CP1]] + + // CK15-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 2 + // CK15-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 2 + // CK15-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[ST]]** + // CK15-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i32*** + // CK15-DAG: store [[ST]]* [[DECL]], [[ST]]** [[CBP2]] + // CK15-DAG: store i32** %{{.+}}, i32*** [[CP2]] + + // CK15-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 3 + // CK15-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 3 + // CK15-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to i32*** + // CK15-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i32** + // CK15-DAG: store i32** %{{.+}}, i32*** [[CBP3]] + // CK15-DAG: store i32* %{{.+}}, i32** [[CP3]] + + // CK15-DAG: [[BP4:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 4 + // CK15-DAG: [[P4:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 4 + // CK15-DAG: [[CBP4:%.+]] = bitcast i8** [[BP4]] to i[[sz]]* + // CK15-DAG: [[CP4:%.+]] = bitcast i8** [[P4]] to i[[sz]]* + // CK15-DAG: store i[[sz]] [[VAL:%.+]], i[[sz]]* [[CBP4]] + // CK15-DAG: store i[[sz]] [[VAL]], i[[sz]]* [[CP4]] // CK15-DAG: [[VAL]] = load i[[sz]], i[[sz]]* [[ADDR:%.+]], // CK15-64-DAG: [[CADDR:%.+]] = bitcast i[[sz]]* [[ADDR]] to i32* // CK15-64-DAG: store i32 {{.+}}, i32* [[CADDR]], @@ -903,7 +971,7 @@ void implicit_maps_templated_class (int a){ // CK16-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK16-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] template<int y> int foo(int d) { @@ -919,7 +987,7 @@ void implicit_maps_templated_function (int a){ int i = a; // CK16: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}}) - // CK16-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK16-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK16-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK16-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 @@ -955,8 +1023,8 @@ void implicit_maps_templated_function (int a){ // CK17-DAG: [[ST:%.+]] = type { i32, double } // CK17-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{16|12}}] -// Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST = 35 -// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 35] +// Map types: OMP_MAP_TO + OMP_MAP_FROM + OMP_MAP_IS_FIRST + OMP_MAP_IMPLICIT = 547 +// CK17-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 547] class SSS { public: @@ -968,7 +1036,7 @@ public: void implicit_maps_struct (int a){ SSS s = {a, (double)a}; - // CK17-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK17-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK17-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK17-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK17-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -1004,7 +1072,7 @@ void implicit_maps_struct (int a){ // CK18-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 4] // Map types: // - OMP_MAP_PRIVATE_VAL + OMP_MAP_IS_FIRST = 288 -// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i32] [i32 288] +// CK18-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 288] template<typename T> int foo(T d) { @@ -1019,7 +1087,7 @@ void implicit_maps_template_type_capture (int a){ int i = a; // CK18: define {{.*}}i32 @{{.+}}foo{{.+}}(i32 {{[^,]+}}) - // CK18-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK18-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) // CK18-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK18-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 @@ -1054,125 +1122,125 @@ void implicit_maps_template_type_capture (int a){ #ifdef CK19 // CK19: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK19: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19: [[SIZE00n:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK19: [[MTYPE00n:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE00n:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK19: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK19: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK19: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240] -// CK19: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK19: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK19: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240] -// CK19: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK19: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK19: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 33] -// CK19: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i64] [i64 35] -// CK19: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19: [[SIZE08:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK19: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK19: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK19: [[SIZE10:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240] -// CK19: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE11:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 240] -// CK19: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19: [[SIZE12:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK19: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i64] [i64 33] -// CK19: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK19: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 32] -// CK19: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK19: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK19: [[SIZE15:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 34] -// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 33] +// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 33] // CK19: [[SIZE17:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 240] -// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 34] +// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 34] // CK19: [[SIZE18:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 240] -// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35] +// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35] -// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 32] +// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 32] // CK19: [[SIZE20:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 33] +// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 33] -// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35] +// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35] // CK19: [[SIZE22:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i32] [i32 288, i32 35] +// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 288, i64 35] // CK19: [[SIZE23:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE23:@.+]] = private {{.*}}constant [1 x i32] [i32 39] +// CK19: [[MTYPE23:@.+]] = private {{.*}}constant [1 x i64] [i64 39] // CK19: [[SIZE24:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 480] -// CK19: [[MTYPE24:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE24:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE25:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK19: [[MTYPE25:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE25:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE26:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 24] -// CK19: [[MTYPE26:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE26:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE27:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK19: [[MTYPE27:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE27:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE28:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 16] -// CK19: [[MTYPE28:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19] +// CK19: [[MTYPE28:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19] // CK19: [[SIZE29:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 4] -// CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19] +// CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19] -// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i32] [i32 288, i32 288, i32 288, i32 35] +// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 288, i64 288, i64 288, i64 35] // CK19: [[SIZE31:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 40] -// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i32] [i32 288, i32 288, i32 288, i32 35] +// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 288, i64 288, i64 288, i64 35] // CK19: [[SIZE32:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728] -// CK19: [[MTYPE32:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE32:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE33:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728] -// CK19: [[MTYPE33:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE33:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE34:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 13728] -// CK19: [[MTYPE34:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE34:@.+]] = private {{.*}}constant [1 x i64] [i64 35] -// CK19: [[MTYPE35:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE35:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19: [[SIZE36:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 208] -// CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i64] [i64 35] -// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35] +// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35] -// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35] +// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35] -// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35] +// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35] -// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35] +// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35] // CK19: [[SIZE41:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 208] -// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i32] [i32 288, i32 288, i32 35] +// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 288, i64 288, i64 35] // CK19: [[SIZE42:@.+]] = private {{.*}}constant [3 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 104] -// CK19: [[MTYPE42:@.+]] = private {{.*}}constant [3 x i32] [i32 35, i32 19, i32 19] +// CK19: [[MTYPE42:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 19, i64 19] -// CK19: [[MTYPE43:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK19: [[MTYPE43:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: explicit_maps_single void explicit_maps_single (int ii){ @@ -1180,7 +1248,7 @@ void explicit_maps_single (int ii){ int a = ii; // Region 00 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1201,7 +1269,7 @@ void explicit_maps_single (int ii){ int b = a; // Region 00n - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1223,7 +1291,7 @@ void explicit_maps_single (int ii){ int arra[100]; // Region 01 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1241,7 +1309,7 @@ void explicit_maps_single (int ii){ } // Region 02 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1260,7 +1328,7 @@ void explicit_maps_single (int ii){ } // Region 03 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1279,7 +1347,7 @@ void explicit_maps_single (int ii){ } // Region 04 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1298,7 +1366,7 @@ void explicit_maps_single (int ii){ } // Region 05 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1317,7 +1385,7 @@ void explicit_maps_single (int ii){ } // Region 06 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1340,7 +1408,7 @@ void explicit_maps_single (int ii){ } // Region 07 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1363,7 +1431,7 @@ void explicit_maps_single (int ii){ } // Region 08 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1385,7 +1453,7 @@ void explicit_maps_single (int ii){ int *pa; // Region 09 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1403,7 +1471,7 @@ void explicit_maps_single (int ii){ } // Region 10 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1424,7 +1492,7 @@ void explicit_maps_single (int ii){ } // Region 11 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1445,7 +1513,7 @@ void explicit_maps_single (int ii){ } // Region 12 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1466,7 +1534,7 @@ void explicit_maps_single (int ii){ } // Region 13 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1491,7 +1559,7 @@ void explicit_maps_single (int ii){ } // Region 14 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1516,7 +1584,7 @@ void explicit_maps_single (int ii){ } // Region 15 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1540,7 +1608,7 @@ void explicit_maps_single (int ii){ int va[ii]; // Region 16 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1571,7 +1639,7 @@ void explicit_maps_single (int ii){ } // Region 17 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1597,7 +1665,7 @@ void explicit_maps_single (int ii){ } // Region 18 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1623,7 +1691,7 @@ void explicit_maps_single (int ii){ } // Region 19 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1655,7 +1723,7 @@ void explicit_maps_single (int ii){ } // Region 20 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1681,7 +1749,7 @@ void explicit_maps_single (int ii){ } // Region 21 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1713,7 +1781,7 @@ void explicit_maps_single (int ii){ } // Region 22 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1740,7 +1808,7 @@ void explicit_maps_single (int ii){ // Always. // Region 23 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE23]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1762,7 +1830,7 @@ void explicit_maps_single (int ii){ int ***mptr; // Region 24 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE24]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1780,7 +1848,7 @@ void explicit_maps_single (int ii){ } // Region 25 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE25]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE25]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1801,7 +1869,7 @@ void explicit_maps_single (int ii){ } // Region 26 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE26]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE26]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1822,7 +1890,7 @@ void explicit_maps_single (int ii){ } // Region 27 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE27]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE27]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1843,7 +1911,7 @@ void explicit_maps_single (int ii){ } // Region 28 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE28]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE28]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1888,7 +1956,7 @@ void explicit_maps_single (int ii){ } // Region 29 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE29]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE29]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -1936,7 +2004,7 @@ void explicit_maps_single (int ii){ double mva[23][ii][ii+5]; // Region 30 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -1989,7 +2057,7 @@ void explicit_maps_single (int ii){ } // Region 31 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // @@ -2038,7 +2106,7 @@ void explicit_maps_single (int ii){ double ***mptras; // Region 32 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE32]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE32]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2056,7 +2124,7 @@ void explicit_maps_single (int ii){ } // Region 33 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE33]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE33]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2075,7 +2143,7 @@ void explicit_maps_single (int ii){ } // Region 34 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE34]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE34]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2094,7 +2162,7 @@ void explicit_maps_single (int ii){ } // Region 35 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE35]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2119,7 +2187,7 @@ void explicit_maps_single (int ii){ } // Region 36 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE36]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE36]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2140,7 +2208,7 @@ void explicit_maps_single (int ii){ } // Region 37 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2180,7 +2248,7 @@ void explicit_maps_single (int ii){ } // Region 38 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2222,7 +2290,7 @@ void explicit_maps_single (int ii){ } // Region 39 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2264,7 +2332,7 @@ void explicit_maps_single (int ii){ } // Region 40 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2306,7 +2374,7 @@ void explicit_maps_single (int ii){ } // Region 41 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // @@ -2341,7 +2409,7 @@ void explicit_maps_single (int ii){ } // Region 42 - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE42]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE42]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2386,7 +2454,7 @@ void explicit_maps_single (int ii){ } // Region 43 - the memory is not contiguous for this map - will map the whole last dimension. - // CK19-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[Z]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE43]]{{.+}}) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -2469,16 +2537,16 @@ void explicit_maps_single (int ii){ #ifdef CK20 // CK20: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK20: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK20: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK20: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK20: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK20: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK20: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK20: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK20: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK20: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 12] -// CK20: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK20: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK20-LABEL: explicit_maps_references_and_function_args void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], float *d){ @@ -2489,7 +2557,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f float *&dd = d; // Region 00 - // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2509,7 +2577,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f } // Region 01 - // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2530,7 +2598,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f } // Region 02 - // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2548,7 +2616,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f } // Region 03 - // CK20-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK20-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK20-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK20-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2586,22 +2654,22 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f // CK21: [[ST:%.+]] = type { i32, i32, float* } // CK21: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK21: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK21: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK21: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 492] -// CK21: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK21: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK21: [[SIZE02:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 500] -// CK21: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i32] [i32 34, i32 18] +// CK21: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 34, i64 18] // CK21: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 492] -// CK21: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK21: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK21: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK21: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 34] +// CK21: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK21: [[SIZE05:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] 4, i[[Z]] 4] -// CK21: [[MTYPE05:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 3] +// CK21: [[MTYPE05:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 3] // CK21-LABEL: explicit_maps_template_args_and_members @@ -2616,7 +2684,7 @@ struct CC { T *lb; // Region 00 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2635,7 +2703,7 @@ struct CC { } // Region 01 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2656,7 +2724,7 @@ struct CC { } // Region 02 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2685,7 +2753,7 @@ struct CC { } // Region 03 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2703,7 +2771,7 @@ struct CC { } // Region 04 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2722,7 +2790,7 @@ struct CC { // Make sure the extra flag is passed to the second map. // Region 05 - // CK21-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE05]]{{.+}}) + // CK21-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE05]]{{.+}}) // CK21-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK21-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2777,49 +2845,49 @@ int explicit_maps_template_args_and_members(int a){ // CK22-DAG: [[STT:%.+]] = type { i32 } // CK22: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK22: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK22: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK22: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK22: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK22: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE06:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK22: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE07:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK22: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE08:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK22: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE08:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE10:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK22: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE11:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK22: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE11:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE12:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK22: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE12:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE13:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK22: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK22: [[SIZE14:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK22: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35] int a; int c[100]; @@ -2845,7 +2913,7 @@ STT<int> *std; // CK22-LABEL: explicit_maps_globals int explicit_maps_globals(void){ // Region 00 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2861,7 +2929,7 @@ int explicit_maps_globals(void){ { a+=1; } // Region 01 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2877,7 +2945,7 @@ int explicit_maps_globals(void){ { c[3]+=1; } // Region 02 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2893,7 +2961,7 @@ int explicit_maps_globals(void){ { d[3]+=1; } // Region 03 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2909,7 +2977,7 @@ int explicit_maps_globals(void){ { c[3]+=1; } // Region 04 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2928,7 +2996,7 @@ int explicit_maps_globals(void){ { d[3]+=1; } // Region 05 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2944,7 +3012,7 @@ int explicit_maps_globals(void){ { sa.fa+=1; } // Region 06 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2960,7 +3028,7 @@ int explicit_maps_globals(void){ { sc[3].fa+=1; } // Region 07 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE07]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2976,7 +3044,7 @@ int explicit_maps_globals(void){ { sd[3].fa+=1; } // Region 08 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE08]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -2992,7 +3060,7 @@ int explicit_maps_globals(void){ { sc[3].fa+=1; } // Region 09 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3011,7 +3079,7 @@ int explicit_maps_globals(void){ { sd[3].fa+=1; } // Region 10 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3027,7 +3095,7 @@ int explicit_maps_globals(void){ { sta.fa+=1; } // Region 11 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE11]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3043,7 +3111,7 @@ int explicit_maps_globals(void){ { stc[3].fa+=1; } // Region 12 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE12]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3059,7 +3127,7 @@ int explicit_maps_globals(void){ { std[3].fa+=1; } // Region 13 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3075,7 +3143,7 @@ int explicit_maps_globals(void){ { stc[3].fa+=1; } // Region 14 - // CK22-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) + // CK22-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) // CK22-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK22-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3121,22 +3189,22 @@ int explicit_maps_globals(void){ #ifdef CK23 // CK23: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK23: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK23: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK23: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK23: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK23: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 16] -// CK23: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK23: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK23-LABEL: explicit_maps_inside_captured int explicit_maps_inside_captured(int a){ @@ -3148,7 +3216,7 @@ int explicit_maps_inside_captured(int a){ // CK23: define {{.*}}explicit_maps_inside_captured{{.*}} [&](void){ // Region 00 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3167,7 +3235,7 @@ int explicit_maps_inside_captured(int a){ #pragma omp target map(a) { a+=1; } // Region 01 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3186,7 +3254,7 @@ int explicit_maps_inside_captured(int a){ #pragma omp target map(b) { b+=1; } // Region 02 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3206,7 +3274,7 @@ int explicit_maps_inside_captured(int a){ { c[3]+=1; } // Region 03 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3225,7 +3293,7 @@ int explicit_maps_inside_captured(int a){ #pragma omp target map(d) { d[3]+=1; } // Region 04 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3246,7 +3314,7 @@ int explicit_maps_inside_captured(int a){ { c[3]+=1; } // Region 05 - // CK23-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) + // CK23-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) // CK23-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK23-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3311,76 +3379,43 @@ struct SC{ }; // CK24: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK24: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{56|48}}] -// CK24: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK24: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE04:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK24: [[MTYPE04:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE05:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{3560|2880}}] -// CK24: [[MTYPE05:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] - -// CK24: [[SIZE06:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK24: [[MTYPE06:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE07:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE07:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] - -// CK24: [[SIZE08:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE08:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] - -// CK24: [[SIZE09:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE09:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] - -// CK24: [[SIZE10:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 8] -// CK24: [[MTYPE10:@.+]] = private {{.*}}constant [1 x i32] [i32 35] - -// CK24: [[SIZE11:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}] -// CK24: [[MTYPE11:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] - -// CK24: [[SIZE12:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE12:@.+]] = private {{.*}}constant [4 x i32] [i32 35, i32 19, i32 19, i32 19] +// CK24: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE13:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK24: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE13:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE14:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{56|48}}] -// CK24: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE14:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE15:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK24: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE16:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 20] -// CK24: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE17:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{3560|2880}}] -// CK24: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] +// CK24: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19] // CK24: [[SIZE18:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK24: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE19:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] +// CK24: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19] // CK24: [[SIZE20:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] +// CK24: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19] // CK24: [[SIZE21:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] +// CK24: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19] // CK24: [[SIZE22:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] {{8|4}}] -// CK24: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK24: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK24: [[SIZE23:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}] -// CK24: [[MTYPE23:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 19] +// CK24: [[MTYPE23:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 19] // CK24: [[SIZE24:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 4] -// CK24: [[MTYPE24:@.+]] = private {{.*}}constant [4 x i32] [i32 35, i32 19, i32 19, i32 19] +// CK24: [[MTYPE24:@.+]] = private {{.*}}constant [4 x i64] [i64 35, i64 19, i64 19, i64 19] // CK24-LABEL: explicit_maps_struct_fields int explicit_maps_struct_fields(int a){ @@ -3388,7 +3423,7 @@ int explicit_maps_struct_fields(int a){ SC *p; // Region 01 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3404,303 +3439,11 @@ int explicit_maps_struct_fields(int a){ #pragma omp target map(s.a) { s.a++; } -// Region 02 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SA]]** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SA]]* [[SEC0:%.+]], [[SA]]** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SB]]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL02:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.s) - { s.a++; } - -// Region 03 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SA]]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SB]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[SEC000]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL03:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.s.a) - { s.a++; } - -// Region 04 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE04]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[10 x i32]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 3 - -// CK24: call void [[CALL04:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.b[:5]) - { s.a++; } - -// Region 05 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE05]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SB]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SB]]** [[SEC0:%.+]], [[SB]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SB]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SB]]** -// CK24-DAG: store [[SB]]** [[SEC0]], [[SB]]*** [[CBP1]] -// CK24-DAG: store [[SB]]* [[SEC1:%.+]], [[SB]]** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SB]]* [[SEC11:%[^,]+]], i{{.+}} 0 -// CK24-DAG: [[SEC11]] = load [[SB]]*, [[SB]]** [[SEC111:%[^,]+]], -// CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24: call void [[CALL05:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.p[:5]) - { s.a++; } - -// Region 06 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE06]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE06]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SA]]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[10 x [[SA]]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[SEC000]] = getelementptr {{.*}}[[SB]]* [[SEC0000:%[^,]+]], i{{.+}} 0, i{{.+}} 2 -// CK24-DAG: [[SEC0000]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL06:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.sa[3].a) - { s.a++; } - -// Region 07 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE07]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE07]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SA]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SA]]** [[SEC0:%.+]], [[SA]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[10 x [[SA]]*]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SB]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[SEC000]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SA]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** -// CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CBP1]] -// CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SA]]* [[SEC11:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC11]] = load [[SA]]*, [[SA]]** [[SEC111:%[^,]+]], -// CK24-DAG: [[SEC111]] = getelementptr {{.*}}[10 x [[SA]]*]* [[SEC1111:%[^,]+]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[SEC1111]] = getelementptr {{.*}}[[SB]]* [[SEC11111:%[^,]+]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[SEC11111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL07:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.sp[3]->a) - { s.a++; } - -// Region 08 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE08]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE08]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SB]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SB]]** [[SEC0:%.+]], [[SB]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SB]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** -// CK24-DAG: store [[SB]]** [[SEC0]], [[SB]]*** [[CBP1]] -// CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SB]]* [[SEC11:%[^,]+]], i{{.+}} 0 -// CK24-DAG: [[SEC11]] = load [[SB]]*, [[SB]]** [[SEC111:%[^,]+]], -// CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24: call void [[CALL08:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.p->a) - { s.a++; } - -// Region 09 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE09]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SA]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SA]]** [[SEC0:%.+]], [[SA]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SB]]* [[SEC00:[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SA]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** -// CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CBP1]] -// CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SA]]* [[SEC11:%[^,]+]], i{{.+}} 0 -// CK24-DAG: [[SEC11]] = load [[SA]]*, [[SA]]** [[SEC111:%[^,]+]], -// CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SB]]* [[SEC1111:[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC1111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL09:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.p->a) - { s.a++; } - -// Region 10 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE10]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE10]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[10 x i32]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SA]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 2 -// CK24-DAG: [[SEC000]] = getelementptr {{.*}}[[SB]]* [[SEC0000:%[^,]+]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[SEC0000]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL10:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.s.b[:2]) - { s.a++; } - -// Region 11 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE11]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE11]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SA]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SA]]** [[SEC0:%.+]], [[SA]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SB]]* [[SEC00:%[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC00]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SA]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** -// CK24-DAG: store [[SA]]** [[SEC0]], [[SA]]*** [[CBP1]] -// CK24-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[10 x i32]* [[SEC11:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC11]] = getelementptr {{.*}}[[SA]]* [[SEC111:%[^,]+]], i{{.+}} 0, i{{.+}} 2 -// CK24-DAG: [[SEC111]] = load [[SA]]*, [[SA]]** [[SEC1111:%[^,]+]], -// CK24-DAG: [[SEC1111]] = getelementptr {{.*}}[[SB]]* [[SEC11111:%[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC11111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - -// CK24: call void [[CALL11:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.s.p->b[:2]) - { s.a++; } - -// Region 12 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE12]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE12]]{{.+}}) -// CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] -// CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - -// CK24-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [[SC]]** -// CK24-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [[SB]]*** -// CK24-DAG: store [[SC]]* [[VAR0:%.+]], [[SC]]** [[CBP0]] -// CK24-DAG: store [[SB]]** [[SEC0:%.+]], [[SB]]*** [[CP0]] -// CK24-DAG: [[SEC0]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to [[SB]]*** -// CK24-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to [[SA]]*** -// CK24-DAG: store [[SB]]** [[SEC0:%.+]], [[SB]]*** [[CBP1]] -// CK24-DAG: store [[SA]]** [[SEC1:%.+]], [[SA]]*** [[CP1]] -// CK24-DAG: [[SEC1]] = getelementptr {{.*}}[[SB]]* [[SEC11:%[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC11]] = load [[SB]]*, [[SB]]** [[SEC111:%[^,]+]], -// CK24-DAG: [[SEC111]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 -// CK24-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 -// CK24-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [[SA]]*** -// CK24-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [[SA]]*** -// CK24-DAG: store [[SA]]** [[SEC1:%.+]], [[SA]]*** [[CBP2]] -// CK24-DAG: store [[SA]]** [[SEC2:%.+]], [[SA]]*** [[CP2]] -// CK24-DAG: [[SEC2]] = getelementptr {{.*}}[[SA]]* [[SEC22:%[^,]+]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[SEC22]] = load [[SA]]*, [[SA]]** [[SEC222:%[^,]+]], -// CK24-DAG: [[SEC222]] = getelementptr {{.*}}[[SB]]* [[SEC2222:%[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC2222]] = load [[SB]]*, [[SB]]** [[SEC22222:%[^,]+]], -// CK24-DAG: [[SEC22222]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 -// CK24-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to [[SA]]*** -// CK24-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to i32** -// CK24-DAG: store [[SA]]** [[SEC2]], [[SA]]*** [[CBP3]] -// CK24-DAG: store i32* [[SEC3:%.+]], i32** [[CP3]] -// CK24-DAG: [[SEC3]] = getelementptr {{.*}}[[SA]]* [[SEC33:%[^,]+]], i{{.+}} 0, i{{.+}} 0 -// CK24-DAG: [[SEC33]] = load [[SA]]*, [[SA]]** [[SEC333:%[^,]+]], -// CK24-DAG: [[SEC333]] = getelementptr {{.*}}[[SA]]* [[SEC3333:%[^,]+]], i{{.+}} 0, i{{.+}} 1 -// CK24-DAG: [[SEC3333]] = load [[SA]]*, [[SA]]** [[SEC33333:%[^,]+]], -// CK24-DAG: [[SEC33333]] = getelementptr {{.*}}[[SB]]* [[SEC333333:%[^,]+]], i{{.+}} 0, i{{.+}} 4 -// CK24-DAG: [[SEC333333]] = load [[SB]]*, [[SB]]** [[SEC3333333:%[^,]+]], -// CK24-DAG: [[SEC3333333]] = getelementptr {{.*}}[[SC]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - -// CK24: call void [[CALL12:@.+]]([[SC]]* {{[^,]+}}) -#pragma omp target map(s.p->p->p->a) - { s.a++; } - // // Same thing but starting from a pointer. // // Region 13 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE13]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE13]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3720,7 +3463,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 14 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE14]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE14]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3741,7 +3484,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 15 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE15]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE15]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3763,7 +3506,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 16 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE16]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE16]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE16]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE16]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3784,7 +3527,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 17 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3815,7 +3558,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 18 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE18]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE18]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3838,7 +3581,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 19 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE19]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE19]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3873,7 +3616,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 20 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3904,7 +3647,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 21 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE21]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE21]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3937,7 +3680,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 22 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE22]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE22]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3960,7 +3703,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 23 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE23]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE23]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE23]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -3994,7 +3737,7 @@ int explicit_maps_struct_fields(int a){ { p->a++; } // Region 24 -// CK24-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}}) +// CK24-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE24]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE24]]{{.+}}) // CK24-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK24-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4056,17 +3799,6 @@ int explicit_maps_struct_fields(int a){ } // CK24: define {{.+}}[[CALL01]] -// CK24: define {{.+}}[[CALL02]] -// CK24: define {{.+}}[[CALL03]] -// CK24: define {{.+}}[[CALL04]] -// CK24: define {{.+}}[[CALL05]] -// CK24: define {{.+}}[[CALL06]] -// CK24: define {{.+}}[[CALL07]] -// CK24: define {{.+}}[[CALL08]] -// CK24: define {{.+}}[[CALL09]] -// CK24: define {{.+}}[[CALL10]] -// CK24: define {{.+}}[[CALL11]] -// CK24: define {{.+}}[[CALL12]] // CK24: define {{.+}}[[CALL13]] // CK24: define {{.+}}[[CALL14]] // CK24: define {{.+}}[[CALL15]] @@ -4093,10 +3825,10 @@ int explicit_maps_struct_fields(int a){ // CK25: [[CA01:%.+]] = type { i32* } // CK25: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] 4] -// CK25: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK25: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK25: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK25: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 33] +// CK25: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK25-LABEL: explicit_maps_with_inner_lambda @@ -4107,7 +3839,7 @@ struct CC { int foo(T arg) { // Region 00 - // CK25-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4128,7 +3860,7 @@ struct CC { } // Region 01 - // CK25-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK25-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK25-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK25-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4185,16 +3917,16 @@ int explicit_maps_with_inner_lambda(int a){ // CK26: [[ST:%.+]] = type { i32, float*, i32, float* } // CK26: [[SIZE00:@.+]] = private {{.*}}constant [2 x i[[Z:64|32]]] [i[[Z:64|32]] {{32|16}}, i[[Z:64|32]] 4] -// CK26: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35] +// CK26: [[MTYPE00:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // CK26: [[SIZE01:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4] -// CK26: [[MTYPE01:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35] +// CK26: [[MTYPE01:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // CK26: [[SIZE02:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4] -// CK26: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35] +// CK26: [[MTYPE02:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // CK26: [[SIZE03:@.+]] = private {{.*}}constant [2 x i[[Z]]] [i[[Z]] {{32|16}}, i[[Z]] 4] -// CK26: [[MTYPE03:@.+]] = private {{.*}}constant [2 x i32] [i32 35, i32 35] +// CK26: [[MTYPE03:@.+]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // CK26-LABEL: explicit_maps_with_private_class_members @@ -4211,7 +3943,7 @@ struct CC { #pragma omp parallel firstprivate(fA,fB) private(pA,pB) { // Region 00 - // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4238,7 +3970,7 @@ struct CC { } // Region 01 - // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4265,7 +3997,7 @@ struct CC { } // Region 02 - // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4292,7 +4024,7 @@ struct CC { } // Region 01 - // CK26-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK26-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK26-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK26-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4368,25 +4100,25 @@ int explicit_maps_with_private_class_members(){ #ifdef CK27 // CK27: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] zeroinitializer -// CK27: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK27: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK27: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer -// CK27: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK27: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK27: [[SIZE02:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer -// CK27: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK27: [[MTYPE02:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK27: [[SIZE03:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer -// CK27: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK27: [[MTYPE03:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK27: [[SIZE05:@.+]] = private {{.*}}constant [1 x i[[Z]]] zeroinitializer -// CK27: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i32] [i32 32] +// CK27: [[MTYPE05:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK27: [[SIZE07:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 4] -// CK27: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i32] [i32 288] +// CK27: [[MTYPE07:@.+]] = private {{.*}}constant [1 x i64] [i64 288] // CK27: [[SIZE09:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 40] -// CK27: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i32] [i32 161] +// CK27: [[MTYPE09:@.+]] = private {{.*}}constant [1 x i64] [i64 161] // CK27-LABEL: zero_size_section_and_private_maps void zero_size_section_and_private_maps (int ii){ @@ -4395,7 +4127,7 @@ void zero_size_section_and_private_maps (int ii){ int *pa; // Region 00 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4413,7 +4145,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 01 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4434,7 +4166,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 02 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4455,7 +4187,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 03 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE03]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4480,7 +4212,7 @@ void zero_size_section_and_private_maps (int ii){ int pvtArr[10]; // Region 04 - // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null) + // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null) // CK27: call void [[CALL04:@.+]]() #pragma omp target private(pvtPtr) { @@ -4488,7 +4220,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 05 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE05]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE05]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4506,7 +4238,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 06 - // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null) + // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null) // CK27: call void [[CALL06:@.+]]() #pragma omp target private(pvtScl) { @@ -4514,7 +4246,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 07 - // CK27-DAG: call i32 @__tgt_target(i32 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZE07]]{{.+}}, {{.+}}[[MTYPE07]]{{.+}}) // CK27-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK27-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK27-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -4534,7 +4266,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 08 - // CK27: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i32* null) + // CK27: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null) // CK27: call void [[CALL08:@.+]]() #pragma omp target private(pvtArr) { @@ -4542,7 +4274,7 @@ void zero_size_section_and_private_maps (int ii){ } // Region 09 - // CK27-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) + // CK27-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE09]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE09]]{{.+}}) // CK27-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK27-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4579,17 +4311,17 @@ void zero_size_section_and_private_maps (int ii){ #ifdef CK28 // CK28: [[SIZE00:@.+]] = private {{.*}}constant [1 x i[[Z:64|32]]] [i[[Z:64|32]] {{8|4}}] -// CK28: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK28: [[MTYPE00:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK28: [[SIZE01:@.+]] = private {{.*}}constant [1 x i[[Z]]] [i[[Z]] 400] -// CK28: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i32] [i32 35] +// CK28: [[MTYPE01:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK28-LABEL: explicit_maps_pointer_references void explicit_maps_pointer_references (int *p){ int *&a = p; // Region 00 - // CK28-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4609,7 +4341,7 @@ void explicit_maps_pointer_references (int *p){ } // Region 01 - // CK28-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK28-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK28-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK28-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4645,13 +4377,13 @@ void explicit_maps_pointer_references (int *p){ // CK29: [[SSB:%.+]] = type { [[SSA]]*, [[SSA]]** } // CK29: [[SIZE00:@.+]] = private {{.*}}constant [4 x i[[Z:64|32]]] [i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] {{8|4}}, i[[Z:64|32]] 80] -// CK29: [[MTYPE00:@.+]] = private {{.*}}constant [4 x i32] [i32 35, i32 16, i32 19, i32 19] +// CK29: [[MTYPE00:@.+]] = private {{.*}}constant [4 x i64] [i64 35, i64 16, i64 19, i64 19] // CK29: [[SIZE01:@.+]] = private {{.*}}constant [4 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 80] -// CK29: [[MTYPE01:@.+]] = private {{.*}}constant [4 x i32] [i32 32, i32 19, i32 19, i32 19] +// CK29: [[MTYPE01:@.+]] = private {{.*}}constant [4 x i64] [i64 32, i64 19, i64 19, i64 19] // CK29: [[SIZE02:@.+]] = private {{.*}}constant [5 x i[[Z]]] [i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] {{8|4}}, i[[Z]] 80] -// CK29: [[MTYPE02:@.+]] = private {{.*}}constant [5 x i32] [i32 32, i32 19, i32 16, i32 19, i32 19] +// CK29: [[MTYPE02:@.+]] = private {{.*}}constant [5 x i64] [i64 32, i64 19, i64 16, i64 19, i64 19] struct SSA{ double *p; @@ -4668,7 +4400,7 @@ struct SSB{ void foo() { // Region 00 - // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE00]]{{.+}}) // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4715,7 +4447,7 @@ struct SSB{ } // Region 01 - // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE01]]{{.+}}) + // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE01]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE01]]{{.+}}) // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -4760,7 +4492,7 @@ struct SSB{ } // Region 02 - // CK29-DAG: call i32 @__tgt_target(i32 {{[^,]+}}, i8* {{[^,]+}}, i32 5, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK29-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 5, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[5 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK29-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK29-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_map_messages.cpp b/test/OpenMP/target_map_messages.cpp index b9640b965d50..9ffbec472572 100644 --- a/test/OpenMP/target_map_messages.cpp +++ b/test/OpenMP/target_map_messages.cpp @@ -26,7 +26,14 @@ struct SA { T d; float e[I]; T *f; + int bf : 20; void func(int arg) { + #pragma omp target + { + a = 0.0; + func(arg); + bf = 20; + } #pragma omp target map(arg,a,d) {} #pragma omp target map(arg[2:2],a,d) // expected-error {{subscripted value is not an array or pointer}} @@ -267,8 +274,13 @@ void SAclient(int arg) { {} #pragma omp target map((p+1)->A) // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}} {} - #pragma omp target map(u.B) // expected-error {{mapped storage cannot be derived from a union}} + #pragma omp target map(u.B) // expected-error {{mapping of union members is not allowed}} {} + #pragma omp target + { + u.B = 0; + r.S.foo(); + } #pragma omp target data map(to: r.C) //expected-note {{used here}} { @@ -319,8 +331,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -353,7 +365,7 @@ template <class T> struct S6; template<> -struct S6<int> // expected-note {{mappable type cannot be polymorphic}} +struct S6<int> { virtual void foo(); }; @@ -420,8 +432,8 @@ T tmain(T argc) { #pragma omp target data map(tofrom: argc > 0 ? x : y) // expected-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} -#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target data map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target data map(ba) #pragma omp target data map(ca) #pragma omp target data map(da) #pragma omp target data map(S2::S2s) @@ -454,6 +466,25 @@ T tmain(T argc) { return 0; } +struct SA1{ + int a; + struct SA1 *p; + int b[10]; +}; +struct SB1{ + int a; + struct SA1 s; + struct SA1 sa[10]; + struct SA1 *sp[10]; + struct SA1 *p; +}; +struct SC1{ + int a; + struct SB1 s; + struct SB1 *p; + int b[10]; +}; + int main(int argc, char **argv) { const int d = 5; const int da[5] = { 0 }; @@ -467,6 +498,8 @@ int main(int argc, char **argv) { int y; int to, tofrom, always; const int (&l)[5] = da; + SC1 s; + SC1 *p; #pragma omp target data map // expected-error {{expected '(' after 'map'}} expected-error {{expected at least one 'map' or 'use_device_ptr' clause for '#pragma omp target data'}} #pragma omp target data map( // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{expected expression}} #pragma omp target data map() // expected-error {{expected expression}} @@ -489,9 +522,9 @@ int main(int argc, char **argv) { #pragma omp target data map(tofrom: argc > 0 ? argv[1] : argv[2]) // expected-error {{xpected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} -#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} #pragma omp target data map(argv[1]) -#pragma omp target data map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(ba) #pragma omp target data map(ca) #pragma omp target data map(da) #pragma omp target data map(S2::S2s) @@ -525,8 +558,55 @@ int main(int argc, char **argv) { {} #pragma omp target firstprivate(j) map(j) // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target' directive}} expected-note {{defined as firstprivate}} {} -#pragma omp target map(m) // expected-error {{type 'S6<int>' is not mappable to target}} - {} +#pragma omp target map(m) + {} +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.s) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.s.a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.b[:5]) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.p[:5]) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.sa[3].a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.sp[3]->a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.p->a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.p->a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.s.b[:2]) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.s.p->b[:2]) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +// expected-note@+1 {{used here}} +#pragma omp target map(s.p->p->p->a) +// expected-error@+1 {{variable already marked as mapped in current construct}} + { s.a++; } +#pragma omp target map(s.s.s.b[:2]) + { s.s.s.b[0]++; } + return tmain<int, 3>(argc)+tmain<from, 4>(argc); // expected-note {{in instantiation of function template specialization 'tmain<int, 3>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<int, 4>' requested here}} } #endif diff --git a/test/OpenMP/target_messages.cpp b/test/OpenMP/target_messages.cpp index 6f79f44627fa..77d808493789 100644 --- a/test/OpenMP/target_messages.cpp +++ b/test/OpenMP/target_messages.cpp @@ -4,6 +4,8 @@ // RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx64-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s // RUN: not %clang_cc1 -fopenmp -std=c++11 -triple nvptx-nvidia-cuda -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-HOST-TARGET %s // CHECK-UNSUPPORTED-HOST-TARGET: error: The target '{{nvptx64-nvidia-cuda|nvptx-nvidia-cuda}}' is not a supported OpenMP host target. +// RUN: not %clang_cc1 -fopenmp -std=c++11 -fopenmp-targets=hexagon-linux-gnu -o - %s 2>&1 | FileCheck --check-prefix CHECK-UNSUPPORTED-DEVICE-TARGET %s +// CHECK-UNSUPPORTED-DEVICE-TARGET: OpenMP target is invalid: 'hexagon-linux-gnu' void foo() { } diff --git a/test/OpenMP/target_parallel_codegen.cpp b/test/OpenMP/target_parallel_codegen.cpp index 3063ab16abc8..b9d00da02f45 100644 --- a/test/OpenMP/target_parallel_codegen.cpp +++ b/test/OpenMP/target_parallel_codegen.cpp @@ -39,15 +39,15 @@ // sizes. // CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 35, i32 288, i32 35, i32 35, i32 288, i32 288, i32 35, i32 35] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 35] -// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 @@ -93,30 +93,24 @@ int foo(int n) { double cn[5][n]; TT<long long, char> d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 1, i32 0) - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT0:@.+]]() // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target parallel + #pragma omp target parallel nowait { } - // CHECK: store i32 0, i32* [[RHV:%.+]], align 4 - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) #pragma omp target parallel if(target: 0) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -126,9 +120,7 @@ int foo(int n) { // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) @@ -142,7 +134,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -159,21 +151,18 @@ int foo(int n) { // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFELSE]] - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFEND]] - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + #pragma omp target parallel if(target: n>10) { a += 1; @@ -197,7 +186,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -286,9 +275,7 @@ int foo(int n) { // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -441,7 +428,7 @@ int foo(int n) { // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}}) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. template<typename tx> @@ -533,7 +520,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -582,9 +569,7 @@ int bar(int n){ // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** // CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -598,7 +583,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -630,21 +615,17 @@ int bar(int n){ // CHECK-DAG: store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]], // CHECK-DAG: store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]], -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // // CHECK: define {{.*}}[[FTEMPLATE]] @@ -652,7 +633,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -677,23 +658,17 @@ int bar(int n){ // CHECK-DAG: store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]], // CHECK-DAG: store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]], -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - - +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions of the callees of bar(). @@ -728,7 +703,7 @@ int bar(int n){ // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}}) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. diff --git a/test/OpenMP/target_parallel_codegen_registration.cpp b/test/OpenMP/target_parallel_codegen_registration.cpp index 2511bc4c9d3c..d2cb1ec6f778 100644 --- a/test/OpenMP/target_parallel_codegen_registration.cpp +++ b/test/OpenMP/target_parallel_codegen_registration.cpp @@ -63,40 +63,40 @@ // CHECK-DAG: {{@.+}} = private constant i8 0 // TCHECK-NOT: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-NTARGET-NOT: private constant i8 0 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i diff --git a/test/OpenMP/target_parallel_debug_codegen.cpp b/test/OpenMP/target_parallel_debug_codegen.cpp new file mode 100644 index 000000000000..385f7a29cb8f --- /dev/null +++ b/test/OpenMP/target_parallel_debug_codegen.cpp @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// expected-no-diagnostics + +int main() { + /* int(*b)[a]; */ + /* int *(**c)[a]; */ + bool bb; + int a; + int b[10][10]; + int c[10][10][10]; +#pragma omp target parallel firstprivate(a, b) map(tofrom \ + : c) map(tofrom \ + : bb) if (a) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb |= b[0][a]; + } +#pragma omp target parallel firstprivate(a) map(tofrom \ + : c, b) map(to \ + : bb) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + d = bb; + } +#pragma omp target parallel map(tofrom \ + : a, c, b) map(from \ + : bb) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb = b[0][a]; + } + return 0; +} + +// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: !DILocalVariable(name: ".global_tid.", +// CHECK-SAME: DIFlagArtificial +// CHECK: !DILocalVariable(name: ".bound_tid.", +// CHECK-SAME: DIFlagArtificial +// CHECK: !DILocalVariable(name: "c", +// CHECK-SAMEi-NOT: DIFlagArtificial +// CHECK: !DILocalVariable(name: "a", +// CHECK-SAMEi-NOT: DIFlagArtificial +// CHECK: !DILocalVariable(name: "b", +// CHECK-SAMEi-NOT: DIFlagArtificial diff --git a/test/OpenMP/target_parallel_depend_messages.cpp b/test/OpenMP/target_parallel_depend_messages.cpp index fde940be1aa4..24c69e95cc0a 100644 --- a/test/OpenMP/target_parallel_depend_messages.cpp +++ b/test/OpenMP/target_parallel_depend_messages.cpp @@ -36,21 +36,21 @@ int main(int argc, char **argv, char *env[]) { foo(); #pragma omp target parallel depend (out: ) // expected-error {{expected expression}} foo(); - #pragma omp target parallel depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target parallel depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target parallel depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); - #pragma omp target parallel depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel depend(in : argv[1][1] = '2') foo(); - #pragma omp target parallel depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target parallel depend (in : argv[0]) foo(); #pragma omp target parallel depend (in : ) // expected-error {{expected expression}} foo(); - #pragma omp target parallel depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel depend (in : main) foo(); - #pragma omp target parallel depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target parallel depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target parallel depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); diff --git a/test/OpenMP/target_parallel_for_codegen.cpp b/test/OpenMP/target_parallel_for_codegen.cpp new file mode 100644 index 000000000000..58ac10bf9b2c --- /dev/null +++ b/test/OpenMP/target_parallel_for_codegen.cpp @@ -0,0 +1,812 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. + +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [10 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547, i64 288] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +// CHECK-LABEL: get_val +long long get_val() { return 0; } + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target parallel for + for (int i = 3; i < 32; i += 5) { +#pragma omp cancel for +#pragma omp cancellation point for + } + + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}}) + long long k = get_val(); + #pragma omp target parallel for if(target: 0) linear(k : 3) schedule(dynamic) + for (int i = 10; i > 1; i--) { + a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], + // CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + int lin = 12; + #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait + for (unsigned long long it = 2000; it >= 600; it-=400) { + aa += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[IFTHEN]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + + #pragma omp target parallel for if(target: n>10) + for (short it = 6; it <= 20; it-=-4) { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] + // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[SR]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], + // CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]], + // CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]], + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]], + // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]], + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]], + // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]], + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]], + // CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store float* %{{.+}}, float** [[CBPADDR5:%.+]], + // CHECK-DAG: store float* %{{.+}}, float** [[CPADDR5:%.+]], + // CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]], + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]], + // CHECK-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store double* %{{.+}}, double** [[CBPADDR7:%.+]], + // CHECK-DAG: store double* %{{.+}}, double** [[CPADDR7:%.+]], + // CHECK-DAG: [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]], + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]], + // CHECK-DAG: [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target parallel for if(target: n>20) schedule(static, a) + for (unsigned char it = 'z'; it >= 'a'; it+=-1) { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]]() +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.) +// CHECK: call i32 @__kmpc_cancel(%ident_t* @ +// CHECK: call i32 @__kmpc_cancellationpoint(%ident_t* @ +// CHECK: ret void +// CHECK: } + + +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: alloca i{{32|64}}{{[*]*}}, align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32* +// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align +// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i{{32|64}}{{[*]*}})* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i{{32|64}}{{[*]*}} %{{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: alloca i[[SZ]], align +// CHECK: alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK: store i16 [[AA]], i16* [[AA_C]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i[[SZ]] {{.+}}, i[[SZ]] {{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT3]] +// Create stack storage and store argument in there. +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align +// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align +// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align +// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align +// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align +// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align +// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT4]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK: alloca i[[SZ]] +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] + +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], +// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], +// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], +// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*, i[[SZ]])* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]], i[[SZ]] %{{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target parallel for if(target: n>40) + for (long long i = -10; i < 10; i += 3) { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target parallel for if(target: n>50) + for (unsigned i=100; i<10; i+=10) { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target parallel for if(target: n>60) + for (unsigned long long it = 2000; it >= 600; it -= 400) { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + +// We capture 2 VLA sizes in this target region +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] +// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 + +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as some are repeated. +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]], +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]], +// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} + +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3 +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3 +// CHECK-DAG: [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]** +// CHECK-DAG: [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]], +// CHECK-DAG: store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]** +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]], +// CHECK-DAG: store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). + +// CHECK: define internal void [[HVT7]] +// Create local storage for each capture. +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* +// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] +// Store captures in the context. +// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], +// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], + +// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]] +// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align +// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + + +// CHECK: define internal void [[HVT6]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8* +// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align +// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +// CHECK: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +#endif diff --git a/test/OpenMP/target_parallel_for_codegen_registration.cpp b/test/OpenMP/target_parallel_for_codegen_registration.cpp new file mode 100644 index 000000000000..24f0c06771ec --- /dev/null +++ b/test/OpenMP/target_parallel_for_codegen_registration.cpp @@ -0,0 +1,451 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target parallel for codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] +// CHECK-DAG: [[T1:@.+]] = global [[ST1]] +// CHECK-DAG: [[T2:@.+]] = global [[ST2]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-NTARGET-NOT: type { i8*, i8*, % +// CHECK-NTARGET-NOT: type { i32, % + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// TCHECK-NOT: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] + +// CHECK-NTARGET-NOT: private constant i8 0 +// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i + +// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. +// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + +// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target parallel for if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template <int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we us all the target regions: +//CHECK-DAG: define internal void @[[NAME1]]( +//CHECK-DAG: call void @[[NAME1]]( +//CHECK-DAG: define internal void @[[NAME2]]( +//CHECK-DAG: call void @[[NAME2]]( +//CHECK-DAG: define internal void @[[NAME3]]( +//CHECK-DAG: call void @[[NAME3]]( +//CHECK-DAG: define internal void @[[NAME4]]( +//CHECK-DAG: call void @[[NAME4]]( +//CHECK-DAG: define internal void @[[NAME5]]( +//CHECK-DAG: call void @[[NAME5]]( +//CHECK-DAG: define internal void @[[NAME6]]( +//CHECK-DAG: call void @[[NAME6]]( +//CHECK-DAG: define internal void @[[NAME7]]( +//CHECK-DAG: call void @[[NAME7]]( +//CHECK-DAG: define internal void @[[NAME8]]( +//CHECK-DAG: call void @[[NAME8]]( +//CHECK-DAG: define internal void @[[NAME9]]( +//CHECK-DAG: call void @[[NAME9]]( +//CHECK-DAG: define internal void @[[NAME10]]( +//CHECK-DAG: call void @[[NAME10]]( +//CHECK-DAG: define internal void @[[NAME11]]( +//CHECK-DAG: call void @[[NAME11]]( +//CHECK-DAG: define internal void @[[NAME12]]( +//CHECK-DAG: call void @[[NAME12]]( + +//TCHECK-DAG: define void @[[NAME1]]( +//TCHECK-DAG: define void @[[NAME2]]( +//TCHECK-DAG: define void @[[NAME3]]( +//TCHECK-DAG: define void @[[NAME4]]( +//TCHECK-DAG: define void @[[NAME5]]( +//TCHECK-DAG: define void @[[NAME6]]( +//TCHECK-DAG: define void @[[NAME7]]( +//TCHECK-DAG: define void @[[NAME8]]( +//TCHECK-DAG: define void @[[NAME9]]( +//TCHECK-DAG: define void @[[NAME10]]( +//TCHECK-DAG: define void @[[NAME11]]( +//TCHECK-DAG: define void @[[NAME12]]( + +// CHECK-NTARGET-NOT: __tgt_target +// CHECK-NTARGET-NOT: __tgt_register_lib +// CHECK-NTARGET-NOT: __tgt_unregister_lib + +// TCHECK-NOT: __tgt_target +// TCHECK-NOT: __tgt_register_lib +// TCHECK-NOT: __tgt_unregister_lib + +// We have 2 initializers with priority 500 +//CHECK: define internal void [[P500]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 1 initializers with priority 501 +//CHECK: define internal void [[P501]]( +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 6 initializers with default priority +//CHECK: define internal void [[PMAX]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// Check registration and unregistration + +//CHECK: define internal void @[[UNREGFN:.+]](i8*) +//CHECK-SAME: comdat($[[REGFN]]) { +//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) +//CHECK: ret void +//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*) + +//CHECK: define linkonce hidden void @[[REGFN]](i8*) +//CHECK-SAME: comdat { +//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]]) +//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*), +//CHECK: ret void +//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*) + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +#endif diff --git a/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp b/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp new file mode 100644 index 000000000000..b42b5b55a505 --- /dev/null +++ b/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp @@ -0,0 +1,68 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target parallel for codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[CA:%.+]] = type { i32* } + +// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}}) +int nested(int a){ + // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]]( + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ++a; + + // CHECK: call void @"[[LNAME:.+]]"([[CA]]* + auto F = [&](){ + #pragma omp parallel + { + #pragma omp target parallel for + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]]( + +// CHECK: define {{.*}}void @"[[LNAME]]"( +// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to + +// CHECK: define {{.*}}void [[PNAME]]( +// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]]( + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]]( + + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +#endif diff --git a/test/OpenMP/target_parallel_for_debug_codegen.cpp b/test/OpenMP/target_parallel_for_debug_codegen.cpp new file mode 100644 index 000000000000..0e0bb7e85306 --- /dev/null +++ b/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// expected-no-diagnostics + +int main() { + /* int(*b)[a]; */ + /* int *(**c)[a]; */ + bool bb; + int a; + int b[10][10]; + int c[10][10][10]; +#pragma omp target parallel for firstprivate(a, b) map(tofrom \ + : c) map(tofrom \ + : bb) if (a) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb |= b[0][a]; + } +#pragma omp target parallel for firstprivate(a) map(tofrom \ + : c, b) map(to \ + : bb) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + d = bb; + } +#pragma omp target parallel for map(tofrom \ + : a, c, b) map(from \ + : bb) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb = b[0][a]; + } + return 0; +} + +// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^,]+}}, i8 addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i32* dereferenceable{{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^,]+}}, i8* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^,]+}}, i8 addrspace(1)* {{[^)]+}}) + diff --git a/test/OpenMP/target_parallel_for_depend_messages.cpp b/test/OpenMP/target_parallel_for_depend_messages.cpp index cf17d7a5def2..19872566161a 100644 --- a/test/OpenMP/target_parallel_for_depend_messages.cpp +++ b/test/OpenMP/target_parallel_for_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target parallel for depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for depend (in : main) for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target parallel for depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_parallel_for_lastprivate_messages.cpp b/test/OpenMP/target_parallel_for_lastprivate_messages.cpp index c001b7f0d168..ee703a1ce32e 100644 --- a/test/OpenMP/target_parallel_for_lastprivate_messages.cpp +++ b/test/OpenMP/target_parallel_for_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: S2 &operator=(const S2 &); const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/target_parallel_for_map_messages.cpp b/test/OpenMP/target_parallel_for_map_messages.cpp index f0019f94f493..f3e1c616b406 100644 --- a/test/OpenMP/target_parallel_for_map_messages.cpp +++ b/test/OpenMP/target_parallel_for_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_parallel_for_reduction_messages.cpp b/test/OpenMP/target_parallel_for_reduction_messages.cpp index 234b71aec218..9f1f68f25f83 100644 --- a/test/OpenMP/target_parallel_for_reduction_messages.cpp +++ b/test/OpenMP/target_parallel_for_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_parallel_for_simd_ast_print.cpp b/test/OpenMP/target_parallel_for_simd_ast_print.cpp index 82d72c3b1a4b..8f0ed97d5cd9 100644 --- a/test/OpenMP/target_parallel_for_simd_ast_print.cpp +++ b/test/OpenMP/target_parallel_for_simd_ast_print.cpp @@ -77,14 +77,14 @@ T tmain(T argc, T *argv) { a = 2; // CHECK-NEXT: for (T i = 0; i < 2; ++i) // CHECK-NEXT: a = 2; -#pragma omp target parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) ordered(N) if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) +#pragma omp target parallel for simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) schedule(static, N) ordered if (parallel :argc) num_threads(N) default(shared) shared(e) reduction(+ : h) for (int i = 0; i < 2; ++i) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) for (int j = 0; j < 2; ++j) foo(); - // CHECK-NEXT: #pragma omp target parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) ordered(N) if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) + // CHECK-NEXT: #pragma omp target parallel for simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) schedule(static, N) ordered if(parallel: argc) num_threads(N) default(shared) shared(e) reduction(+: h) // CHECK-NEXT: for (int i = 0; i < 2; ++i) // CHECK-NEXT: for (int j = 0; j < 2; ++j) // CHECK-NEXT: for (int j = 0; j < 2; ++j) diff --git a/test/OpenMP/target_parallel_for_simd_codegen.cpp b/test/OpenMP/target_parallel_for_simd_codegen.cpp new file mode 100644 index 000000000000..d168214df35f --- /dev/null +++ b/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -0,0 +1,812 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. + +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [10 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547, i64 288] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +// CHECK-LABEL: get_val +long long get_val() { return 0; } + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target parallel for simd nowait + for (int i = 3; i < 32; i += 5) { + } + + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}}) + long long k = get_val(); + #pragma omp target parallel for simd if(target: 0) linear(k : 3) schedule(dynamic) + for (int i = 10; i > 1; i--) { + a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], + // CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + int lin = 12; + #pragma omp target parallel for simd if(target: 1) linear(lin, a : get_val()) + for (unsigned long long it = 2000; it >= 600; it-=400) { + aa += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[IFTHEN]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + + #pragma omp target parallel for simd if(target: n>10) + for (short it = 6; it <= 20; it-=-4) { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] + // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 10, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([10 x i64], [10 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[SR]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [10 x i[[SZ]]], [10 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[BP]], i32 0, i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [10 x i8*], [10 x i8*]* [[P]], i32 0, i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], + // CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]], + // CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]], + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]], + // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]], + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]], + // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]], + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]], + // CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store float* %{{.+}}, float** [[CBPADDR5:%.+]], + // CHECK-DAG: store float* %{{.+}}, float** [[CPADDR5:%.+]], + // CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]], + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]], + // CHECK-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store double* %{{.+}}, double** [[CBPADDR7:%.+]], + // CHECK-DAG: store double* %{{.+}}, double** [[CPADDR7:%.+]], + // CHECK-DAG: [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]], + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]], + // CHECK-DAG: [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target parallel for simd if(target: n>20) schedule(static, a) + for (unsigned char it = 'z'; it >= 'a'; it+=-1) { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]]() +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.) +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + + +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: alloca i{{32|64}}{{[*]*}}, align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32* +// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align +// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i{{32|64}}{{[*]*}})* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i{{32|64}}{{[*]*}} %{{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK: !llvm.mem.parallel_loop_access +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: alloca i[[SZ]], align +// CHECK: alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK: store i16 [[AA]], i16* [[AA_C]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]], i[[SZ]] {{.+}}, i[[SZ]] {{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT3]] +// Create stack storage and store argument in there. +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align +// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align +// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align +// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align +// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align +// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align +// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT4]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK: alloca i[[SZ]] +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] + +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], +// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], +// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], +// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 10, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*, i[[SZ]])* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]], i[[SZ]] %{{.+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target parallel for simd if(target: n>40) + for (long long i = -10; i < 10; i += 3) { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target parallel for simd if(target: n>50) + for (unsigned i=100; i<10; i+=10) { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target parallel for simd if(target: n>60) + for (unsigned long long it = 2000; it >= 600; it -= 400) { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + +// We capture 2 VLA sizes in this target region +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] +// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 + +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as some are repeated. +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]], +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]], +// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} + +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3 +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3 +// CHECK-DAG: [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]** +// CHECK-DAG: [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]], +// CHECK-DAG: store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]** +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]], +// CHECK-DAG: store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). + +// CHECK: define internal void [[HVT7]] +// Create local storage for each capture. +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* +// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] +// Store captures in the context. +// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], +// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], + +// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]] +// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align +// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + + +// CHECK: define internal void [[HVT6]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8* +// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align +// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +// CHECK: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +#endif diff --git a/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp b/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp new file mode 100644 index 000000000000..5cc9d7015cbd --- /dev/null +++ b/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp @@ -0,0 +1,451 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target parallel for simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] +// CHECK-DAG: [[T1:@.+]] = global [[ST1]] +// CHECK-DAG: [[T2:@.+]] = global [[ST2]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-NTARGET-NOT: type { i8*, i8*, % +// CHECK-NTARGET-NOT: type { i32, % + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// TCHECK-NOT: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] + +// CHECK-NTARGET-NOT: private constant i8 0 +// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i + +// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. +// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + +// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target parallel for simd if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template <int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we us all the target regions: +//CHECK-DAG: define internal void @[[NAME1]]( +//CHECK-DAG: call void @[[NAME1]]( +//CHECK-DAG: define internal void @[[NAME2]]( +//CHECK-DAG: call void @[[NAME2]]( +//CHECK-DAG: define internal void @[[NAME3]]( +//CHECK-DAG: call void @[[NAME3]]( +//CHECK-DAG: define internal void @[[NAME4]]( +//CHECK-DAG: call void @[[NAME4]]( +//CHECK-DAG: define internal void @[[NAME5]]( +//CHECK-DAG: call void @[[NAME5]]( +//CHECK-DAG: define internal void @[[NAME6]]( +//CHECK-DAG: call void @[[NAME6]]( +//CHECK-DAG: define internal void @[[NAME7]]( +//CHECK-DAG: call void @[[NAME7]]( +//CHECK-DAG: define internal void @[[NAME8]]( +//CHECK-DAG: call void @[[NAME8]]( +//CHECK-DAG: define internal void @[[NAME9]]( +//CHECK-DAG: call void @[[NAME9]]( +//CHECK-DAG: define internal void @[[NAME10]]( +//CHECK-DAG: call void @[[NAME10]]( +//CHECK-DAG: define internal void @[[NAME11]]( +//CHECK-DAG: call void @[[NAME11]]( +//CHECK-DAG: define internal void @[[NAME12]]( +//CHECK-DAG: call void @[[NAME12]]( + +//TCHECK-DAG: define void @[[NAME1]]( +//TCHECK-DAG: define void @[[NAME2]]( +//TCHECK-DAG: define void @[[NAME3]]( +//TCHECK-DAG: define void @[[NAME4]]( +//TCHECK-DAG: define void @[[NAME5]]( +//TCHECK-DAG: define void @[[NAME6]]( +//TCHECK-DAG: define void @[[NAME7]]( +//TCHECK-DAG: define void @[[NAME8]]( +//TCHECK-DAG: define void @[[NAME9]]( +//TCHECK-DAG: define void @[[NAME10]]( +//TCHECK-DAG: define void @[[NAME11]]( +//TCHECK-DAG: define void @[[NAME12]]( + +// CHECK-NTARGET-NOT: __tgt_target +// CHECK-NTARGET-NOT: __tgt_register_lib +// CHECK-NTARGET-NOT: __tgt_unregister_lib + +// TCHECK-NOT: __tgt_target +// TCHECK-NOT: __tgt_register_lib +// TCHECK-NOT: __tgt_unregister_lib + +// We have 2 initializers with priority 500 +//CHECK: define internal void [[P500]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 1 initializers with priority 501 +//CHECK: define internal void [[P501]]( +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 6 initializers with default priority +//CHECK: define internal void [[PMAX]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// Check registration and unregistration + +//CHECK: define internal void @[[UNREGFN:.+]](i8*) +//CHECK-SAME: comdat($[[REGFN]]) { +//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) +//CHECK: ret void +//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*) + +//CHECK: define linkonce hidden void @[[REGFN]](i8*) +//CHECK-SAME: comdat { +//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]]) +//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*), +//CHECK: ret void +//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*) + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +#endif diff --git a/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp b/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp new file mode 100644 index 000000000000..bdfa38669859 --- /dev/null +++ b/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp @@ -0,0 +1,68 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target parallel for simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[CA:%.+]] = type { i32* } + +// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}}) +int nested(int a){ + // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]]( + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + ++a; + + // CHECK: call void @"[[LNAME:.+]]"([[CA]]* + auto F = [&](){ + #pragma omp parallel + { + #pragma omp target parallel for simd + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]]( + +// CHECK: define {{.*}}void @"[[LNAME]]"( +// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to + +// CHECK: define {{.*}}void [[PNAME]]( +// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]]( + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]]( + + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +#endif diff --git a/test/OpenMP/target_parallel_for_simd_depend_messages.cpp b/test/OpenMP/target_parallel_for_simd_depend_messages.cpp index a8b4de7ff755..4375941036cf 100644 --- a/test/OpenMP/target_parallel_for_simd_depend_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target parallel for simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for simd depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for simd depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for simd depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for simd depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for simd depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target parallel for simd depend (in : main) for (i = 0; i < argc; ++i) foo(); - #pragma omp target parallel for simd depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target parallel for simd depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp index 9397314d87cc..9ad255f033e4 100644 --- a/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_firstprivate_messages.cpp @@ -125,11 +125,11 @@ int foomain(int argc, char **argv) { foo(); #pragma omp parallel private(i) #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} - for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} foo(); #pragma omp parallel reduction(+ : i) #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} - for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} foo(); return 0; } @@ -219,7 +219,7 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} - for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} foo(); #pragma omp parallel shared(xa) #pragma omp target parallel for simd firstprivate(xa) // OK: may be firstprivate @@ -246,11 +246,11 @@ int main(int argc, char **argv) { } #pragma omp parallel private(i) #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} - for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} foo(); #pragma omp parallel reduction(+ : i) #pragma omp target parallel for simd firstprivate(i) // expected-note {{defined as firstprivate}} - for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} + for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} foo(); static int si; #pragma omp target parallel for simd firstprivate(si) // OK diff --git a/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp index 51fc72464c9a..313192dbb5f6 100644 --- a/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: S2 &operator=(const S2 &); const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { @@ -209,6 +209,8 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd private(xa), lastprivate(xa) // expected-error {{private variable cannot be lastprivate}} expected-note {{defined as private}} for (i = 0; i < argc; ++i) foo(); +// expected-note@+2 {{defined as lastprivate}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be lastprivate, predetermined as linear}} #pragma omp target parallel for simd lastprivate(i) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_parallel_for_simd_loop_messages.cpp b/test/OpenMP/target_parallel_for_simd_loop_messages.cpp index ed184a5e4124..f07f19e2a690 100644 --- a/test/OpenMP/target_parallel_for_simd_loop_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_loop_messages.cpp @@ -238,27 +238,29 @@ int test_iteration_spaces() { c[ii] = a[ii]; // expected-note@+2 {{defined as firstprivate}} -// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as private}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be firstprivate, predetermined as linear}} #pragma omp target parallel for simd firstprivate(ii) for (ii = 0; ii < 10; ii++) c[ii] = a[ii]; -// expected-note@+2 {{defined as linear}} -// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be linear, predetermined as private}} #pragma omp target parallel for simd linear(ii) for (ii = 0; ii < 10; ii++) c[ii] = a[ii]; +// expected-note@+2 {{defined as private}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be private, predetermined as linear}} #pragma omp target parallel for simd private(ii) for (ii = 0; ii < 10; ii++) c[ii] = a[ii]; +// expected-note@+2 {{defined as lastprivate}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be lastprivate, predetermined as linear}} #pragma omp target parallel for simd lastprivate(ii) for (ii = 0; ii < 10; ii++) c[ii] = a[ii]; { -// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be threadprivate or thread local, predetermined as private}} +// expected-error@+2 {{loop iteration variable in the associated loop of 'omp target parallel for simd' directive may not be threadprivate or thread local, predetermined as linear}} #pragma omp target parallel for simd for (sii = 0; sii < 10; sii += 1) c[sii] = a[sii]; @@ -585,15 +587,15 @@ void test_loop_eh() { #pragma omp target parallel for simd for (int i = 0; i < 10; i++) { c[i] = a[i] + b[i]; - try { + try { // expected-error {{'try' statement cannot be used in OpenMP simd region}} for (int j = 0; j < 10; ++j) { if (a[i] > b[j]) - throw a[i]; + throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}} } - throw a[i]; + throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}} } catch (float f) { if (f > 0.1) - throw a[i]; + throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}} return; // expected-error {{cannot return from OpenMP region}} } switch (i) { @@ -605,7 +607,7 @@ void test_loop_eh() { } for (int j = 0; j < 10; j++) { if (c[i] > 10) - throw c[i]; + throw c[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}} } } if (c[9] > 10) diff --git a/test/OpenMP/target_parallel_for_simd_map_messages.cpp b/test/OpenMP/target_parallel_for_simd_map_messages.cpp index 195b39c595c3..7e95e10ae5ca 100644 --- a/test/OpenMP/target_parallel_for_simd_map_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target parallel for simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel for simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target parallel for simd map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_parallel_for_simd_misc_messages.c b/test/OpenMP/target_parallel_for_simd_misc_messages.c index 2adc6f85116e..f2574a35472d 100644 --- a/test/OpenMP/target_parallel_for_simd_misc_messages.c +++ b/test/OpenMP/target_parallel_for_simd_misc_messages.c @@ -166,7 +166,7 @@ void test_collapse() { // expected-note@+1 {{variable with automatic storage duration is predetermined as private; perhaps you forget to enclose 'omp for' directive into a parallel or another task region?}} for (int j = 0; j < 16; ++j) // expected-error@+2 2 {{reduction variable must be shared}} -// expected-error@+1 {{region cannot be closely nested inside 'target parallel for simd' region; perhaps you forget to enclose 'omp for' directive into a parallel region?}} +// expected-error@+1 {{OpenMP constructs may not be nested inside a simd region}} #pragma omp for reduction(+ : i, j) for (int k = 0; k < 16; ++k) i += j; diff --git a/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp b/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp index 70a3b4ef6245..065d81517f83 100644 --- a/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_ordered_messages.cpp @@ -6,7 +6,7 @@ void foo() { } #if __cplusplus >= 201103L - // expected-note@+2 4 {{declared here}} + // expected-note@+2 2 {{declared here}} #endif bool foobool(int argc) { return argc; @@ -15,7 +15,7 @@ bool foobool(int argc) { struct S1; // expected-note {{declared here}} template <class T, typename S, int N, int ST> // expected-note {{declared here}} -T tmain(T argc, S **argv) { //expected-note 2 {{declared here}} +T tmain(T argc, S **argv) { int j; // expected-note {{declared here}} #pragma omp target parallel for simd ordered for (int i = ST; i < N; i++) @@ -26,28 +26,26 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp target parallel for simd ordered() // expected-error {{expected expression}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}} -// expected-error@+2 2 {{expression is not an integral constant expression}} -// expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}} +// expected-error@+2 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}} #pragma omp target parallel for simd ordered(argc for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -// expected-error@+1 2 {{argument to 'ordered' clause must be a strictly positive integer value}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} #pragma omp target parallel for simd ordered(ST // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} #pragma omp target parallel for simd ordered(1)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -#pragma omp target parallel for simd ordered((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'ordered' clause}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered((ST > 0) ? 1 + ST : 2) for (int i = ST; i < N; i++) - argv[0][i] = argv[0][i] - argv[0][i - ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp target parallel for simd', but found only 1}} -#if __cplusplus >= 201103L - // expected-note@+5 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} -#endif -// expected-error@+3 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'ordered' clause}} -// expected-error@+2 2 {{argument to 'ordered' clause must be a strictly positive integer value}} -// expected-error@+1 2 {{expression is not an integral constant expression}} + argv[0][i] = argv[0][i] - argv[0][i - ST]; +// expected-error@+3 2 {{argument to 'ordered' clause must be a strictly positive integer value}} +// expected-error@+2 2 {{directive '#pragma omp target parallel for simd' cannot contain more than one 'ordered' clause}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} #pragma omp target parallel for simd ordered(foobool(argc)), ordered(true), ordered(-5) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; @@ -59,14 +57,17 @@ T tmain(T argc, S **argv) { //expected-note 2 {{declared here} #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} #pragma omp target parallel for simd ordered(1) for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -#pragma omp target parallel for simd ordered(N) // expected-error {{argument to 'ordered' clause must be a strictly positive integer value}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered(N) for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i - ST]; -#pragma omp target parallel for simd ordered(2) // expected-note {{as specified in 'ordered' clause}} - foo(); // expected-error {{expected 2 for loops after '#pragma omp target parallel for simd'}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered(2) + foo(); return argc; } @@ -81,12 +82,14 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd ordered() // expected-error {{expected expression}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -#pragma omp target parallel for simd ordered(4 // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{as specified in 'ordered' clause}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered(4 // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) - argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for simd', but found only 1}} -#pragma omp target parallel for simd ordered(2 + 2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} expected-note {{as specified in 'ordered' clause}} + argv[0][i] = argv[0][i] - argv[0][i - 4]; +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered(2 + 2)) // expected-warning {{extra tokens at the end of '#pragma omp target parallel for simd' are ignored}} for (int i = 4; i < 12; i++) - argv[0][i] = argv[0][i] - argv[0][i - 4]; // expected-error {{expected 4 for loops after '#pragma omp target parallel for simd', but found only 1}} + argv[0][i] = argv[0][i] - argv[0][i - 4]; #if __cplusplus >= 201103L // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}} #endif @@ -110,13 +113,12 @@ int main(int argc, char **argv) { #pragma omp target parallel for simd ordered(j = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i - 4]; -// expected-error@+3 {{statement after '#pragma omp target parallel for simd' must be a for loop}} -// expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}} +// expected-error@+2 {{statement after '#pragma omp target parallel for simd' must be a for loop}} #pragma omp target parallel for simd ordered(ordered(tmain < int, char, -1, -2 > (argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}} foo(); -#pragma omp target parallel for simd ordered(2) // expected-note {{as specified in 'ordered' clause}} - foo(); // expected-error {{expected 2 for loops after '#pragma omp target parallel for simd'}} - // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 1, 0>' requested here}} +// expected-error@+1 {{'ordered' clause with a parameter can not be specified in '#pragma omp target parallel for simd' directive}} +#pragma omp target parallel for simd ordered(2) + foo(); return tmain<int, char, 1, 0>(argc, argv); } diff --git a/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp index 289b5b2641b6..72e2130f3df4 100644 --- a/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp +++ b/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_parallel_if_codegen.cpp b/test/OpenMP/target_parallel_if_codegen.cpp index 0aebcc218fbb..c155bf7c051a 100644 --- a/test/OpenMP/target_parallel_if_codegen.cpp +++ b/test/OpenMP/target_parallel_if_codegen.cpp @@ -130,8 +130,6 @@ int bar(int n){ return a; } - - // // CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 {{[^%]*}}[[PARM:%.+]]) // @@ -147,10 +145,8 @@ int bar(int n){ // CHECK: store i8 [[FB]], i8* [[CONV]], align // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -175,28 +171,18 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: br label {{%?}}[[END:.+]] -// -// CHECK: [[IF_ELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align -// CHECK: br label {{%?}}[[END]] -// -// CHECK: [[END]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 -// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] -// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT2:@.+]]([[S1]]* {{%.+}}, i[[SZ]] [[ARG]]) -// CHECK: br label {{%?}}[[END]] +// CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - - - - - +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IF_ELSE]] +// CHECK: call void [[HVT2]]([[S1]]* {{%.+}}, i[[SZ]] [[ARG]]) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // // CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]]) @@ -217,23 +203,18 @@ int bar(int n){ // CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: br label {{%?}}[[END:.+]] -// -// CHECK: [[IF_ELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align -// CHECK: br label {{%?}}[[END]] -// -// CHECK: [[END]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 -// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] -// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT3:@.+]](i[[SZ]] [[ARG]]) -// CHECK: br label {{%?}}[[END]] +// CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IF_ELSE]] +// CHECK: call void [[HVT3]](i[[SZ]] [[ARG]]) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // // // @@ -243,23 +224,18 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: br label {{%?}}[[END:.+]] -// -// CHECK: [[IF_ELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align -// CHECK: br label {{%?}}[[END]] -// -// CHECK: [[END]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 -// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] -// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT4:@.+]]() -// CHECK: br label {{%?}}[[END]] +// CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IF_ELSE]] +// CHECK: call void [[HVT4]]() +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] @@ -269,10 +245,8 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) -// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -283,10 +257,8 @@ int bar(int n){ // // // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) -// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] diff --git a/test/OpenMP/target_parallel_map_messages.cpp b/test/OpenMP/target_parallel_map_messages.cpp index ca794cecf4c5..d3cc742decd5 100644 --- a/test/OpenMP/target_parallel_map_messages.cpp +++ b/test/OpenMP/target_parallel_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { foo(); #pragma omp target parallel map(S1) // expected-error {{'S1' does not refer to a value}} foo(); -#pragma omp target parallel map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} foo(); -#pragma omp target parallel map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel map(ba) foo(); #pragma omp target parallel map(ca) foo(); @@ -221,11 +221,11 @@ int main(int argc, char **argv) { foo(); #pragma omp target parallel map(S1) // expected-error {{'S1' does not refer to a value}} foo(); -#pragma omp target parallel map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} foo(); #pragma omp target parallel map(argv[1]) foo(); -#pragma omp target parallel map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target parallel map(ba) foo(); #pragma omp target parallel map(ca) foo(); diff --git a/test/OpenMP/target_parallel_no_exceptions.cpp b/test/OpenMP/target_parallel_no_exceptions.cpp new file mode 100644 index 000000000000..95189a3ade2b --- /dev/null +++ b/test/OpenMP/target_parallel_no_exceptions.cpp @@ -0,0 +1,18 @@ +/// Make sure no exception messages are inclided in the llvm output. +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHK-EXCEPTION + +void test_increment() { +#pragma omp target + { + []() { return; }(); + } +} + +int main() { + test_increment(); + return 0; +} + +//CHK-EXCEPTION-NOT: invoke + diff --git a/test/OpenMP/target_parallel_num_threads_codegen.cpp b/test/OpenMP/target_parallel_num_threads_codegen.cpp index 225c2dd1d06a..836a92d9a5db 100644 --- a/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -147,10 +147,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -160,10 +158,8 @@ int bar(int n){ // // // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -190,10 +186,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -213,10 +207,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -233,10 +225,8 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20) -// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -257,10 +247,8 @@ int bar(int n){ // CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align // CHECK: [[THREADS:%.+]] = sext i16 [[T]] to i32 // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] diff --git a/test/OpenMP/target_parallel_reduction_messages.cpp b/test/OpenMP/target_parallel_reduction_messages.cpp index 52338ee71cbe..038f099c5a6c 100644 --- a/test/OpenMP/target_parallel_reduction_messages.cpp +++ b/test/OpenMP/target_parallel_reduction_messages.cpp @@ -24,9 +24,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_reduction_codegen.cpp b/test/OpenMP/target_reduction_codegen.cpp new file mode 100644 index 000000000000..f86719b55fa1 --- /dev/null +++ b/test/OpenMP/target_reduction_codegen.cpp @@ -0,0 +1,215 @@ +// Only test codegen on target side, as private clause does not require any action on the host side +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; + TT<tx, ty> operator*(const TT<tx, ty> &) { return *this; } +}; + +// TCHECK: [[S1:%.+]] = type { double } + +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + #pragma omp target reduction(*:a) + { + } + + // TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) + // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: store {{.+}}, {{.+}} [[A]], + // TCHECK: load i32*, i32** [[A]], + // TCHECK: ret void + +#pragma omp target reduction(+:a) + { + a = 1; + } + + // TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}}) + // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: store {{.+}}, {{.+}} [[A]], + // TCHECK: [[REF:%.+]] = load i32*, i32** [[A]], + // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[REF]], + // TCHECK: ret void + + #pragma omp target reduction(-:a, aa) + { + a = 1; + aa = 1; + } + + // TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]]) + // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: [[AA:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: store {{.+}}, {{.+}} [[A]], + // TCHECK: store {{.+}}, {{.+}} [[AA]], + // TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]], + // TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]], + // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]], + // TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], + // TCHECK: ret void + + return a; +} + + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + +#pragma omp target reduction(+:a,aa,b) + { + a = 1; + aa = 1; + b[2] = 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + +#pragma omp target reduction(-:a,aa,aaa,b) + { + a = 1; + aa = 1; + aaa = 1; + b[2] = 1; + } + + return a; +} + +// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK: [[A3:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK: store {{.+}}, {{.+}} [[A]], +// TCHECK: store {{.+}}, {{.+}} [[A2]], +// TCHECK: store {{.+}}, {{.+}} [[A3]], +// TCHECK: store {{.+}}, {{.+}} [[B]], +// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]], +// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]], +// TCHECK: [[A3_REF:%.+]] = load i8*, i8** [[A3]], +// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]], +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]], +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A3_REF]], +// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK: ret void + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + +#pragma omp target reduction(max:b,c) + { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } + + // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i16*{{.+}}) + // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, + // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, + // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, + // TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]], + // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[B_ADDR]], + // TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]], + // TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]], + // TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[C_ADDR]], + // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]], + // TCHECK: [[B_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[B_ADDR]], + // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]], + // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]], + // TCHECK: [[C_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]], + + // this->a = (double)b + 1.5; + // TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_REF]], + // TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double + // TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00 + // TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // TCHECK: store double [[NEW_A_VAL]], double* [[A_FIELD]], + + // c[1][1] = ++a; + // TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // TCHECK: [[A_FIELD4_VAL:%.+]] = load double, double* [[A_FIELD4]], + // TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00 + // TCHECK: store double [[A_FIELD_INC]], double* [[A_FIELD4]], + // TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}} + // TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]] + // TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_REF]], i{{[0-9]+}} [[C_IND]] + // TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_1_REF]], i{{[0-9]+}} 1 + // TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], i{{[0-9]+}}* [[C_1_1_REF]], + + // finish + // TCHECK: ret void +}; + + +int bar(int n){ + int a = 0; + a += foo(n); + S1 S; + a += S.r1(n); + a += fstatic(n); + a += ftemplate<int>(n); + + return a; +} + +// template +// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK: store {{.+}}, {{.+}} [[A]], +// TCHECK: store {{.+}}, {{.+}} [[A2]], +// TCHECK: store {{.+}}, {{.+}} [[B]], +// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]], +// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]], +// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]], +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]], +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], +// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK: ret void + +#endif diff --git a/test/OpenMP/target_reduction_messages.cpp b/test/OpenMP/target_reduction_messages.cpp new file mode 100644 index 000000000000..8cf79f9a5d99 --- /dev/null +++ b/test/OpenMP/target_reduction_messages.cpp @@ -0,0 +1,262 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp target reduction(+:ref) + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 4 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; // expected-note 2 {{'q' defined here}} + T fl; +#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}} + foo(); +#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}} + foo(); +#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + foo(); +#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp target reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp target reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + foo(); +#pragma omp target reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + foo(); +#pragma omp target reduction(&& : argc) + foo(); +#pragma omp target reduction(^ : T) // expected-error {{'T' does not refer to a value}} + foo(); +#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} + foo(); +#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + foo(); +#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + foo(); +#pragma omp target reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + foo(); +#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp parallel private(k) +#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}} + foo(); +#pragma omp target reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp parallel shared(i) +#pragma omp target reduction(min : i) +#pragma omp parallel reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp target reduction(+ : fl) + foo(); +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp target reduction(+ : fl) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; // expected-note {{'q' defined here}} + float fl; +#pragma omp target reduction // expected-error {{expected '(' after 'reduction'}} + foo(); +#pragma omp target reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp target' are ignored}} + foo(); +#pragma omp target reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp target reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp target reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp target reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} + foo(); +#pragma omp target reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp target reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + foo(); +#pragma omp target reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp target reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp target reduction(~ : argc) // expected-error {{expected unqualified-id}} + foo(); +#pragma omp target reduction(&& : argc) + foo(); +#pragma omp target reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + foo(); +#pragma omp target reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} + foo(); +#pragma omp target reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp target reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp target reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + foo(); +#pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp target reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + foo(); +#pragma omp target reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + foo(); +#pragma omp target reduction(+ : o) // expected-error {{no viable overloaded '='}} + foo(); +#pragma omp parallel private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp parallel private(k) +#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp target reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}} + foo(); +#pragma omp target reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp parallel shared(i) +#pragma omp target reduction(min : i) +#pragma omp parallel reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}} + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp target reduction(+ : fl) + foo(); +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp target reduction(+ : fl) + foo(); + static int m; +#pragma omp target reduction(+ : m) // OK + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}} +} diff --git a/test/OpenMP/target_simd_codegen.cpp b/test/OpenMP/target_simd_codegen.cpp new file mode 100644 index 000000000000..d57e3818559a --- /dev/null +++ b/test/OpenMP/target_simd_codegen.cpp @@ -0,0 +1,675 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. + +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] +// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +// CHECK-LABEL: get_val +long long get_val() { return 0; } + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target simd nowait + for (int i = 3; i < 32; i += 5) { + } + + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}, i{{32|64}}{{[*]*}} {{[^)]+}}) + long long k = get_val(); + #pragma omp target simd if(target: 0) linear(k : 3) + for (int i = 10; i > 1; i--) { + a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], + // CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + int lin = 12; + #pragma omp target simd if(target: 1) linear(lin, a : get_val()) + for (unsigned long long it = 2000; it >= 600; it-=400) { + aa += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[IFTHEN]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0)) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], + // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], + // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + + #pragma omp target simd if(target: n>10) + for (short it = 6; it <= 20; it-=-4) { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] + // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0)) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], + // CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]], + // CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]], + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]], + // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]], + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]], + // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]], + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]], + // CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store float* %{{.+}}, float** [[CBPADDR5:%.+]], + // CHECK-DAG: store float* %{{.+}}, float** [[CPADDR5:%.+]], + // CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]], + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]], + // CHECK-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store double* %{{.+}}, double** [[CBPADDR7:%.+]], + // CHECK-DAG: store double* %{{.+}}, double** [[CPADDR7:%.+]], + // CHECK-DAG: [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]], + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]], + // CHECK-DAG: [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target simd if(target: n>20) + for (unsigned char it = 'z'; it >= 'a'; it+=-1) { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]]() +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + + +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK: !llvm.mem.parallel_loop_access +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT3]] +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: !llvm.loop +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT4]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] + +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], +// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], +// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], +// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], + + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target simd if(target: n>40) + for (long long i = -10; i < 10; i += 3) { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target simd if(target: n>50) + for (unsigned i=100; i<10; i+=10) { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target simd if(target: n>60) + for (unsigned long long it = 2000; it >= 600; it -= 400) { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + +// We capture 2 VLA sizes in this target region +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] +// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 + +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0)) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as some are repeated. +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]], +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]], +// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} + +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0)) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], + +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3 +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3 +// CHECK-DAG: [[CBPADDR3:%.+]] = bitcast i8** [[BPADDR3]] to [10 x i32]** +// CHECK-DAG: [[CPADDR3:%.+]] = bitcast i8** [[PADDR3]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL3:%.+]], [10 x i32]** [[CBPADDR3]], +// CHECK-DAG: store [10 x i32]* [[VAL3]], [10 x i32]** [[CPADDR3]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0)) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* +// CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [10 x i32]** +// CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [10 x i32]** +// CHECK-DAG: store [10 x i32]* [[VAL2:%.+]], [10 x i32]** [[CBPADDR2]], +// CHECK-DAG: store [10 x i32]* [[VAL2]], [10 x i32]** [[CPADDR2]], + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). + +// CHECK: define internal void [[HVT7]] +// Create local storage for each capture. +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] +// Store captures in the context. +// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], +// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], + + +// CHECK: define internal void [[HVT6]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +#endif diff --git a/test/OpenMP/target_simd_codegen_registration.cpp b/test/OpenMP/target_simd_codegen_registration.cpp new file mode 100644 index 000000000000..a6e6e9b38565 --- /dev/null +++ b/test/OpenMP/target_simd_codegen_registration.cpp @@ -0,0 +1,451 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] +// CHECK-DAG: [[T1:@.+]] = global [[ST1]] +// CHECK-DAG: [[T2:@.+]] = global [[ST2]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-NTARGET-NOT: type { i8*, i8*, % +// CHECK-NTARGET-NOT: type { i32, % + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// TCHECK-NOT: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] + +// CHECK-NTARGET-NOT: private constant i8 0 +// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i + +// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. +// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + +// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target simd if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template <int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target simd + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we us all the target regions: +//CHECK-DAG: define internal void @[[NAME1]]( +//CHECK-DAG: call void @[[NAME1]]( +//CHECK-DAG: define internal void @[[NAME2]]( +//CHECK-DAG: call void @[[NAME2]]( +//CHECK-DAG: define internal void @[[NAME3]]( +//CHECK-DAG: call void @[[NAME3]]( +//CHECK-DAG: define internal void @[[NAME4]]( +//CHECK-DAG: call void @[[NAME4]]( +//CHECK-DAG: define internal void @[[NAME5]]( +//CHECK-DAG: call void @[[NAME5]]( +//CHECK-DAG: define internal void @[[NAME6]]( +//CHECK-DAG: call void @[[NAME6]]( +//CHECK-DAG: define internal void @[[NAME7]]( +//CHECK-DAG: call void @[[NAME7]]( +//CHECK-DAG: define internal void @[[NAME8]]( +//CHECK-DAG: call void @[[NAME8]]( +//CHECK-DAG: define internal void @[[NAME9]]( +//CHECK-DAG: call void @[[NAME9]]( +//CHECK-DAG: define internal void @[[NAME10]]( +//CHECK-DAG: call void @[[NAME10]]( +//CHECK-DAG: define internal void @[[NAME11]]( +//CHECK-DAG: call void @[[NAME11]]( +//CHECK-DAG: define internal void @[[NAME12]]( +//CHECK-DAG: call void @[[NAME12]]( + +//TCHECK-DAG: define void @[[NAME1]]( +//TCHECK-DAG: define void @[[NAME2]]( +//TCHECK-DAG: define void @[[NAME3]]( +//TCHECK-DAG: define void @[[NAME4]]( +//TCHECK-DAG: define void @[[NAME5]]( +//TCHECK-DAG: define void @[[NAME6]]( +//TCHECK-DAG: define void @[[NAME7]]( +//TCHECK-DAG: define void @[[NAME8]]( +//TCHECK-DAG: define void @[[NAME9]]( +//TCHECK-DAG: define void @[[NAME10]]( +//TCHECK-DAG: define void @[[NAME11]]( +//TCHECK-DAG: define void @[[NAME12]]( + +// CHECK-NTARGET-NOT: __tgt_target +// CHECK-NTARGET-NOT: __tgt_register_lib +// CHECK-NTARGET-NOT: __tgt_unregister_lib + +// TCHECK-NOT: __tgt_target +// TCHECK-NOT: __tgt_register_lib +// TCHECK-NOT: __tgt_unregister_lib + +// We have 2 initializers with priority 500 +//CHECK: define internal void [[P500]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 1 initializers with priority 501 +//CHECK: define internal void [[P501]]( +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 6 initializers with default priority +//CHECK: define internal void [[PMAX]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// Check registration and unregistration + +//CHECK: define internal void @[[UNREGFN:.+]](i8*) +//CHECK-SAME: comdat($[[REGFN]]) { +//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) +//CHECK: ret void +//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*) + +//CHECK: define linkonce hidden void @[[REGFN]](i8*) +//CHECK-SAME: comdat { +//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]]) +//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*), +//CHECK: ret void +//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*) + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target simd + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +#endif diff --git a/test/OpenMP/target_simd_codegen_registration_naming.cpp b/test/OpenMP/target_simd_codegen_registration_naming.cpp new file mode 100644 index 000000000000..2ed74da4f05b --- /dev/null +++ b/test/OpenMP/target_simd_codegen_registration_naming.cpp @@ -0,0 +1,68 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[CA:%.+]] = type { i32* } + +// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}}) +int nested(int a){ + // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]]( + #pragma omp target simd + for (int i = 0; i < 10; ++i) + ++a; + + // CHECK: call void @"[[LNAME:.+]]"([[CA]]* + auto F = [&](){ + #pragma omp parallel + { + #pragma omp target simd + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]]( + +// CHECK: define {{.*}}void @"[[LNAME]]"( +// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to + +// CHECK: define {{.*}}void [[PNAME]]( +// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]]( + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]]( + + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +#endif diff --git a/test/OpenMP/target_simd_depend_messages.cpp b/test/OpenMP/target_simd_depend_messages.cpp index 3fc46f4c0848..89184f25d7a1 100644 --- a/test/OpenMP/target_simd_depend_messages.cpp +++ b/test/OpenMP/target_simd_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target simd depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target simd depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target simd depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} + #pragma omp target simd depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); - #pragma omp target simd depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target simd depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target simd depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target simd depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); - #pragma omp target simd depend (in : main) // expected-error {{expected variable name, array element or array section}} + #pragma omp target simd depend (in : main) for (i = 0; i < argc; ++i) foo(); - #pragma omp target simd depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target simd depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target simd depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_simd_lastprivate_messages.cpp b/test/OpenMP/target_simd_lastprivate_messages.cpp index afb38d9c9222..9a6eb241728b 100644 --- a/test/OpenMP/target_simd_lastprivate_messages.cpp +++ b/test/OpenMP/target_simd_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: S2 &operator=(const S2 &); const S2 &operator=(const S2 &) const; static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/target_simd_map_messages.cpp b/test/OpenMP/target_simd_map_messages.cpp index 63bbdde77255..2473bbb49bea 100644 --- a/test/OpenMP/target_simd_map_messages.cpp +++ b/test/OpenMP/target_simd_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -112,9 +112,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target simd map(ca) for (i = 0; i < argc; ++i) foo(); @@ -214,11 +214,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target simd map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target simd map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_simd_reduction_messages.cpp b/test/OpenMP/target_simd_reduction_messages.cpp index ca14acb2293f..9e1bb0521d71 100644 --- a/test/OpenMP/target_simd_reduction_messages.cpp +++ b/test/OpenMP/target_simd_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_codegen.cpp b/test/OpenMP/target_teams_codegen.cpp index 208cda6bb350..e6c250178d06 100644 --- a/test/OpenMP/target_teams_codegen.cpp +++ b/test/OpenMP/target_teams_codegen.cpp @@ -38,16 +38,18 @@ // code, only 6 will have mapped arguments, and only 4 have all-constant map // sizes. -// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2] -// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] -// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288] -// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 35, i32 288, i32 35, i32 35, i32 288, i32 288, i32 35, i32 35] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] // CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] // CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 35] -// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 35] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i64] [i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 // CHECK-DAG: @{{.*}} = private constant i8 0 @@ -83,6 +85,8 @@ struct TT{ ty Y; }; +int global; + // CHECK: define {{.*}}[[FOO:@.+]]( int foo(int n) { int a = 0; @@ -93,30 +97,44 @@ int foo(int n) { double cn[5][n]; TT<long long, char> d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 0, i32 0) - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]]() + // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] - #pragma omp target teams + #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) nowait { } - // CHECK: store i32 0, i32* [[RHV:%.+]], align 4 - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) #pragma omp target teams if(target: 0) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -126,9 +144,7 @@ int foo(int n) { // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) @@ -142,7 +158,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -159,21 +175,17 @@ int foo(int n) { // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFELSE]] - // CHECK: store i32 -1, i32* [[RHV]], align 4 - // CHECK-NEXT: br label %[[IFEND:.+]] - - // CHECK: [[IFEND]] - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] #pragma omp target teams if(target: n>10) { a += 1; @@ -197,7 +209,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -286,9 +298,7 @@ int foo(int n) { // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} - // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 - // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -312,11 +322,12 @@ int foo(int n) { // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]]() -// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) +// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}}) +// CHECK: alloca i16, // CHECK: ret void // CHECK-NEXT: } @@ -441,7 +452,7 @@ int foo(int n) { // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}}) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. template<typename tx> @@ -533,7 +544,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -582,9 +593,7 @@ int bar(int n){ // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** // CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -598,7 +607,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -630,21 +639,17 @@ int bar(int n){ // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [10 x i32]** // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [10 x i32]** -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] // // CHECK: define {{.*}}[[FTEMPLATE]] @@ -652,7 +657,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -677,21 +682,17 @@ int bar(int n){ // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to [10 x i32]** // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to [10 x i32]** -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFELSE]] -// CHECK: store i32 -1, i32* [[RHV]], align 4 -// CHECK-NEXT: br label %[[IFEND:.+]] - -// CHECK: [[IFEND]] -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] // CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK-NEXT: br label %[[END]] // CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] @@ -728,7 +729,7 @@ int bar(int n){ // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}}) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. diff --git a/test/OpenMP/target_teams_codegen_registration.cpp b/test/OpenMP/target_teams_codegen_registration.cpp index b8154538df04..38668a46b161 100644 --- a/test/OpenMP/target_teams_codegen_registration.cpp +++ b/test/OpenMP/target_teams_codegen_registration.cpp @@ -63,40 +63,40 @@ // CHECK-DAG: {{@.+}} = private constant i8 0 // TCHECK-NOT: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-DAG: {{@.+}} = private constant i8 0 // CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] -// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] // CHECK-NTARGET-NOT: private constant i8 0 // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i diff --git a/test/OpenMP/target_teams_depend_messages.cpp b/test/OpenMP/target_teams_depend_messages.cpp index 9308e8972872..57983eec8882 100644 --- a/test/OpenMP/target_teams_depend_messages.cpp +++ b/test/OpenMP/target_teams_depend_messages.cpp @@ -36,21 +36,21 @@ int main(int argc, char **argv, char *env[]) { foo(); #pragma omp target teams depend (out: ) // expected-error {{expected expression}} foo(); -#pragma omp target teams depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} +#pragma omp target teams depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} foo(); #pragma omp target teams depend (out :S1) // expected-error {{'S1' does not refer to a value}} foo(); -#pragma omp target teams depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams depend(in : argv[1][1] = '2') foo(); -#pragma omp target teams depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target teams depend (in : argv[0]) foo(); #pragma omp target teams depend (in : ) // expected-error {{expected expression}} foo(); -#pragma omp target teams depend (in : main) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams depend (in : main) foo(); -#pragma omp target teams depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} +#pragma omp target teams depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} foo(); #pragma omp target teams depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} foo(); diff --git a/test/OpenMP/target_teams_distribute_codegen.cpp b/test/OpenMP/target_teams_distribute_codegen.cpp new file mode 100644 index 000000000000..17baa603d12b --- /dev/null +++ b/test/OpenMP/target_teams_distribute_codegen.cpp @@ -0,0 +1,820 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. + +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [5 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [5 x i64] [i64 288, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +int global; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) nowait + for (int i = 0; i < 10; ++i) { + } + + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) + #pragma omp target teams distribute if(target: 0) + for (int i = 0; i < 10; ++i) { + a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute if(target: 1) + for (int i = 0; i < 10; ++i) { + aa += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[IFTHEN]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + #pragma omp target teams distribute if(target: n>10) + for (int i = 0; i < 10; ++i) { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] + // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], + // CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]], + // CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]], + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]], + // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]], + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]], + // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]], + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]], + // CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store float* %{{.+}}, float** [[CBPADDR5:%.+]], + // CHECK-DAG: store float* %{{.+}}, float** [[CPADDR5:%.+]], + // CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]], + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]], + // CHECK-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store double* %{{.+}}, double** [[CBPADDR7:%.+]], + // CHECK-DAG: store double* %{{.+}}, double** [[CPADDR7:%.+]], + // CHECK-DAG: [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]], + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]], + // CHECK-DAG: [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute if(target: n>20) + for (int i = 0; i < 10; ++i) { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}}) +// CHECK: alloca i16, +// CHECK: ret void +// CHECK-NEXT: } + + +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32* +// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align +// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK: store i16 [[AA]], i16* [[AA_C]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT3]] +// Create stack storage and store argument in there. +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align +// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align +// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align +// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align +// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align +// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align +// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT4]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] + +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], +// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], +// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], +// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target teams distribute if(target: n>40) + for (int i = 0; i < 10; ++i) { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target teams distribute if(target: n>50) + for (int i = a; i < n; ++i) { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target teams distribute if(target: n>60) + for (int i = 0; i < 10; ++i) { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + +// We capture 2 VLA sizes in this target region +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] +// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 + +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as some are repeated. +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]], +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]], +// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} + +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 3 +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 3 +// CHECK-DAG: store i[[SZ]] [[VAL3:%.+]], i[[SZ]]* [[CBPADDR3:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL3]], i[[SZ]]* [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 4 +// CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 4 +// CHECK-DAG: store [10 x i32]* %{{.+}}, [10 x i32]** [[CBPADDR4:%.+]], +// CHECK-DAG: store [10 x i32]* %{{.+}}, [10 x i32]** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]** + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + + + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). + +// CHECK: define internal void [[HVT7]] +// Create local storage for each capture. +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* +// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] +// Store captures in the context. +// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], +// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], + +// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]] +// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align +// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + + +// CHECK: define internal void [[HVT6]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: alloca i[[SZ]], +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: alloca i[[SZ]], +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8* +// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align +// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] {{.+}}, i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +// CHECK: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +#endif diff --git a/test/OpenMP/target_teams_distribute_codegen_registration.cpp b/test/OpenMP/target_teams_distribute_codegen_registration.cpp new file mode 100644 index 000000000000..c031731c8065 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_codegen_registration.cpp @@ -0,0 +1,451 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target teams distribute codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] +// CHECK-DAG: [[T1:@.+]] = global [[ST1]] +// CHECK-DAG: [[T2:@.+]] = global [[ST2]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-NTARGET-NOT: type { i8*, i8*, % +// CHECK-NTARGET-NOT: type { i32, % + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// TCHECK-NOT: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] + +// CHECK-NTARGET-NOT: private constant i8 0 +// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i + +// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. +// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + +// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target teams distribute if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template <int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we us all the target regions: +//CHECK-DAG: define internal void @[[NAME1]]( +//CHECK-DAG: call void @[[NAME1]]( +//CHECK-DAG: define internal void @[[NAME2]]( +//CHECK-DAG: call void @[[NAME2]]( +//CHECK-DAG: define internal void @[[NAME3]]( +//CHECK-DAG: call void @[[NAME3]]( +//CHECK-DAG: define internal void @[[NAME4]]( +//CHECK-DAG: call void @[[NAME4]]( +//CHECK-DAG: define internal void @[[NAME5]]( +//CHECK-DAG: call void @[[NAME5]]( +//CHECK-DAG: define internal void @[[NAME6]]( +//CHECK-DAG: call void @[[NAME6]]( +//CHECK-DAG: define internal void @[[NAME7]]( +//CHECK-DAG: call void @[[NAME7]]( +//CHECK-DAG: define internal void @[[NAME8]]( +//CHECK-DAG: call void @[[NAME8]]( +//CHECK-DAG: define internal void @[[NAME9]]( +//CHECK-DAG: call void @[[NAME9]]( +//CHECK-DAG: define internal void @[[NAME10]]( +//CHECK-DAG: call void @[[NAME10]]( +//CHECK-DAG: define internal void @[[NAME11]]( +//CHECK-DAG: call void @[[NAME11]]( +//CHECK-DAG: define internal void @[[NAME12]]( +//CHECK-DAG: call void @[[NAME12]]( + +//TCHECK-DAG: define void @[[NAME1]]( +//TCHECK-DAG: define void @[[NAME2]]( +//TCHECK-DAG: define void @[[NAME3]]( +//TCHECK-DAG: define void @[[NAME4]]( +//TCHECK-DAG: define void @[[NAME5]]( +//TCHECK-DAG: define void @[[NAME6]]( +//TCHECK-DAG: define void @[[NAME7]]( +//TCHECK-DAG: define void @[[NAME8]]( +//TCHECK-DAG: define void @[[NAME9]]( +//TCHECK-DAG: define void @[[NAME10]]( +//TCHECK-DAG: define void @[[NAME11]]( +//TCHECK-DAG: define void @[[NAME12]]( + +// CHECK-NTARGET-NOT: __tgt_target +// CHECK-NTARGET-NOT: __tgt_register_lib +// CHECK-NTARGET-NOT: __tgt_unregister_lib + +// TCHECK-NOT: __tgt_target +// TCHECK-NOT: __tgt_register_lib +// TCHECK-NOT: __tgt_unregister_lib + +// We have 2 initializers with priority 500 +//CHECK: define internal void [[P500]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 1 initializers with priority 501 +//CHECK: define internal void [[P501]]( +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 6 initializers with default priority +//CHECK: define internal void [[PMAX]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// Check registration and unregistration + +//CHECK: define internal void @[[UNREGFN:.+]](i8*) +//CHECK-SAME: comdat($[[REGFN]]) { +//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) +//CHECK: ret void +//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*) + +//CHECK: define linkonce hidden void @[[REGFN]](i8*) +//CHECK-SAME: comdat { +//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]]) +//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*), +//CHECK: ret void +//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*) + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +#endif diff --git a/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp b/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp new file mode 100644 index 000000000000..c03466a52646 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp @@ -0,0 +1,68 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target teams distribute codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[CA:%.+]] = type { i32* } + +// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}}) +int nested(int a){ + // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]]( + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + ++a; + + // CHECK: call void @"[[LNAME:.+]]"([[CA]]* + auto F = [&](){ + #pragma omp parallel + { + #pragma omp target teams distribute + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]]( + +// CHECK: define {{.*}}void @"[[LNAME]]"( +// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to + +// CHECK: define {{.*}}void [[PNAME]]( +// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]]( + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]]( + + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +#endif diff --git a/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_collapse_codegen.cpp new file mode 100644 index 000000000000..be99186e4681 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -0,0 +1,125 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target teams distribute collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: [[OMP_UB:%.+]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target teams distribute collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target teams distribute collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/target_teams_distribute_depend_messages.cpp b/test/OpenMP/target_teams_distribute_depend_messages.cpp index b3e733141070..fbdd49590f87 100644 --- a/test/OpenMP/target_teams_distribute_depend_messages.cpp +++ b/test/OpenMP/target_teams_distribute_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} +#pragma omp target teams distribute depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute depend (in : main) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute depend (in : main) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} +#pragma omp target teams distribute depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..0c9b2387debf --- /dev/null +++ b/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -0,0 +1,197 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target teams distribute + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target teams distribute dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target teams distribute dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; +#pragma omp target teams distribute + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target teams distribute dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target teams distribute dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target teams distribute + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target teams distribute dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target teams distribute dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp new file mode 100644 index 000000000000..35abffa92600 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -0,0 +1,342 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target teams distribute firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target teams distribute firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // LAMBDA: [[SIVAR_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_PRIV_ADDR]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV_ADDR]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV_ADDR]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp index 602e039802d1..934149e7c139 100644 --- a/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_firstprivate_messages.cpp @@ -125,7 +125,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute firstprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute lastprivate(argc), firstprivate(argc) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp new file mode 100644 index 000000000000..b41a6016079b --- /dev/null +++ b/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -0,0 +1,384 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target teams distribute lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 [[G_IN:%.+]], i64 [[G1_IN:%.+]], i64 [[SVAR_IN:%.+]], i64 [[SFVAR_IN:%.+]]) + // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i32 [[SVAR_IN:%.+]], i32 [[SFVAR_IN:%.+]]) + // LAMBDA-64: [[G_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA-64: [[G1_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA-64: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i32, + // LAMBDA-64: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i32, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA-64: store i64 [[G_IN]], i64* [[G_PRIVATE_ADDR]], + // LAMBDA-32: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[G1_IN]], i64* [[G1_PRIVATE_ADDR]], + // LAMBDA-32: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[SVAR_IN]], i64* [[SVAR_PRIVATE_ADDR]], + // LAMBDA-32: store i32 [[SVAR_IN]], i32* [[SVAR_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[SFVAR_IN]], i64* [[SFVAR_PRIVATE_ADDR]], + // LAMBDA-32: store i32 [[SFVAR_IN]], i32* [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double* + // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double* + // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]], + // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32* + // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float* + // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float* + // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]], + // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA-64: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA-32: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]]([2 x i{{[0-9]+}}]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} [[S_VAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32* +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_IN1]], i{{[0-9]+}}* [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32* +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void +#endif diff --git a/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp index 8d594e94b09f..898291833d20 100644 --- a/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -215,10 +215,12 @@ int main(int argc, char **argv) { #pragma omp target teams distribute lastprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}} +#pragma omp target teams distribute firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute lastprivate(n) firstprivate(n) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/target_teams_distribute_loop_messages.cpp b/test/OpenMP/target_teams_distribute_loop_messages.cpp index 441abcba0049..0a825ab1e609 100644 --- a/test/OpenMP/target_teams_distribute_loop_messages.cpp +++ b/test/OpenMP/target_teams_distribute_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -607,7 +607,8 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); -#pragma omp target teams distribute lastprivate(s) firstprivate(s) +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/target_teams_distribute_map_messages.cpp b/test/OpenMP/target_teams_distribute_map_messages.cpp index 5d815de85b04..eafdccb66bc9 100644 --- a/test/OpenMP/target_teams_distribute_map_messages.cpp +++ b/test/OpenMP/target_teams_distribute_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_misc_messages.c b/test/OpenMP/target_teams_distribute_misc_messages.c index bf4df0894ad3..33496a70d140 100644 --- a/test/OpenMP/target_teams_distribute_misc_messages.c +++ b/test/OpenMP/target_teams_distribute_misc_messages.c @@ -285,12 +285,15 @@ void test_firstprivate() { ; int x, y, z; +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} #pragma omp target teams distribute lastprivate(x) firstprivate(x) for (i = 0; i < 16; ++i) ; +// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}} #pragma omp target teams distribute lastprivate(x, y) firstprivate(x, y) for (i = 0; i < 16; ++i) ; +// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}} #pragma omp target teams distribute lastprivate(x, y, z) firstprivate(x, y, z) for (i = 0; i < 16; ++i) ; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp index 8df3c972dce0..f51c4fe60d7f 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_ast_print.cpp @@ -24,8 +24,10 @@ protected: public: S7(typename T::type v) : a(v) { #pragma omp target teams distribute parallel for private(a) private(this->a) private(T::a) - for (int k = 0; k < a.a; ++k) + for (int k = 0; k < a.a; ++k) { ++this->a.a; +#pragma omp cancel for + } } S7 &operator=(S7 &s) { #pragma omp target teams distribute parallel for private(a) private(this->a) @@ -43,6 +45,7 @@ public: } }; // CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) private(T::a) +// CHECK: #pragma omp cancel for // CHECK: #pragma omp target teams distribute parallel for private(this->a) private(this->a) // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) @@ -113,10 +116,10 @@ T tmain(T argc) { // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute parallel for linear(d) +#pragma omp target teams distribute parallel for for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute parallel for linear(d) +// CHECK: #pragma omp target teams distribute parallel for // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return T(); @@ -158,10 +161,10 @@ int main (int argc, char **argv) { // CHECK: #pragma omp target teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute parallel for linear(d) +#pragma omp target teams distribute parallel for for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute parallel for linear(d) +// CHECK: #pragma omp target teams distribute parallel for // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return (0); diff --git a/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp index ca7241a16c0a..c4688958e554 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} +#pragma omp target teams distribute parallel for depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for depend (in : main) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for depend (in : main) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp index d4e708308a02..0d8355047830 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_messages.cpp @@ -124,7 +124,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for firstprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for lastprivate(argc), firstprivate(argc) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp index d31acef0039d..84a6df28174d 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -216,10 +216,12 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for lastprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}} +#pragma omp target teams distribute parallel for firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for lastprivate(n) firstprivate(n) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp deleted file mode 100644 index 82220dcf45c6..000000000000 --- a/test/OpenMP/target_teams_distribute_parallel_for_linear_messages.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp %s - -namespace X { - int x; -}; - -struct B { - static int ib; // expected-note {{'B::ib' declared here}} - static int bfoo() { return 8; } -}; - -int bfoo() { return 4; } - -int z; -const int C1 = 1; -const int C2 = 2; -void test_linear_colons() -{ - int B = 0; - -#pragma omp target teams distribute parallel for linear(B:bfoo()) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B:B::bfoo()) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(X::x : ::z) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B,::z, X::x) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(::z) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B::bfoo()) // expected-error {{expected variable name}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target teams distribute parallel for linear(B::ib,B:C1+C2) - for (int i = 0; i < 10; ++i) ; -} - -template<int L, class T, class N> T test_template(T* arr, N num) { - N i; - T sum = (T)0; - T ind2 = - num * L; // expected-note {{'ind2' defined here}} - -#pragma omp target teams distribute parallel for linear(ind2:L) // expected-error {{argument of a linear clause should be of integral or pointer type}} - for (i = 0; i < num; ++i) { - T cur = arr[(int)ind2]; - ind2 += L; - sum += cur; - } - return T(); -} - -template<int LEN> int test_warn() { - int ind2 = 0; -#pragma omp target teams distribute parallel for linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} - for (int i = 0; i < 100; i++) { - ind2 += LEN; - } - return ind2; -} - -struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} -extern S1 a; -class S2 { - mutable int a; -public: - S2():a(0) { } -}; -const S2 b; // expected-note 2 {{'b' defined here}} -const S2 ba[5]; -class S3 { - int a; -public: - S3():a(0) { } -}; -const S3 ca[5]; -class S4 { - int a; - S4(); -public: - S4(int v):a(v) { } -}; -class S5 { - int a; - S5():a(0) {} -public: - S5(int v):a(v) { } -}; - -S3 h; -#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} - -template<class I, class C> int foomain(I argc, C **argv) { - I e(4); - I g(5); - int i; - int &j = i; - -#pragma omp target teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear () // expected-error {{expected expression}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc : 5) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (a, b:B::ib) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(e, g) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target teams distribute parallel for linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target teams distribute parallel for linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target teams distribute parallel for linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - return 0; -} - -namespace A { -double x; -#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} -} -namespace C { -using A::x; -} - -int main(int argc, char **argv) { - double darr[100]; - // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}} - test_template<-4>(darr, 4); - // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}} - test_warn<0>(); - - S4 e(4); // expected-note {{'e' defined here}} - S5 g(5); // expected-note {{'g' defined here}} - int i; - int &j = i; - -#pragma omp target teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear () // expected-error {{expected expression}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argc) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}} - for (int k = 0; k < argc; ++k) ++k; - - -#pragma omp target teams distribute parallel for linear (a, b) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(e, g) // expected-error {{argument of a linear clause should be of integral or pointer type, not 'S4'}} expected-error {{argument of a linear clause should be of integral or pointer type, not 'S5'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp parallel -{ - int i; -#pragma omp target teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } -} - -#pragma omp target teams distribute parallel for linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} - return 0; -} - diff --git a/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp index d912737a2146..57ab273c4601 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -605,7 +605,8 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); -#pragma omp target teams distribute parallel for lastprivate(s) firstprivate(s) +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp index 326db95f90f8..1fb466ee5518 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp index cb76ef2e0124..15135838e981 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_messages.cpp @@ -27,7 +27,7 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for } // expected-warning {{extra tokens at the end of '#pragma omp target teams distribute parallel for' are ignored}} for (int i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for +#pragma omp target teams distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp target teams distribute parallel for'}} for (int i = 0; i < argc; ++i) foo(); // expected-warning@+1 {{extra tokens at the end of '#pragma omp target teams distribute parallel for' are ignored}} diff --git a/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c b/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c index 769be6c10c3a..c48a2dfa9e68 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c +++ b/test/OpenMP/target_teams_distribute_parallel_for_misc_messages.c @@ -285,12 +285,15 @@ void test_firstprivate() { ; int x, y, z; +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} #pragma omp target teams distribute parallel for lastprivate(x) firstprivate(x) for (i = 0; i < 16; ++i) ; +// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}} #pragma omp target teams distribute parallel for lastprivate(x, y) firstprivate(x, y) for (i = 0; i < 16; ++i) ; +// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}} #pragma omp target teams distribute parallel for lastprivate(x, y, z) firstprivate(x, y, z) for (i = 0; i < 16; ++i) ; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp index ba9586a1cef8..59b35417d401 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp index 29a0c6d36333..af588f1b81d9 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_ast_print.cpp @@ -28,8 +28,9 @@ public: ++this->a.a; } S7 &operator=(S7 &s) { -#pragma omp target teams distribute parallel for simd private(a) private(this->a) - for (int k = 0; k < s.a.a; ++k) + int k; +#pragma omp target teams distribute parallel for simd private(a) private(this->a) linear(k) + for (k = 0; k < s.a.a; ++k) ++s.a.a; foo(); @@ -53,7 +54,7 @@ public: } }; // CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) private(T::a) -// CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) +// CHECK: #pragma omp target teams distribute parallel for simd private(this->a) private(this->a) linear(k) // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK: #pragma omp target teams distribute parallel for simd simdlen(slen1) safelen(slen2) aligned(arr: alen) @@ -135,10 +136,10 @@ T tmain(T argc) { // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute parallel for simd simdlen(clen-1) linear(d) +#pragma omp target teams distribute parallel for simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) linear(d) +// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target teams distribute parallel for simd safelen(clen-1) aligned(arr:alen) @@ -186,10 +187,10 @@ int main (int argc, char **argv) { // CHECK: #pragma omp target teams distribute parallel for simd default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute parallel for simd simdlen(clen-1) linear(d) +#pragma omp target teams distribute parallel for simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) linear(d) +// CHECK: #pragma omp target teams distribute parallel for simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target teams distribute parallel for simd safelen(clen-1) aligned(arr:N+6) diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp index 2dc9979cdc1c..4a9bf01e40bf 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} +#pragma omp target teams distribute parallel for simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for simd depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for simd depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd depend (in : main) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for simd depend (in : main) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} +#pragma omp target teams distribute parallel for simd depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp index d26044abc81b..4790b4bffdaf 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_messages.cpp @@ -124,7 +124,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd lastprivate(argc), firstprivate(argc) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for simd lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp index 954830eb19f5..156f1a66c191 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -216,10 +216,12 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}} +#pragma omp target teams distribute parallel for simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd lastprivate(n) firstprivate(n) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp index 24c432ade9e4..257244ede444 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_linear_messages.cpp @@ -18,33 +18,42 @@ void test_linear_colons() { int B = 0; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(::z) for (int i = 0; i < 10; ++i) ; #pragma omp target teams distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(B::ib,B:C1+C2) for (int i = 0; i < 10; ++i) ; } @@ -65,6 +74,7 @@ template<int L, class T, class N> T test_template(T* arr, N num) { template<int LEN> int test_warn() { int ind2 = 0; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} for (int i = 0; i < 100; i++) { ind2 += LEN; @@ -118,15 +128,18 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp target teams distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc : 5) for (int k = 0; k < argc; ++k) ++k; @@ -139,33 +152,17 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp target teams distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(e, g) for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear(i) for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target teams distribute parallel for simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target teams distribute parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target teams distribute parallel for simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -198,15 +195,18 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute parallel for simd linear (argc) for (int k = 0; k < argc; ++k) ++k; @@ -226,22 +226,6 @@ int main(int argc, char **argv) { #pragma omp target teams distribute parallel for simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int i; - #pragma omp target teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp target teams distribute parallel for simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } - } - -#pragma omp target teams distribute parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp index 0ed57baa5c3b..a16dec190dd2 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -608,7 +608,8 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); -#pragma omp target teams distribute parallel for simd lastprivate(s) firstprivate(s) +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp index 8f9c3e7e073c..b230d9ab24fe 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute parallel for simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute parallel for simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute parallel for simd map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c b/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c index d444844be5b4..1ea02f6d2b57 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_misc_messages.c @@ -285,15 +285,22 @@ void test_firstprivate() { ; int x, y, z; +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} #pragma omp target teams distribute parallel for simd lastprivate(x) firstprivate(x) for (i = 0; i < 16; ++i) ; +// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}} #pragma omp target teams distribute parallel for simd lastprivate(x, y) firstprivate(x, y) for (i = 0; i < 16; ++i) ; +// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}} #pragma omp target teams distribute parallel for simd lastprivate(x, y, z) firstprivate(x, y, z) for (i = 0; i < 16; ++i) ; +// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}} +#pragma omp target teams distribute parallel for simd simdlen(64) safelen(8) + for (i = 0; i < 16; ++i) + ; } void test_loop_messages() { diff --git a/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp index cadccbf5cda3..63744c77fb3a 100644 --- a/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp +++ b/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_distribute_private_codegen.cpp b/test/OpenMP/target_teams_distribute_private_codegen.cpp new file mode 100644 index 000000000000..7c88c01c4ab3 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -0,0 +1,233 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target teams distribute private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]() + // LAMBDA: ret +#pragma omp target teams distribute private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]() + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_reduction_codegen.cpp new file mode 100644 index 000000000000..188b75306453 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -0,0 +1,211 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target teams distribute reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target teams distribute reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR:%.+]] = load i32*, i32** [[SIVAR_ADDR]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i32* {{.+}}) +// CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]() +// CHECK: ret i32 [[RES]] + +// CHECK: define{{.*}} void @[[OFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}}** [[SIVAR_ADDR]], +// CHECK: [[SIVAR_LOAD:%.+]] = load i32*, i32** [[SIVAR_ADDR]], +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_LOAD]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, i32*{{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i32, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load i32*, i32** [[SIVAR_ADDR]], +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i32*{{.+}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR:%.+]] = load i32*, i32** [[TVAR_ADDR]], +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load i32*, i32** [[TVAR_ADDR]], +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + +#endif diff --git a/test/OpenMP/target_teams_distribute_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_reduction_messages.cpp index d52ee076130b..3cfb08651a61 100644 --- a/test/OpenMP/target_teams_distribute_reduction_messages.cpp +++ b/test/OpenMP/target_teams_distribute_reduction_messages.cpp @@ -24,9 +24,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_distribute_simd_ast_print.cpp b/test/OpenMP/target_teams_distribute_simd_ast_print.cpp index 77732ef56011..31b9ad0c5fc1 100644 --- a/test/OpenMP/target_teams_distribute_simd_ast_print.cpp +++ b/test/OpenMP/target_teams_distribute_simd_ast_print.cpp @@ -28,8 +28,9 @@ public: ++this->a.a; } S7 &operator=(S7 &s) { -#pragma omp target teams distribute simd private(a) private(this->a) - for (int k = 0; k < s.a.a; ++k) + int k; +#pragma omp target teams distribute simd private(a) private(this->a) linear(k) + for (k = 0; k < s.a.a; ++k) ++s.a.a; return *this; } @@ -50,7 +51,7 @@ public: } }; // CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) private(T::a) -// CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) +// CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) linear(k) // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK: #pragma omp target teams distribute simd simdlen(slen1) safelen(slen2) aligned(arr: alen) // CHECK: #pragma omp target teams distribute simd private(this->a) private(this->a) private(this->S::a) @@ -129,10 +130,10 @@ T tmain(T argc) { // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute simd simdlen(clen-1) linear(d) +#pragma omp target teams distribute simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) linear(d) +// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target teams distribute simd safelen(clen-1) aligned(arr:alen) @@ -180,10 +181,10 @@ int main (int argc, char **argv) { // CHECK: #pragma omp target teams distribute simd private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; -#pragma omp target teams distribute simd simdlen(clen-1) linear(d) +#pragma omp target teams distribute simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; -// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) linear(d) +// CHECK: #pragma omp target teams distribute simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target teams distribute simd safelen(clen-1) aligned(arr:N+6) diff --git a/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_codegen.cpp new file mode 100644 index 000000000000..7fb76d9198db --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -0,0 +1,824 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, only 6 will have mapped arguments, and only 4 have all-constant map +// sizes. + +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 2, i[[SZ]] 4, i[[SZ]] 4] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 288] +// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 2] +// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2] +// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 288] +// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 547, i64 288, i64 547, i64 547, i64 288, i64 288, i64 547, i64 547] +// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [5 x i64] [i64 547, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [5 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40] +// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [5 x i64] [i64 288, i64 288, i64 288, i64 288, i64 547] +// CHECK-DAG: [[SIZET7:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40] +// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 547] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK-NOT: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +int global; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long long, char> d; + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}}) + // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) nowait + for (int i = 0; i < 10; ++i) { + } + + // CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}}) + #pragma omp target teams distribute simd if(target: 0) safelen(32) linear(a) + for (a = 0; a < 10; ++a) { + a += 1; + } + + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute simd if(target: 1) + for (int i = 0; i < 10; ++i) { + aa += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] + // CHECK: [[IFTHEN]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] + + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* + // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] + // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + // CHECK-NEXT: br label %[[IFEND:.+]] + // CHECK: [[IFELSE]] + // CHECK: call void [[HVT3]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[IFEND]] + // CHECK: [[IFEND]] + #pragma omp target teams distribute simd if(target: n>10) + for (int i = 0; i < 10; ++i) { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32* + // CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]], + // CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}}, + // CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]], + // CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]], + + // CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4 + // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] + // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 + // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], + // CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CBPADDR1:%.+]], + // CHECK-DAG: store i[[SZ]] [[VLA1]], i[[SZ]]* [[CPADDR1:%.+]], + // CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CBPADDR2:%.+]], + // CHECK-DAG: store i[[SZ]] 5, i[[SZ]]* [[CPADDR2:%.+]], + // CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CBPADDR3:%.+]], + // CHECK-DAG: store i[[SZ]] [[A_CVAL]], i[[SZ]]* [[CPADDR3:%.+]], + // CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + // CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CBPADDR4:%.+]], + // CHECK-DAG: store [10 x float]* %{{.+}}, [10 x float]** [[CPADDR4:%.+]], + // CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x float]** + // CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store float* %{{.+}}, float** [[CBPADDR5:%.+]], + // CHECK-DAG: store float* %{{.+}}, float** [[CPADDR5:%.+]], + // CHECK-DAG: [[CBPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: [[CPADDR5]] = bitcast i8** {{%[^,]+}} to float** + // CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CBPADDR6:%.+]], + // CHECK-DAG: store [5 x [10 x double]]* %{{.+}}, [5 x [10 x double]]** [[CPADDR6:%.+]], + // CHECK-DAG: [[CBPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: [[CPADDR6]] = bitcast i8** {{%[^,]+}} to [5 x [10 x double]]** + // CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store double* %{{.+}}, double** [[CBPADDR7:%.+]], + // CHECK-DAG: store double* %{{.+}}, double** [[CPADDR7:%.+]], + // CHECK-DAG: [[CBPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: [[CPADDR7]] = bitcast i8** {{%[^,]+}} to double** + // CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}} + + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CBPADDR8:%.+]], + // CHECK-DAG: store [[TT]]* %{{.+}}, [[TT]]** [[CPADDR8:%.+]], + // CHECK-DAG: [[CBPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: [[CPADDR8]] = bitcast i8** {{%[^,]+}} to [[TT]]** + // CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}} + + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target teams distribute simd if(target: n>20) aligned(b) + for (int i = 0; i < 10; ++i) { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] {{[^)]+}}) +// CHECK: alloca i16, +// CHECK: ret void +// CHECK-NEXT: } + + +// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align +// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align +// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32* +// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align +// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: store i32 10, i32* [[AA_CADDR]], align +// CHECK-32: store i32 10, i32* [[AA_ADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}) +// Create stack storage and store argument in there. +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK: store i16 [[AA]], i16* [[AA_C]], align +// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}) +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT3]] +// Create stack storage and store argument in there. +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align +// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align +// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align +// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align +// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align +// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align +// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align +// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align +// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align +// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align +// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32* +// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16* +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal void [[HVT4]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]* +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_BN:%.+]] = alloca float* +// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_CN:%.+]] = alloca double* +// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]] + +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]], +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]], +// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]], +// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]], +// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target teams distribute simd if(target: n>40) + for (int i = 0; i < 10; ++i) { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target teams distribute simd if(target: n>50) + for (int i = a; i < n; ++i) { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target teams distribute simd if(target: n>60) + for (int i = 0; i < 10; ++i) { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// CHECK: i8* @llvm.stacksave() +// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32* +// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]], +// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]], + +// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]], +// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]], + +// We capture 2 VLA sizes in this target region +// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] +// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 + +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as some are repeated. +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VLA0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] 2, i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[B_CVAL]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CBPADDR3:%.+]], +// CHECK-DAG: store [[S1]]* %{{.+}}, [[S1]]** [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to [[S1]]** +// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}} + +// CHECK-DAG: store i16* %{{.+}}, i16** [[CBPADDR4:%.+]], +// CHECK-DAG: store i16* %{{.+}}, i16** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to i16** +// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}} + +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 3 +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 3 +// CHECK-DAG: store i[[SZ]] [[VAL3:%.+]], i[[SZ]]* [[CBPADDR3:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL3]], i[[SZ]]* [[CPADDR3:%.+]], +// CHECK-DAG: [[CBPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR3]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 0, i32 4 +// CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 0, i32 4 +// CHECK-DAG: store [10 x i32]* %{{.+}}, [10 x i32]** [[CBPADDR4:%.+]], +// CHECK-DAG: store [10 x i32]* %{{.+}}, [10 x i32]** [[CPADDR4:%.+]], +// CHECK-DAG: [[CBPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]** +// CHECK-DAG: [[CPADDR4]] = bitcast i8** {{%[^,]+}} to [10 x i32]** + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT6]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] +// CHECK: [[IFTHEN]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 +// CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0:%.+]], +// CHECK-DAG: [[CBPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR0]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 +// CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1:%.+]], +// CHECK-DAG: [[CBPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR1]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 +// CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2:%.+]], +// CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2:%.+]], +// CHECK-DAG: [[CBPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* +// CHECK-DAG: [[CPADDR2]] = bitcast i8** {{%[^,]+}} to i[[SZ]]* + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] +// CHECK-NEXT: br label %[[IFEND:.+]] +// CHECK: [[IFELSE]] +// CHECK: call void [[HVT5]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[IFEND]] +// CHECK: [[IFEND]] + + + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). + +// CHECK: define internal void [[HVT7]] +// Create local storage for each capture. +// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]* +// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_C:%.+]] = alloca i16* +// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]] +// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]] +// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]] +// Store captures in the context. +// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]], +// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32* +// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]], +// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]], +// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]], + +// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]] +// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align +// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + + +// CHECK: define internal void [[HVT6]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: alloca i[[SZ]], +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: alloca i[[SZ]], +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8* +// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align +// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] {{.+}}, i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +// CHECK: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + +// CHECK-DAG: !{!"llvm.loop.vectorize.width", i32 16} +// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: !{!"llvm.loop.vectorize.width", i32 32} + +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp b/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp new file mode 100644 index 000000000000..d138114633ea --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp @@ -0,0 +1,454 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target teams distribute simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// Check that no target code is emmitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } + +// CHECK-DAG: $[[REGFN:\.omp_offloading\..+]] = comdat + +// CHECK-DAG: [[A1:@.+]] = internal global [[SA]] +// CHECK-DAG: [[A2:@.+]] = global [[SA]] +// CHECK-DAG: [[B1:@.+]] = global [[SB]] +// CHECK-DAG: [[B2:@.+]] = global [[SB]] +// CHECK-DAG: [[C1:@.+]] = internal global [[SC]] +// CHECK-DAG: [[D1:@.+]] = global [[SD]] +// CHECK-DAG: [[E1:@.+]] = global [[SE]] +// CHECK-DAG: [[T1:@.+]] = global [[ST1]] +// CHECK-DAG: [[T2:@.+]] = global [[ST2]] + +// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] } +// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] } +// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] } +// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] } +// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] } +// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] } +// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] } +// CHECK-NTARGET-NOT: type { i8*, i8*, % +// CHECK-NTARGET-NOT: type { i32, % + +// We have 7 target regions + +// CHECK-DAG: {{@.+}} = private constant i8 0 +// TCHECK-NOT: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] +// CHECK-DAG: {{@.+}} = private constant i8 0 +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4] +// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i64] [i64 288] + +// CHECK-NTARGET-NOT: private constant i8 0 +// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i + +// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" +// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" +// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" +// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" +// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" +// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" +// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" +// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" +// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" +// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" +// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" +// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 +// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" +// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1 + +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }], comdat($[[REGFN]]) +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }, comdat($[[REGFN]]) + +// We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. +// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [ +// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null }, +// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* @[[REGFN]] to void ()*), i8* bitcast (void (i8*)* @[[REGFN]] to i8*) }] + +// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [ + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target teams distribute simd if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template <int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we us all the target regions: +//CHECK-DAG: define internal void @[[NAME1]]( +//CHECK-DAG: call void @[[NAME1]]( +//CHECK-DAG: define internal void @[[NAME2]]( +//CHECK-DAG: call void @[[NAME2]]( +//CHECK-DAG: define internal void @[[NAME3]]( +//CHECK-DAG: call void @[[NAME3]]( +//CHECK-DAG: define internal void @[[NAME4]]( +//CHECK-DAG: call void @[[NAME4]]( +//CHECK-DAG: define internal void @[[NAME5]]( +//CHECK-DAG: call void @[[NAME5]]( +//CHECK-DAG: define internal void @[[NAME6]]( +//CHECK-DAG: call void @[[NAME6]]( +//CHECK-DAG: define internal void @[[NAME7]]( +//CHECK-DAG: call void @[[NAME7]]( +//CHECK-DAG: define internal void @[[NAME8]]( +//CHECK-DAG: call void @[[NAME8]]( +//CHECK-DAG: define internal void @[[NAME9]]( +//CHECK-DAG: call void @[[NAME9]]( +//CHECK-DAG: define internal void @[[NAME10]]( +//CHECK-DAG: call void @[[NAME10]]( +//CHECK-DAG: define internal void @[[NAME11]]( +//CHECK-DAG: call void @[[NAME11]]( +//CHECK-DAG: define internal void @[[NAME12]]( +//CHECK-DAG: call void @[[NAME12]]( + +//TCHECK-DAG: define void @[[NAME1]]( +//TCHECK-DAG: define void @[[NAME2]]( +//TCHECK-DAG: define void @[[NAME3]]( +//TCHECK-DAG: define void @[[NAME4]]( +//TCHECK-DAG: define void @[[NAME5]]( +//TCHECK-DAG: define void @[[NAME6]]( +//TCHECK-DAG: define void @[[NAME7]]( +//TCHECK-DAG: define void @[[NAME8]]( +//TCHECK-DAG: define void @[[NAME9]]( +//TCHECK-DAG: define void @[[NAME10]]( +//TCHECK-DAG: define void @[[NAME11]]( +//TCHECK-DAG: define void @[[NAME12]]( + +// CHECK-NTARGET-NOT: __tgt_target +// CHECK-NTARGET-NOT: __tgt_register_lib +// CHECK-NTARGET-NOT: __tgt_unregister_lib + +// TCHECK-NOT: __tgt_target +// TCHECK-NOT: __tgt_register_lib +// TCHECK-NOT: __tgt_unregister_lib + +// We have 2 initializers with priority 500 +//CHECK: define internal void [[P500]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 1 initializers with priority 501 +//CHECK: define internal void [[P501]]( +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// We have 6 initializers with default priority +//CHECK: define internal void [[PMAX]]( +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK: call void @{{.+}}() +//CHECK-NOT: call void @{{.+}}() +//CHECK: ret void + +// Check registration and unregistration + +//CHECK: define internal void @[[UNREGFN:.+]](i8*) +//CHECK-SAME: comdat($[[REGFN]]) { +//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]]) +//CHECK: ret void +//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*) + +//CHECK: define linkonce hidden void @[[REGFN]](i8*) +//CHECK-SAME: comdat { +//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]]) +//CHECK: call i32 @__cxa_atexit(void (i8*)* @[[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*), +//CHECK: ret void +//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*) + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 195, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 247, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 265, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 272, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 415, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 291, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 298, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 284, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 221, i32 {{[0-9]+}}} + +// TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp b/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp new file mode 100644 index 000000000000..76618accbd68 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp @@ -0,0 +1,71 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// Test target teams distribute simd codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[CA:%.+]] = type { i32* } + +// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}}) +int nested(int a){ + // CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]]( + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + ++a; + + // CHECK: call void @"[[LNAME:.+]]"([[CA]]* + auto F = [&](){ + #pragma omp parallel + { + #pragma omp target teams distribute simd + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]]( + +// CHECK: define {{.*}}void @"[[LNAME]]"( +// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to + +// CHECK: define {{.*}}void [[PNAME]]( +// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]]( + +// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]]( +// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]]( + + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} + +// CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp new file mode 100644 index 000000000000..703136f0652e --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -0,0 +1,125 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target teams distribute simd collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: [[OMP_UB:%.+]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target teams distribute simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target teams distribute simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp b/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp index 39d12d8d4d0e..c24c7438b0b5 100644 --- a/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_depend_messages.cpp @@ -37,21 +37,21 @@ int main(int argc, char **argv, char *env[]) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd depend (out: ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} +#pragma omp target teams distribute simd depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd depend (out :S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute simd depend(in : argv[1][1] = '2') for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute simd depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd depend (in : argv[0]) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd depend (in : ) // expected-error {{expected expression}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd depend (in : main) // expected-error {{expected variable name, array element or array section}} +#pragma omp target teams distribute simd depend (in : main) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} +#pragma omp target teams distribute simd depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..d096da329abc --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -0,0 +1,197 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target teams distribute simd + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target teams distribute simd dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target teams distribute simd dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; +#pragma omp target teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target teams distribute simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target teams distribute simd dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target teams distribute simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target teams distribute simd dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp new file mode 100644 index 000000000000..f2871f811833 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -0,0 +1,342 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target teams distribute simd firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target teams distribute simd firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // LAMBDA: [[SIVAR_PRIV_ADDR:%.+]] = alloca i{{[0-9]+}}, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_PRIV_ADDR]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV_ADDR]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV_ADDR]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute simd firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp b/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp index b9b039efd929..235533b49d16 100644 --- a/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_firstprivate_messages.cpp @@ -124,7 +124,8 @@ int main(int argc, char **argv) { #pragma omp target teams distribute simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd lastprivate(argc), firstprivate(argc) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute simd lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd firstprivate(argc) map(argc) // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target teams distribute simd' directive}} expected-note {{defined as firstprivate}} diff --git a/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp new file mode 100644 index 000000000000..8a978a221954 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -0,0 +1,385 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target teams distribute simd lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA-64: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 [[G_IN:%.+]], i64 [[G1_IN:%.+]], i64 [[SVAR_IN:%.+]], i64 [[SFVAR_IN:%.+]]) + // LAMBDA-32: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i32 [[SVAR_IN:%.+]], i32 [[SFVAR_IN:%.+]]) + // LAMBDA-64: [[G_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA-64: [[G1_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA-64: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i32, + // LAMBDA-64: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i64, + // LAMBDA-32: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca i32, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA-64: store i64 [[G_IN]], i64* [[G_PRIVATE_ADDR]], + // LAMBDA-32: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[G1_IN]], i64* [[G1_PRIVATE_ADDR]], + // LAMBDA-32: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[SVAR_IN]], i64* [[SVAR_PRIVATE_ADDR]], + // LAMBDA-32: store i32 [[SVAR_IN]], i32* [[SVAR_PRIVATE_ADDR]], + // LAMBDA-64: store i64 [[SFVAR_IN]], i64* [[SFVAR_PRIVATE_ADDR]], + // LAMBDA-32: store i32 [[SFVAR_IN]], i32* [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double* + // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double* + // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]], + // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32* + // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float* + // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float* + // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]], + // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: store i32 2, i32* % + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA-64: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA-32: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]]([2 x i{{[0-9]+}}]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} [[T_VAR_IN]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} [[S_VAR_IN]], i{{[0-9]+}}* [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32* +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK-64: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK-32: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]]([2 x i{{[0-9]+}}]* {{.+}}, i{{[0-9]+}} {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_IN1]], i{{[0-9]+}}* [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32* +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK-64: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK-32: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp b/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp index 3b533e2e5484..a143dc2fd883 100644 --- a/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -216,10 +216,12 @@ int main(int argc, char **argv) { #pragma omp target teams distribute simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +// expected-error@+1 {{firstprivate variable cannot be lastprivate}} expected-note@+1 {{defined as firstprivate}} +#pragma omp target teams distribute simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd lastprivate(n) firstprivate(n) // OK +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp b/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp index 8db57a0f3665..2be792bbda74 100644 --- a/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_linear_messages.cpp @@ -18,33 +18,42 @@ void test_linear_colons() { int B = 0; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(::z) for (int i = 0; i < 10; ++i) ; #pragma omp target teams distribute simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(B::ib,B:C1+C2) for (int i = 0; i < 10; ++i) ; } @@ -65,6 +74,7 @@ template<int L, class T, class N> T test_template(T* arr, N num) { template<int LEN> int test_warn() { int ind2 = 0; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} for (int i = 0; i < 100; i++) { ind2 += LEN; @@ -118,15 +128,18 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp target teams distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc : 5) for (int k = 0; k < argc; ++k) ++k; @@ -139,33 +152,17 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp target teams distribute simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(e, g) for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear(i) for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target teams distribute simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target teams distribute simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target teams distribute simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -198,15 +195,18 @@ int main(int argc, char **argv) { #pragma omp target teams distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; #pragma omp target teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+1 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target teams distribute simd linear (argc) for (int k = 0; k < argc; ++k) ++k; @@ -226,22 +226,6 @@ int main(int argc, char **argv) { #pragma omp target teams distribute simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int i; - #pragma omp target teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp target teams distribute simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } - } - -#pragma omp target teams distribute simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp b/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp index aae00a4433bc..09aa548bd5c6 100644 --- a/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -607,7 +607,8 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); -#pragma omp target teams distribute simd lastprivate(s) firstprivate(s) +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} +#pragma omp target teams distribute simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/target_teams_distribute_simd_map_messages.cpp b/test/OpenMP/target_teams_distribute_simd_map_messages.cpp index 414f6f83debb..29d48d958a3d 100644 --- a/test/OpenMP/target_teams_distribute_simd_map_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_map_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -116,9 +116,9 @@ T tmain(T argc) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd map(ca) for (i = 0; i < argc; ++i) foo(); @@ -222,11 +222,11 @@ int main(int argc, char **argv) { for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd map(S1) // expected-error {{'S1' does not refer to a value}} for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute simd map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd map(argv[1]) for (i = 0; i < argc; ++i) foo(); -#pragma omp target teams distribute simd map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target teams distribute simd map(ba) for (i = 0; i < argc; ++i) foo(); #pragma omp target teams distribute simd map(ca) for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/target_teams_distribute_simd_misc_messages.c b/test/OpenMP/target_teams_distribute_simd_misc_messages.c index c76fdea4fcf1..70ec508f6798 100644 --- a/test/OpenMP/target_teams_distribute_simd_misc_messages.c +++ b/test/OpenMP/target_teams_distribute_simd_misc_messages.c @@ -285,15 +285,22 @@ void test_firstprivate() { ; int x, y, z; +// expected-error@+1 {{lastprivate variable cannot be firstprivate}} expected-note@+1 {{defined as lastprivate}} #pragma omp target teams distribute simd lastprivate(x) firstprivate(x) for (i = 0; i < 16; ++i) ; +// expected-error@+1 2 {{lastprivate variable cannot be firstprivate}} expected-note@+1 2 {{defined as lastprivate}} #pragma omp target teams distribute simd lastprivate(x, y) firstprivate(x, y) for (i = 0; i < 16; ++i) ; +// expected-error@+1 3 {{lastprivate variable cannot be firstprivate}} expected-note@+1 3 {{defined as lastprivate}} #pragma omp target teams distribute simd lastprivate(x, y, z) firstprivate(x, y, z) for (i = 0; i < 16; ++i) ; +// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}} +#pragma omp target teams distribute simd simdlen(64) safelen(8) + for (i = 0; i < 16; ++i) + ; } void test_loop_messages() { diff --git a/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp new file mode 100644 index 000000000000..c8883e18d7d9 --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -0,0 +1,233 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target teams distribute simd private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]() + // LAMBDA: ret +#pragma omp target teams distribute simd private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]() + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute simd private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp new file mode 100644 index 000000000000..76a1299981cb --- /dev/null +++ b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -0,0 +1,211 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target teams distribute simd reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target teams distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR:%.+]] = load i32*, i32** [[SIVAR_ADDR]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i32* {{.+}}) +// CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]() +// CHECK: ret i32 [[RES]] + +// CHECK: define{{.*}} void @[[OFFL1]](i32*{{.+}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}}** [[SIVAR_ADDR]], +// CHECK: [[SIVAR_LOAD:%.+]] = load i32*, i32** [[SIVAR_ADDR]], +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_LOAD]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, i32*{{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i32, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load i32*, i32** [[SIVAR_ADDR]], +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}* {{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i32*{{.+}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR:%.+]] = load i32*, i32** [[TVAR_ADDR]], +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}}*{{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load i32*, i32** [[TVAR_ADDR]], +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + +#endif diff --git a/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp b/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp index 1c0283285aa5..35d4e12dc339 100644 --- a/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp +++ b/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_map_messages.cpp b/test/OpenMP/target_teams_map_messages.cpp index 89425d622c10..7874c2e105cf 100644 --- a/test/OpenMP/target_teams_map_messages.cpp +++ b/test/OpenMP/target_teams_map_messages.cpp @@ -265,7 +265,7 @@ void SAclient(int arg) { {} #pragma omp target teams map((p+1)->A) // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}} {} - #pragma omp target teams map(u.B) // expected-error {{mapped storage cannot be derived from a union}} + #pragma omp target teams map(u.B) // expected-error {{mapping of union members is not allowed}} {} #pragma omp target data map(to: r.C) //expected-note {{used here}} @@ -312,8 +312,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -346,7 +346,7 @@ template <class T> struct S6; template<> -struct S6<int> // expected-note {{mappable type cannot be polymorphic}} +struct S6<int> { virtual void foo(); }; @@ -414,8 +414,8 @@ T tmain(T argc) { #pragma omp target data map(tofrom: argc > 0 ? x : y) // expected-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} -#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target data map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target data map(ba) #pragma omp target data map(ca) #pragma omp target data map(da) #pragma omp target data map(S2::S2s) @@ -488,9 +488,9 @@ int main(int argc, char **argv) { #pragma omp target data map(tofrom: argc > 0 ? argv[1] : argv[2]) // expected-error {{xpected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target data map(argc) #pragma omp target data map(S1) // expected-error {{'S1' does not refer to a value}} -#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} #pragma omp target data map(argv[1]) -#pragma omp target data map(ba) // expected-error 2 {{type 'S2' is not mappable to target}} +#pragma omp target data map(ba) #pragma omp target data map(ca) #pragma omp target data map(da) #pragma omp target data map(S2::S2s) @@ -530,7 +530,7 @@ int main(int argc, char **argv) { #pragma omp target teams firstprivate(j) map(j) // expected-error {{firstprivate variable cannot be in a map clause in '#pragma omp target teams' directive}} expected-note {{defined as firstprivate}} {} -#pragma omp target teams map(m) // expected-error {{type 'S6<int>' is not mappable to target}} +#pragma omp target teams map(m) {} return tmain<int, 3>(argc)+tmain<from, 4>(argc); // expected-note {{in instantiation of function template specialization 'tmain<int, 3>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<int, 4>' requested here}} diff --git a/test/OpenMP/target_teams_num_teams_codegen.cpp b/test/OpenMP/target_teams_num_teams_codegen.cpp index ad3626515f50..91288458c5aa 100644 --- a/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -147,10 +147,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -160,10 +158,8 @@ int bar(int n){ // // // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1024, i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1024, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -190,10 +186,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -213,10 +207,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 [[TEAMS]], i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -233,10 +225,8 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 20, i32 0) -// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 20, i32 0) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -257,10 +247,8 @@ int bar(int n){ // CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align // CHECK: [[TEAMS:%.+]] = sext i16 [[T]] to i32 // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] diff --git a/test/OpenMP/target_teams_reduction_messages.cpp b/test/OpenMP/target_teams_reduction_messages.cpp index 48fa310253c8..b5876b108508 100644 --- a/test/OpenMP/target_teams_reduction_messages.cpp +++ b/test/OpenMP/target_teams_reduction_messages.cpp @@ -24,9 +24,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/target_teams_thread_limit_codegen.cpp b/test/OpenMP/target_teams_thread_limit_codegen.cpp index d89f51561712..5d93c1de77fe 100644 --- a/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -147,10 +147,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 0, i32 [[TL]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 0, i32 [[TL]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -160,10 +158,8 @@ int bar(int n){ // // // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 0, i32 1024) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 0, i32 1024) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -199,10 +195,8 @@ int bar(int n){ // CHECK: [[TEAMS:%.+]] = load i32, i32* [[CAPE_ADDR1]], align // CHECK: [[TL:%.+]] = load i32, i32* [[CAPE_ADDR2]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 [[TEAMS]], i32 [[TL]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 [[TEAMS]], i32 [[TL]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -222,10 +216,8 @@ int bar(int n){ // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // CHECK: [[TL:%.+]] = load i32, i32* [[CAPE_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 0, i32 [[TL]]) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 0, i32 [[TL]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -242,10 +234,8 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 0, i32 20) -// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 0, i32 20) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] @@ -266,10 +256,8 @@ int bar(int n){ // CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align // CHECK: [[TEAMS:%.+]] = sext i16 [[T]] to i32 // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 1024) -// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align -// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align -// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 [[TEAMS]], i32 1024) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] // // CHECK: [[FAIL]] diff --git a/test/OpenMP/target_update_codegen.cpp b/test/OpenMP/target_update_codegen.cpp index f1e61a54c6e2..7f45c313a8a1 100644 --- a/test/OpenMP/target_update_codegen.cpp +++ b/test/OpenMP/target_update_codegen.cpp @@ -22,15 +22,15 @@ ST<int> gb; double gc[100]; // CK1: [[SIZE00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} 800] -// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i32] [i32 34] +// CK1: [[MTYPE00:@.+]] = {{.+}}constant [1 x i64] [i64 34] // CK1: [[SIZE02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] 4] -// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i32] [i32 33] +// CK1: [[MTYPE02:@.+]] = {{.+}}constant [1 x i64] [i64 33] -// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i32] [i32 34] +// CK1: [[MTYPE03:@.+]] = {{.+}}constant [1 x i64] [i64 34] // CK1: [[SIZE04:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] 24] -// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i32] [i32 33, i32 17] +// CK1: [[MTYPE04:@.+]] = {{.+}}constant [2 x i64] [i64 33, i64 17] // CK1-LABEL: _Z3fooi void foo(int arg) { @@ -38,8 +38,9 @@ void foo(int arg) { float lb[arg]; // Region 00 - // CK1-DAG: call void @__tgt_target_data_update(i32 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) - // CK1-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, + // CK1-DAG: call void @__tgt_target_data_update_nowait(i64 [[DEV:%[^,]+]], i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00]]{{.+}}) + // CK1-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 + // CK1-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -51,7 +52,7 @@ void foo(int arg) { // CK1-DAG: store [100 x double]* @gc, [100 x double]** [[PC0]] // CK1: %{{.+}} = add nsw i32 %{{[^,]+}}, 1 - #pragma omp target update if(1+3-5) device(arg) from(gc) + #pragma omp target update if(1+3-5) device(arg) from(gc) nowait {++arg;} // Region 01 @@ -62,7 +63,7 @@ void foo(int arg) { // Region 02 // CK1: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK1: [[IFTHEN]] - // CK1-DAG: call void @__tgt_target_data_update(i32 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_update(i64 4, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE02]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE02]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -85,7 +86,7 @@ void foo(int arg) { {++arg;} // Region 03 - // CK1-DAG: call void @__tgt_target_data_update(i32 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i[[sz]]* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE03]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK1-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] @@ -108,7 +109,7 @@ void foo(int arg) { {++arg;} // Region 04 - // CK1-DAG: call void @__tgt_target_data_update(i32 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) + // CK1-DAG: call void @__tgt_target_data_update(i64 -1, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE04]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE04]]{{.+}}) // CK1-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK1-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] @@ -159,7 +160,7 @@ struct ST { }; // CK2: [[SIZE00:@.+]] = {{.+}}constant [2 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}, i{{64|32}} 24] -// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i32] [i32 34, i32 18] +// CK2: [[MTYPE00:@.+]] = {{.+}}constant [2 x i64] [i64 34, i64 18] // CK2-LABEL: _Z3bari int bar(int arg){ @@ -170,8 +171,9 @@ int bar(int arg){ // Region 00 // CK2: br i1 %{{[^,]+}}, label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CK2: [[IFTHEN]] -// CK2-DAG: call void @__tgt_target_data_update(i32 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) -// CK2-DAG: [[DEV]] = load i32, i32* %{{[^,]+}}, +// CK2-DAG: call void @__tgt_target_data_update(i64 [[DEV:%[^,]+]], i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE00]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE00]]{{.+}}) +// CK2-DAG: [[DEV]] = sext i32 [[DEVi32:%[^,]+]] to i64 +// CK2-DAG: [[DEVi32]] = load i32, i32* %{{[^,]+}}, // CK2-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK2-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_update_depend_messages.cpp b/test/OpenMP/target_update_depend_messages.cpp index 64383a049104..59e159e99fdb 100644 --- a/test/OpenMP/target_update_depend_messages.cpp +++ b/test/OpenMP/target_update_depend_messages.cpp @@ -35,14 +35,14 @@ int tmain(T argc, S **argv, R *env[]) { #pragma omp target update to(z) depend(source) // expected-error {{expected expression}} expected-warning {{missing ':' after dependency type - ignoring}} #pragma omp target update to(z) depend(in : argc)) // expected-warning {{extra tokens at the end of '#pragma omp target update' are ignored}} #pragma omp target update to(z) depend(out: ) // expected-error {{expected expression}} - #pragma omp target update to(z) depend(inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target update to(z) depend(inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} #pragma omp target update to(z) depend(out :S1) // expected-error {{'S1' does not refer to a value}} - #pragma omp target update to(z) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} - #pragma omp target update to(z) depend(in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target update to(z) depend(in : argv[1][1] = '2') // expected-error {{expected addressable lvalue expression, array element or array section}} + #pragma omp target update to(z) depend(in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} #pragma omp target update to(z) depend(in : argv[0]) #pragma omp target update to(z) depend(in : ) // expected-error {{expected expression}} - #pragma omp target update to(z) depend(in : tmain) // expected-error {{expected variable name, array element or array section}} - #pragma omp target update to(z) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target update to(z) depend(in : tmain) + #pragma omp target update to(z) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} #pragma omp target update to(z) depend(in : vec[1:2]) // expected-error {{ value is not an array or pointer}} #pragma omp target update to(z) depend(in : argv[ // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} #pragma omp target update to(z) depend(in : argv[: // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} @@ -83,14 +83,14 @@ int main(int argc, char **argv, char *env[]) { #pragma omp target update to(z) depend(source) // expected-error {{expected expression}} expected-warning {{missing ':' after dependency type - ignoring}} #pragma omp target update to(z) depend(in : argc)) // expected-warning {{extra tokens at the end of '#pragma omp target update' are ignored}} #pragma omp target update to(z) depend(out: ) // expected-error {{expected expression}} - #pragma omp target update to(z) depend(inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp target update to(z) depend(inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} #pragma omp target update to(z) depend(out :S1) // expected-error {{'S1' does not refer to a value}} - #pragma omp target update to(z) depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} - #pragma omp target update to(z) depend(in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp target update to(z) depend(in : argv[1][1] = '2') + #pragma omp target update to(z) depend(in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} #pragma omp target update to(z) depend(in : argv[0]) #pragma omp target update to(z) depend(in : ) // expected-error {{expected expression}} - #pragma omp target update to(z) depend(in : main) // expected-error {{expected variable name, array element or array section}} - #pragma omp target update to(z) depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp target update to(z) depend(in : main) + #pragma omp target update to(z) depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} #pragma omp target update to(z) depend(in : vec[1:2]) // expected-error {{ value is not an array or pointer}} #pragma omp target update to(z) depend(in : argv[ // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} #pragma omp target update to(z) depend(in : argv[: // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} diff --git a/test/OpenMP/target_update_from_messages.cpp b/test/OpenMP/target_update_from_messages.cpp index 6aff083b6a48..c42bd12f75b8 100644 --- a/test/OpenMP/target_update_from_messages.cpp +++ b/test/OpenMP/target_update_from_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -94,8 +94,8 @@ T tmain(T argc) { #pragma omp target update from(y x) // expected-error {{expected ',' or ')' in 'from' clause}} #pragma omp target update from(argc > 0 ? x : y) // expected-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target update from(S1) // expected-error {{'S1' does not refer to a value}}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} -#pragma omp target update from(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target update from(ba) // expected-error 2 {{type 'S2' is not mappable to target}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} +#pragma omp target update from(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target update from(ba) #pragma omp target update from(h) // expected-error {{threadprivate variables are not allowed in 'from' clause}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update from(k), to(k) // expected-error 2 {{variable can appear only once in OpenMP 'target update' construct}} expected-note 2 {{used here}} #pragma omp target update from(t), from(t[:5]) // expected-error 2 {{variable can appear only once in OpenMP 'target update' construct}} expected-note 2 {{used here}} @@ -148,8 +148,8 @@ int main(int argc, char **argv) { #pragma omp target update from(y x) // expected-error {{expected ',' or ')' in 'from' clause}} #pragma omp target update from(argc > 0 ? x : y) // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update from(S1) // expected-error {{'S1' does not refer to a value}}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} -#pragma omp target update from(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target update from(ba) // expected-error 2 {{type 'S2' is not mappable to target}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} +#pragma omp target update from(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target update from(ba) #pragma omp target update from(h) // expected-error {{threadprivate variables are not allowed in 'from' clause}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update from(k), to(k) // expected-error {{variable can appear only once in OpenMP 'target update' construct}} expected-note {{used here}} #pragma omp target update from(t), from(t[:5]) // expected-error {{variable can appear only once in OpenMP 'target update' construct}} expected-note {{used here}} diff --git a/test/OpenMP/target_update_to_messages.cpp b/test/OpenMP/target_update_to_messages.cpp index 641d0bd949a2..9bde51e9839b 100644 --- a/test/OpenMP/target_update_to_messages.cpp +++ b/test/OpenMP/target_update_to_messages.cpp @@ -14,8 +14,8 @@ class S2 { public: S2():a(0) { } S2(S2 &s2):a(s2.a) { } - static float S2s; // expected-note 4 {{mappable type cannot contain static members}} - static const float S2sc; // expected-note 4 {{mappable type cannot contain static members}} + static float S2s; + static const float S2sc; }; const float S2::S2sc = 0; const S2 b; @@ -94,8 +94,8 @@ T tmain(T argc) { #pragma omp target update to(y x) // expected-error {{expected ',' or ')' in 'to' clause}} #pragma omp target update to(argc > 0 ? x : y) // expected-error 2 {{expected expression containing only member accesses and/or array sections based on named variables}} #pragma omp target update to(S1) // expected-error {{'S1' does not refer to a value}}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} -#pragma omp target update to(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target update to(ba) // expected-error 2 {{type 'S2' is not mappable to target}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} +#pragma omp target update to(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target update to(ba) #pragma omp target update to(h) // expected-error {{threadprivate variables are not allowed in 'to' clause}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update to(k), from(k) // expected-error 2 {{variable can appear only once in OpenMP 'target update' construct}} expected-note 2 {{used here}} #pragma omp target update to(t), to(t[:5]) // expected-error 2 {{variable can appear only once in OpenMP 'target update' construct}} expected-note 2 {{used here}} @@ -147,8 +147,8 @@ int main(int argc, char **argv) { #pragma omp target update to(y x) // expected-error {{expected ',' or ')' in 'to' clause}} #pragma omp target update to(argc > 0 ? x : y) // expected-error {{expected expression containing only member accesses and/or array sections based on named variables}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update to(S1) // expected-error {{'S1' does not refer to a value}}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} -#pragma omp target update to(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} expected-error 2 {{type 'S2' is not mappable to target}} -#pragma omp target update to(ba) // expected-error 2 {{type 'S2' is not mappable to target}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} +#pragma omp target update to(a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} +#pragma omp target update to(ba) #pragma omp target update to(h) // expected-error {{threadprivate variables are not allowed in 'to' clause}} expected-error {{expected at least one 'to' clause or 'from' clause specified to '#pragma omp target update'}} #pragma omp target update to(k), from(k) // expected-error {{variable can appear only once in OpenMP 'target update' construct}} expected-note {{used here}} #pragma omp target update to(t), to(t[:5]) // expected-error {{variable can appear only once in OpenMP 'target update' construct}} expected-note {{used here}} diff --git a/test/OpenMP/target_vla_messages.cpp b/test/OpenMP/target_vla_messages.cpp new file mode 100644 index 000000000000..c970d0def661 --- /dev/null +++ b/test/OpenMP/target_vla_messages.cpp @@ -0,0 +1,201 @@ +// PowerPC supports VLAs. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm-bc %s -o %t-ppc-host-ppc.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-ppc.bc -o %t-ppc-device.ll + +// Nvidia GPUs don't support VLAs. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host-nvptx.bc +// RUN: %clang_cc1 -verify -DNO_VLA -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-nvptx.bc -o %t-nvptx-device.ll + +#ifndef NO_VLA +// expected-no-diagnostics +#endif + +#pragma omp declare target +void declare(int arg) { + int a[2]; +#ifdef NO_VLA + // expected-error@+2 {{variable length arrays are not supported for the current target}} +#endif + int vla[arg]; +} + +void declare_parallel_reduction(int arg) { + int a[2]; + +#pragma omp parallel reduction(+: a) + { } + +#pragma omp parallel reduction(+: a[0:2]) + { } + +#ifdef NO_VLA + // expected-error@+3 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+2 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp parallel reduction(+: a[0:arg]) + { } +} +#pragma omp end declare target + +template <typename T> +void target_template(int arg) { +#pragma omp target + { +#ifdef NO_VLA + // expected-error@+2 {{variable length arrays are not supported for the current target}} +#endif + T vla[arg]; + } +} + +void target(int arg) { +#pragma omp target + { +#ifdef NO_VLA + // expected-error@+2 {{variable length arrays are not supported for the current target}} +#endif + int vla[arg]; + } + +#pragma omp target + { +#pragma omp parallel + { +#ifdef NO_VLA + // expected-error@+2 {{variable length arrays are not supported for the current target}} +#endif + int vla[arg]; + } + } + + target_template<long>(arg); +} + +void teams_reduction(int arg) { + int a[2]; + int vla[arg]; + +#pragma omp target map(a) +#pragma omp teams reduction(+: a) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla) +#pragma omp teams reduction(+: vla) + { } + +#pragma omp target map(a[0:2]) +#pragma omp teams reduction(+: a[0:2]) + { } + +#pragma omp target map(vla[0:2]) +#pragma omp teams reduction(+: vla[0:2]) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(a[0:arg]) +#pragma omp teams reduction(+: a[0:arg]) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla[0:arg]) +#pragma omp teams reduction(+: vla[0:arg]) + { } +} + +void parallel_reduction(int arg) { + int a[2]; + int vla[arg]; + +#pragma omp target map(a) +#pragma omp parallel reduction(+: a) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla) +#pragma omp parallel reduction(+: vla) + { } + +#pragma omp target map(a[0:2]) +#pragma omp parallel reduction(+: a[0:2]) + { } + +#pragma omp target map(vla[0:2]) +#pragma omp parallel reduction(+: vla[0:2]) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(a[0:arg]) +#pragma omp parallel reduction(+: a[0:arg]) + { } + +#ifdef NO_VLA + // expected-error@+4 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+3 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla[0:arg]) +#pragma omp parallel reduction(+: vla[0:arg]) + { } +} + +void for_reduction(int arg) { + int a[2]; + int vla[arg]; + +#pragma omp target map(a) +#pragma omp parallel +#pragma omp for reduction(+: a) + for (int i = 0; i < arg; i++) ; + +#ifdef NO_VLA + // expected-error@+5 {{cannot generate code for reduction on variable length array}} + // expected-note@+4 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla) +#pragma omp parallel +#pragma omp for reduction(+: vla) + for (int i = 0; i < arg; i++) ; + +#pragma omp target map(a[0:2]) +#pragma omp parallel +#pragma omp for reduction(+: a[0:2]) + for (int i = 0; i < arg; i++) ; + +#pragma omp target map(vla[0:2]) +#pragma omp parallel +#pragma omp for reduction(+: vla[0:2]) + for (int i = 0; i < arg; i++) ; + +#ifdef NO_VLA + // expected-error@+5 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+4 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(a[0:arg]) +#pragma omp parallel +#pragma omp for reduction(+: a[0:arg]) + for (int i = 0; i < arg; i++) ; + +#ifdef NO_VLA + // expected-error@+5 {{cannot generate code for reduction on array section, which requires a variable length array}} + // expected-note@+4 {{variable length arrays are not supported for the current target}} +#endif +#pragma omp target map(vla[0:arg]) +#pragma omp parallel +#pragma omp for reduction(+: vla[0:arg]) + for (int i = 0; i < arg; i++) ; +} diff --git a/test/OpenMP/task_ast_print.cpp b/test/OpenMP/task_ast_print.cpp index 9a7d64ee5462..54c05d8ed410 100644 --- a/test/OpenMP/task_ast_print.cpp +++ b/test/OpenMP/task_ast_print.cpp @@ -13,17 +13,19 @@ struct S1 { S1(int v) : a(v) {} int a; typedef int type; + S1 operator +(const S1&); }; template <typename T> class S7 : public T { protected: - T a; + T a, b; S7() : a(0) {} public: S7(typename T::type v) : a(v) { -#pragma omp task private(a) private(this->a) private(T::a) +#pragma omp taskgroup task_reduction(+:b) +#pragma omp task private(a) private(this->a) private(T::a) in_reduction(+:this->b) for (int k = 0; k < a.a; ++k) ++this->a.a; } @@ -35,7 +37,8 @@ public: } }; -// CHECK: #pragma omp task private(this->a) private(this->a) private(T::a) +// CHECK: #pragma omp taskgroup task_reduction(+: this->b) +// CHECK: #pragma omp task private(this->a) private(this->a) private(T::a) in_reduction(+: this->b) // CHECK: #pragma omp task private(this->a) private(this->a) // CHECK: #pragma omp task private(this->a) private(this->a) private(this->S1::a) @@ -89,7 +92,8 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S<T>::TS > 0) priority(argc) foo(); -#pragma omp task if (C) mergeable priority(C) +#pragma omp taskgroup task_reduction(-: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) foo(); return 0; } @@ -103,7 +107,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<T>::TS > 0) priority(argc) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain<int, 5>(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -114,7 +119,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<int>::TS > 0) priority(argc) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain<long, 1>(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, e, f, g; @@ -125,7 +131,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S<long>::TS > 0) priority(argc) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) // CHECK-NEXT: foo() enum Enum {}; @@ -134,7 +141,7 @@ int main(int argc, char **argv) { long x; int b = argc, c, d, e, f, g; static int a; - int arr[10]; + int arr[10], arr1[argc]; #pragma omp threadprivate(a) Enum ee; // CHECK: Enum ee; @@ -142,8 +149,18 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp task untied mergeable depend(out : argv[:a][1],(arr)[0:]) if(task: argc > 0) priority(f) a = 2; // CHECK-NEXT: a = 2; -#pragma omp task default(none), private(argc, b) firstprivate(argv) if (argc > 0) final(a > 0) depend(inout : a, argv[:argc],arr[:a]) priority(23) - // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) if(argc > 0) final(a > 0) depend(inout : a,argv[:argc],arr[:a]) priority(23) +#pragma omp taskgroup task_reduction(min: arr1) +#pragma omp task default(none), private(argc, b) firstprivate(argv) if (argc > 0) final(a > 0) depend(inout : a, argv[:argc],arr[:a]) priority(23) in_reduction(min: arr1) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(min: arr1) + // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) if(argc > 0) final(a > 0) depend(inout : a,argv[:argc],arr[:a]) priority(23) in_reduction(min: arr1) + foo(); + // CHECK-NEXT: foo(); +#pragma omp taskgroup task_reduction(min: arr1) +#pragma omp parallel reduction(+:arr1) +#pragma omp task in_reduction(min: arr1) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(min: arr1) + // CHECK-NEXT: #pragma omp parallel reduction(+: arr1) + // CHECK-NEXT: #pragma omp task in_reduction(min: arr1) foo(); // CHECK-NEXT: foo(); return tmain<int, 5>(b, &b) + tmain<long, 1>(x, &x); diff --git a/test/OpenMP/task_depend_messages.cpp b/test/OpenMP/task_depend_messages.cpp index 576738ceac46..1e51b479dd96 100644 --- a/test/OpenMP/task_depend_messages.cpp +++ b/test/OpenMP/task_depend_messages.cpp @@ -28,14 +28,14 @@ int main(int argc, char **argv, char *env[]) { #pragma omp task depend (source) // expected-error {{expected expression}} expected-warning {{missing ':' after dependency type - ignoring}} #pragma omp task depend (in : argc)) // expected-warning {{extra tokens at the end of '#pragma omp task' are ignored}} #pragma omp task depend (out: ) // expected-error {{expected expression}} - #pragma omp task depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected variable name, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} + #pragma omp task depend (inout : foobool(argc)), depend (in, argc) // expected-error {{expected addressable lvalue expression, array element or array section}} expected-warning {{missing ':' after dependency type - ignoring}} expected-error {{expected expression}} #pragma omp task depend (out :S1) // expected-error {{'S1' does not refer to a value}} - #pragma omp task depend(in : argv[1][1] = '2') // expected-error {{expected variable name, array element or array section}} - #pragma omp task depend (in : vec[1]) // expected-error {{expected variable name, array element or array section}} + #pragma omp task depend(in : argv[1][1] = '2') + #pragma omp task depend (in : vec[1]) // expected-error {{expected addressable lvalue expression, array element or array section}} #pragma omp task depend (in : argv[0]) #pragma omp task depend (in : ) // expected-error {{expected expression}} - #pragma omp task depend (in : main) // expected-error {{expected variable name, array element or array section}} - #pragma omp task depend(in : a[0]) // expected-error{{expected variable name, array element or array section}} + #pragma omp task depend (in : main) + #pragma omp task depend(in : a[0]) // expected-error{{expected addressable lvalue expression, array element or array section}} #pragma omp task depend (in : vec[1:2]) // expected-error {{ value is not an array or pointer}} #pragma omp task depend (in : argv[ // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} #pragma omp task depend (in : argv[: // expected-error {{expected expression}} expected-error {{expected ']'}} expected-error {{expected ')'}} expected-note {{to match this '['}} expected-note {{to match this '('}} diff --git a/test/OpenMP/task_in_reduction_codegen.cpp b/test/OpenMP/task_in_reduction_codegen.cpp new file mode 100644 index 000000000000..2ad013a14a57 --- /dev/null +++ b/test/OpenMP/task_in_reduction_codegen.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp task in_reduction(+:a) in_reduction(-:d) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 56, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK-NEXT: call i32 @__kmpc_omp_task(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/test/OpenMP/task_in_reduction_message.cpp b/test/OpenMP/task_in_reduction_message.cpp new file mode 100644 index 000000000000..e176dc9305b3 --- /dev/null +++ b/test/OpenMP/task_in_reduction_message.cpp @@ -0,0 +1,308 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp task in_reduction(+:ref) + foo(); +} + +void foobar1(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp task in_reduction(-:ref) + foo(); +} + +#pragma omp declare reduction (red:int:omp_out += omp_in) + +void foobar2(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp task in_reduction(red:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + foo(); +} + +void foobar3(int &ref) { +#pragma omp taskgroup task_reduction(red:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp task in_reduction(min:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + foo(); +} + +void foobar4(int &ref) { +#pragma omp task in_reduction(min:ref) // expected-error {{in_reduction variable must appear in a task_reduction clause}} + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; + T fl; +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction // expected-error {{expected '(' after 'in_reduction'}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp task' are ignored}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp task in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp taskgroup task_reduction(&:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp task in_reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp taskgroup task_reduction(|:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp task in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp task in_reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + foo(); +#pragma omp task in_reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp task in_reduction(&& : argc) + foo(); +#pragma omp task in_reduction(^ : T) // expected-error {{'T' does not refer to a value}} + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp task in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} expected-error 2 {{in_reduction variable must appear in a task_reduction clause}} + foo(); +#pragma omp task in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp task in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + foo(); +#pragma omp task in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + foo(); +#pragma omp task in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp task in_reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + foo(); +#pragma omp task in_reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + foo(); +#pragma omp parallel private(k) +#pragma omp task in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp task in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note 2 {{previously referenced here}} + foo(); +#pragma omp task in_reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp task in_reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + foo(); +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp task in_reduction(+ : fl) + foo(); +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp task in_reduction(+ : fl) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; + float fl; +#pragma omp task in_reduction // expected-error {{expected '(' after 'in_reduction'}} + foo(); +#pragma omp task in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp task' are ignored}} + foo(); +#pragma omp task in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp task in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + foo(); +#pragma omp task in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp task in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp task in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + foo(); +#pragma omp task in_reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + foo(); +#pragma omp taskgroup task_reduction(|:argc) +{ +#pragma omp taskgroup task_reduction(+:argc) // expected-note {{previously marked as task_reduction with different reduction operation}} +{ +#pragma omp task in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + foo(); +} +#pragma omp task in_reduction(| : argc) + foo(); +} +#pragma omp task in_reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp task in_reduction(~ : argc) // expected-error {{expected unqualified-id}} + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp task in_reduction(&& : argc) + foo(); +#pragma omp task in_reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp task in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + foo(); +#pragma omp task in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + foo(); +#pragma omp task in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + foo(); +#pragma omp task in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + foo(); +#pragma omp task in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp task in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp task in_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + foo(); +#pragma omp task in_reduction(+ : o) // expected-error {{no viable overloaded '='}} + foo(); +#pragma omp parallel private(k) +#pragma omp task in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp task in_reduction(+ : p), in_reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note {{previously referenced here}} + foo(); +#pragma omp task in_reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}} + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp task in_reduction(max : j) // expected-error {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp task in_reduction(+ : fl) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp task in_reduction(+ : fl) + foo(); + static int m; +#pragma omp taskgroup task_reduction(+:m) +#pragma omp task in_reduction(+ : m) // OK + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}} +} diff --git a/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/test/OpenMP/taskgroup_task_reduction_codegen.cpp new file mode 100644 index 000000000000..63e015182420 --- /dev/null +++ b/test/OpenMP/taskgroup_task_reduction_codegen.cpp @@ -0,0 +1,212 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK --check-prefix=DEBUG +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) + ; + } + return 0; +} +// CHECK-LABEL: @main +// CHECK: alloca i32, +// CHECK: [[ARGC_ADDR:%.+]] = alloca i32, +// CHECK: [[ARGV_ADDR:%.+]] = alloca i8**, +// CHECK: [[A:%.+]] = alloca i32, +// CHECK: [[B:%.+]] = alloca float, +// CHECK: [[C:%.+]] = alloca [5 x %struct.S], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* +// CHECK: [[RD_IN1:%.+]] = alloca [3 x [[T1:%[^,]+]]], +// CHECK: [[TD1:%.+]] = alloca i8*, +// CHECK: [[RD_IN2:%.+]] = alloca [2 x [[T2:%[^,]+]]], +// CHECK: [[TD2:%.+]] = alloca i8*, + +// CHECK: [[VLA:%.+]] = alloca i16, i64 [[VLA_SIZE:%[^,]+]], + +// CHECK: call void @__kmpc_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]]) +// CHECK-DAG: [[BC_A:%.+]] = bitcast i32* [[A]] to i8* +// CHECK-DAG: store i8* [[BC_A]], i8** [[A_REF:[^,]+]], +// CHECK-DAG: [[A_REF]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA:%[^,]+]], i32 0, i32 0 +// CHECK-DAG: [[GEPA]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[TMP6:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 1 +// CHECK-DAG: store i64 4, i64* [[TMP6]], +// CHECK-DAG: [[TMP7:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[AINIT:@.+]] to i8*), i8** [[TMP7]], +// CHECK-DAG: [[TMP8:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP8]], +// CHECK-DAG: [[TMP9:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[ACOMB:@.+]] to i8*), i8** [[TMP9]], +// CHECK-DAG: [[TMP10:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 5 +// CHECK-DAG: [[TMP11:%.+]] = bitcast i32* [[TMP10]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP11]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: [[TMP13:%.+]] = bitcast float* [[B]] to i8* +// CHECK-DAG: store i8* [[TMP13]], i8** [[TMP12:%[^,]+]], +// CHECK-DAG: [[TMP12]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB:%[^,]+]], i32 0, i32 0 +// CHECK-DAG: [[GEPB]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[TMP14:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 1 +// CHECK-DAG: store i64 4, i64* [[TMP14]], +// CHECK-DAG: [[TMP15:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[BINIT:@.+]] to i8*), i8** [[TMP15]], +// CHECK-DAG: [[TMP16:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP16]], +// CHECK-DAG: [[TMP17:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[BCOMB:@.+]] to i8*), i8** [[TMP17]], +// CHECK-DAG: [[TMP18:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 5 +// CHECK-DAG: [[TMP19:%.+]] = bitcast i32* [[TMP18]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP19]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: [[TMP21:%.+]] = bitcast i32* [[ARGC_ADDR]] to i8* +// CHECK-DAG: store i8* [[TMP21]], i8** [[TMP20:%[^,]+]], +// CHECK-DAG: [[TMP20]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC:%[^,]+]], i32 0, i32 0 +// CHECK-DAG: [[GEPARGC]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64 +// CHECK-DAG: [[TMP22:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 1 +// CHECK-DAG: store i64 4, i64* [[TMP22]], +// CHECK-DAG: [[TMP23:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[ARGCINIT:@.+]] to i8*), i8** [[TMP23]], +// CHECK-DAG: [[TMP24:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP24]], +// CHECK-DAG: [[TMP25:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[ARGCCOMB:@.+]] to i8*), i8** [[TMP25]], +// CHECK-DAG: [[TMP26:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 5 +// CHECK-DAG: [[TMP27:%.+]] = bitcast i32* [[TMP26]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP27]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: [[TMP28:%.+]] = bitcast [3 x [[T1]]]* [[RD_IN1]] to i8* +// CHECK-DAG: [[TMP29:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* [[TMP28]]) +// DEBUG-DAG: call void @llvm.dbg.declare(metadata i8** [[TD1]], +// CHECK-DAG: store i8* [[TMP29]], i8** [[TD1]], +// CHECK-DAG: call void @__kmpc_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]]) +// CHECK-DAG: [[TMP31:%.+]] = bitcast [5 x %struct.S]* [[C]] to i8* +// CHECK-DAG: store i8* [[TMP31]], i8** [[TMP30:%[^,]+]], +// CHECK-DAG: [[TMP30]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC:%[^,]+]], i32 0, i32 0 +// CHECK-DAG: [[GEPC]] = getelementptr inbounds [2 x [[T2]]], [2 x [[T2]]]* [[RD_IN2]], i64 0, i64 +// CHECK-DAG: [[TMP32:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 1 +// CHECK-DAG: store i64 20, i64* [[TMP32]], +// CHECK-DAG: [[TMP33:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[CINIT:@.+]] to i8*), i8** [[TMP33]], +// CHECK-DAG: [[TMP34:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 3 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[CFINI:@.+]] to i8*), i8** [[TMP34]], +// CHECK-DAG: [[TMP35:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[CCOMB:@.+]] to i8*), i8** [[TMP35]], +// CHECK-DAG: [[TMP36:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 5 +// CHECK-DAG: [[TMP37:%.+]] = bitcast i32* [[TMP36]] to i8* +// CHECK-DAG: call void @llvm.memset.p0i8.i64(i8* [[TMP37]], i8 0, i64 4, i32 8, i1 false) +// CHECK-DAG: [[TMP39:%.+]] = bitcast i16* [[VLA]] to i8* +// CHECK-DAG: store i8* [[TMP39]], i8** [[TMP38:%[^,]+]], +// CHECK-DAG: [[TMP38]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA:%[^,]+]], i32 0, i32 0 +// CHECK-DAG: [[GEPVLA]] = getelementptr inbounds [2 x [[T2]]], [2 x [[T2]]]* [[RD_IN2]], i64 0, i64 +// CHECK-DAG: [[TMP40:%.+]] = mul nuw i64 [[VLA_SIZE]], 2 +// CHECK-DAG: [[TMP41:%.+]] = udiv exact i64 [[TMP40]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64) +// CHECK-DAG: [[TMP42:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 1 +// CHECK-DAG: store i64 [[TMP40]], i64* [[TMP42]], +// CHECK-DAG: [[TMP43:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 2 +// CHECK-DAG: store i8* bitcast (void (i8*)* [[VLAINIT:@.+]] to i8*), i8** [[TMP43]], +// CHECK-DAG: [[TMP44:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 3 +// CHECK-DAG: store i8* null, i8** [[TMP44]], +// CHECK-DAG: [[TMP45:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 4 +// CHECK-DAG: store i8* bitcast (void (i8*, i8*)* [[VLACOMB:@.+]] to i8*), i8** [[TMP45]], +// CHECK-DAG: [[TMP46:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 5 +// CHECK-DAG: store i32 1, i32* [[TMP46]], +// CHECK: [[TMP47:%.+]] = bitcast [2 x [[T2]]]* [[RD_IN2]] to i8* +// CHECK: [[TMP48:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* [[TMP47]]) +// CHECK: store i8* [[TMP48]], i8** [[TD2]], +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]]) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* {{[^,]+}}, i32 [[GTID]]) + +// CHECK-DAG: define internal void [[AINIT]](i8*) +// CHECK-DAG: store i32 0, i32* % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[ACOMB]](i8*, i8*) +// CHECK-DAG: add nsw i32 % +// CHECK-DAG: store i32 % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[BINIT]](i8*) +// CHECK-DAG: store float 0.000000e+00, float* % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[BCOMB]](i8*, i8*) +// CHECK-DAG: fadd float % +// CHECK-DAG: store float % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[ARGCINIT]](i8*) +// CHECK-DAG: store i32 0, i32* % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[ARGCCOMB]](i8*, i8*) +// CHECK-DAG: add nsw i32 % +// CHECK-DAG: store i32 % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[CINIT]](i8*) +// CHECK-DAG: phi %struct.S* [ +// CHECK-DAG: call {{.+}}(%struct.S* {{.+}}) +// CHECK-DAG: br i1 % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[CFINI]](i8*) +// CHECK-DAG: phi %struct.S* [ +// CHECK-DAG: call {{.+}}(%struct.S* {{.+}}) +// CHECK-DAG: br i1 % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[CCOMB]](i8*, i8*) +// CHECK-DAG: phi %struct.S* [ +// CHECK-DAG: phi %struct.S* [ +// CHECK-DAG: call {{.+}}(%struct.S* {{.+}}, %struct.S* {{.+}}, %struct.S* {{.+}}) +// CHECK-DAG: call {{.+}}(%struct.S* {{.+}}, %struct.S* {{.+}}) +// CHECK-DAG: call {{.+}}(%struct.S* {{.+}}) +// CHECK-DAG: br i1 % +// CHECK-DAG: ret void +// CHECK_DAG: } + +// CHECK-DAG: define internal void [[VLAINIT]](i8*) +// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* {{[^,]+}}) +// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t* +// CHECK-DAG: phi i16* [ +// CHECK-DAG: store i16 0, i16* % +// CHECK-DAG: br i1 % +// CHECK-DAG: ret void +// CHECK-DAG: } + +// CHECK-DAG: define internal void [[VLACOMB]](i8*, i8*) +// CHECK-DAG: call i32 @__kmpc_global_thread_num(%ident_t* {{[^,]+}}) +// CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%ident_t* +// CHECK-DAG: phi i16* [ +// CHECK-DAG: phi i16* [ +// CHECK-DAG: sext i16 %{{.+}} to i32 +// CHECK-DAG: add nsw i32 % +// CHECK-DAG: trunc i32 %{{.+}} to i16 +// CHECK-DAG: store i16 % +// CHECK_DAG: br i1 % +// CHECK-DAG: ret void +// CHECK-DAG: } +#endif diff --git a/test/OpenMP/taskgroup_task_reduction_messages.cpp b/test/OpenMP/taskgroup_task_reduction_messages.cpp index 9c8177b0bd43..819dc5bdaf68 100644 --- a/test/OpenMP/taskgroup_task_reduction_messages.cpp +++ b/test/OpenMP/taskgroup_task_reduction_messages.cpp @@ -24,9 +24,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/taskloop_ast_print.cpp b/test/OpenMP/taskloop_ast_print.cpp index dd0af85916cf..891b31dce4db 100644 --- a/test/OpenMP/taskloop_ast_print.cpp +++ b/test/OpenMP/taskloop_ast_print.cpp @@ -13,8 +13,10 @@ T tmain(T argc) { T b = argc, c, d, e, f, g; static T a; // CHECK: static T a; -#pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) - // CHECK-NEXT: #pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) +#pragma omp taskgroup task_reduction(+: d) +#pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) in_reduction(+: d) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d) + // CHECK-NEXT: #pragma omp taskloop if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) in_reduction(+: d) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) @@ -53,8 +55,10 @@ int main(int argc, char **argv) { int b = argc, c, d, e, f, g; static int a; // CHECK: static int a; -#pragma omp taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) - // CHECK-NEXT: #pragma omp taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) +#pragma omp taskgroup task_reduction(+: d) +#pragma omp taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) in_reduction(+:d) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d) + // CHECK-NEXT: #pragma omp taskloop if(taskloop: a) default(none) shared(a) final(b) priority(5) num_tasks(argc) reduction(*: g) in_reduction(+: d) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) diff --git a/test/OpenMP/taskloop_firstprivate_messages.cpp b/test/OpenMP/taskloop_firstprivate_messages.cpp index fe22311f650c..e63d6a678b8d 100644 --- a/test/OpenMP/taskloop_firstprivate_messages.cpp +++ b/test/OpenMP/taskloop_firstprivate_messages.cpp @@ -295,9 +295,9 @@ int main(int argc, char **argv) { #pragma omp taskloop firstprivate(i) // expected-note {{defined as firstprivate}} for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp taskloop' directive may not be firstprivate, predetermined as private}} foo(); -#pragma omp parallel reduction(+ : i) // expected-note 4 {{defined as reduction}} +#pragma omp parallel reduction(+ : i) // expected-note 2 {{defined as reduction}} #pragma omp taskloop firstprivate(i) //expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} - for (i = 0; i < argc; ++i) // expected-error 3 {{reduction variables may not be accessed in an explicit task}} + for (i = 0; i < argc; ++i) // expected-error {{reduction variables may not be accessed in an explicit task}} foo(); #pragma omp parallel #pragma omp taskloop firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} diff --git a/test/OpenMP/taskloop_in_reduction_codegen.cpp b/test/OpenMP/taskloop_in_reduction_codegen.cpp new file mode 100644 index 000000000000..e9ee9a32b97b --- /dev/null +++ b/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp taskloop in_reduction(+:a) in_reduction(-:d) + for (int i = 0; i < 5; ++i) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/test/OpenMP/taskloop_in_reduction_messages.cpp b/test/OpenMP/taskloop_in_reduction_messages.cpp new file mode 100644 index 000000000000..409975565a91 --- /dev/null +++ b/test/OpenMP/taskloop_in_reduction_messages.cpp @@ -0,0 +1,376 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp taskloop in_reduction(+:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar1(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp taskloop in_reduction(-:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +#pragma omp declare reduction (red:int:omp_out += omp_in) + +void foobar2(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp taskloop in_reduction(red:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar3(int &ref) { +#pragma omp taskgroup task_reduction(red:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp taskloop in_reduction(min:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar4(int &ref) { +#pragma omp taskloop in_reduction(min:ref) // expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; + T fl; +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp taskloop in_reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp taskloop in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp taskloop in_reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(^ : T) // expected-error {{'T' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp taskloop in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} expected-error 2 {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp taskloop in_reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp taskloop in_reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +{ +#pragma omp taskloop in_reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(*:fl) // expected-note 2 {{previously marked as task_reduction with different reduction operation}} +{ +#pragma omp taskloop in_reduction(+ : fl) // expected-error 2 {{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} +} +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; + float fl; +#pragma omp taskloop in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp taskloop' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) +#pragma omp taskloop in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(~ : argc) // expected-error {{expected unqualified-id}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp taskloop in_reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp taskloop in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp taskloop in_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : o) // expected-error {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp taskloop in_reduction(+ : p), in_reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop in_reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp taskloop in_reduction(max : j) // expected-error {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop in_reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + static int m; +#pragma omp taskgroup task_reduction(+:m) +#pragma omp taskloop in_reduction(+ : m) // OK + for (int i = 0; i < 10; ++i) + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}} +} diff --git a/test/OpenMP/taskloop_lastprivate_messages.cpp b/test/OpenMP/taskloop_lastprivate_messages.cpp index e4364448159c..1d238ea083e6 100644 --- a/test/OpenMP/taskloop_lastprivate_messages.cpp +++ b/test/OpenMP/taskloop_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/taskloop_reduction_codegen.cpp b/test/OpenMP/taskloop_reduction_codegen.cpp index af071ae8bd4e..6af067642fea 100644 --- a/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/test/OpenMP/taskloop_reduction_codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s // expected-no-diagnostics struct S { @@ -51,6 +51,7 @@ sum = 0.0; // CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], // CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t* // CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t], +// CHECK: alloca i32, // CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, // CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, // CHECK: store i32 0, i32* [[RETVAL]], @@ -143,7 +144,7 @@ sum = 0.0; // CHECK: [[DIV:%.*]] = sdiv i32 [[ADD11]], 1 // CHECK: [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK: store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]], -// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @{{.+}} to i32 (i32, i8*)*)) +// CHECK: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*)) // CHECK: call void @__kmpc_taskloop(%ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* null) // CHECK: call void @__kmpc_end_taskgroup(%ident_t* @@ -195,3 +196,4 @@ sum = 0.0; // CHECK: store float %{{.+}}, float* % // CHECK: ret void +// CHECK: distinct !DISubprogram(linkageName: "[[TASK]]", scope: ! diff --git a/test/OpenMP/taskloop_reduction_messages.cpp b/test/OpenMP/taskloop_reduction_messages.cpp index 7295075c3cb8..23f70b568525 100644 --- a/test/OpenMP/taskloop_reduction_messages.cpp +++ b/test/OpenMP/taskloop_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/taskloop_simd_ast_print.cpp b/test/OpenMP/taskloop_simd_ast_print.cpp index 8678b9122e29..6f13c7e0baa5 100644 --- a/test/OpenMP/taskloop_simd_ast_print.cpp +++ b/test/OpenMP/taskloop_simd_ast_print.cpp @@ -14,8 +14,10 @@ T tmain(T argc) { T *ptr; static T a; // CHECK: static T a; -#pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+:g) - // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+: g) +#pragma omp taskgroup task_reduction(+: d) +#pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+:g) in_reduction(+: d) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d) + // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) safelen(N) linear(c) aligned(ptr) grainsize(N) reduction(+: g) in_reduction(+: d) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) @@ -54,8 +56,10 @@ int main(int argc, char **argv) { int b = argc, c, d, e, f, g; static int a; // CHECK: static int a; -#pragma omp taskloop simd if(taskloop: a) default(none) shared(a) final(b) priority(5) safelen(8) linear(b), aligned(argv) num_tasks(argc) reduction(*: g) - // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: a) default(none) shared(a) final(b) priority(5) safelen(8) linear(b) aligned(argv) num_tasks(argc) reduction(*: g) +#pragma omp taskgroup task_reduction(+: d) +#pragma omp taskloop simd if(taskloop: a) default(none) shared(a) final(b) priority(5) safelen(8) linear(b), aligned(argv) num_tasks(argc) reduction(*: g) in_reduction(+: d) + // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d) + // CHECK-NEXT: #pragma omp taskloop simd if(taskloop: a) default(none) shared(a) final(b) priority(5) safelen(8) linear(b) aligned(argv) num_tasks(argc) reduction(*: g) in_reduction(+: d) for (int i = 0; i < 2; ++i) a = 2; // CHECK-NEXT: for (int i = 0; i < 2; ++i) diff --git a/test/OpenMP/taskloop_simd_codegen.cpp b/test/OpenMP/taskloop_simd_codegen.cpp index f787689688e3..006fd59af3fa 100644 --- a/test/OpenMP/taskloop_simd_codegen.cpp +++ b/test/OpenMP/taskloop_simd_codegen.cpp @@ -158,7 +158,7 @@ struct S { // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null) -#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(64) safelen(8) +#pragma omp taskloop simd shared(c) num_tasks(a) simdlen(8) safelen(64) for (a = 0; a < c; ++a) ; } @@ -201,6 +201,6 @@ struct S { // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} // CHECK: !{!"llvm.loop.vectorize.width", i32 4} // CHECK: !{!"llvm.loop.vectorize.width", i32 32} -// CHECK: !{!"llvm.loop.vectorize.width", i32 64} +// CHECK: !{!"llvm.loop.vectorize.width", i32 8} #endif diff --git a/test/OpenMP/taskloop_simd_firstprivate_messages.cpp b/test/OpenMP/taskloop_simd_firstprivate_messages.cpp index 18cefc1beeb2..176dcd7f8f9c 100644 --- a/test/OpenMP/taskloop_simd_firstprivate_messages.cpp +++ b/test/OpenMP/taskloop_simd_firstprivate_messages.cpp @@ -295,9 +295,9 @@ int main(int argc, char **argv) { #pragma omp taskloop simd firstprivate(i) // expected-note {{defined as firstprivate}} for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp taskloop simd' directive may not be firstprivate, predetermined as linear}} foo(); -#pragma omp parallel reduction(+ : i) // expected-note 4 {{defined as reduction}} +#pragma omp parallel reduction(+ : i) // expected-note 2 {{defined as reduction}} #pragma omp taskloop simd firstprivate(i) // expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}} - for (i = 0; i < argc; ++i) // expected-error 3 {{reduction variables may not be accessed in an explicit task}} + for (i = 0; i < argc; ++i) // expected-error {{reduction variables may not be accessed in an explicit task}} foo(); #pragma omp parallel #pragma omp taskloop simd firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}} diff --git a/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp new file mode 100644 index 000000000000..d894943cc359 --- /dev/null +++ b/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK: [[PRIVATES:%.+]] = type { i8*, i8* } + +struct S { + int a; + S() : a(0) {} + S(const S&) {} + S& operator=(const S&) {return *this;} + ~S() {} + friend S operator+(const S&a, const S&b) {return a;} +}; + + +int main(int argc, char **argv) { + int a; + float b; + S c[5]; + short d[argc]; +#pragma omp taskgroup task_reduction(+: a, b, argc) + { +#pragma omp taskgroup task_reduction(-:c, d) +#pragma omp parallel +#pragma omp taskloop simd in_reduction(+:a) in_reduction(-:d) + for (int i = 0; i < 5; ++i) + a += d[a]; + } + return 0; +} + +// CHECK-LABEL: @main +// CHECK: void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID:%.+]]) +// CHECK: [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* % +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]], +// CHECK-NEXT: call void @__kmpc_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK: [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* % +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]], +// CHECK-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%ident_t* @0, i32 [[GTID]]) + +// CHECK: define internal void [[OMP_PARALLEL]]( +// CHECK: [[TASK_T:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* @0, i32 [[GTID:%.+]], i32 1, i64 96, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[T:%.+]]*)* [[OMP_TASK:@.+]] to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TASK_T_WITH_PRIVS:%.+]] = bitcast i8* [[TASK_T]] to [[T]]* +// CHECK: [[PRIVS:%.+]] = getelementptr inbounds [[T]], [[T]]* [[TASK_T_WITH_PRIVS]], i32 0, i32 1 +// CHECK: [[TD1_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 0 +// CHECK-NEXT: [[TD1_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_SHAR]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: store i8* [[TD1]], i8** [[TD1_REF]], +// CHECK-NEXT: [[TD2_REF:%.+]] = getelementptr inbounds [[PRIVATES]], [[PRIVATES]]* [[PRIVS]], i32 0, i32 1 +// CHECK-NEXT: [[TD2_SHAR:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_SHAR]], +// CHECK-NEXT: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: store i8* [[TD2]], i8** [[TD2_REF]], +// CHECK: call void @__kmpc_taskloop(%ident_t* @0, i32 [[GTID]], i8* [[TASK_T]], i32 1, +// CHECK: ret void +// CHECK-NEXT: } + +// CHECK: define internal {{.*}} [[OMP_TASK]]( +// CHECK: call void (i8*, ...) %{{[^(]+}}(i8* %{{.+}}, i8*** [[TD1_REF:%[^,]+]], i8*** [[TD2_REF:%[^,]+]]) +// CHECK-NEXT: [[TD1_ADDR:%.+]] = load i8**, i8*** [[TD1_REF]], +// CHECK-NEXT: [[TD2_ADDR:%.+]] = load i8**, i8*** [[TD2_REF]], +// CHECK-NEXT: [[A_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[A_ADDR:%.+]] = load i32*, i32** [[A_REF]], +// CHECK-NEXT: [[TD1:%.+]] = load i8*, i8** [[TD1_ADDR]], +// CHECK-NEXT: [[GTID:%.+]] = load i32, i32* % +// CHECK-NEXT: [[A_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]]) +// CHECK: [[D_REF:%.+]] = getelementptr inbounds % +// CHECK-NEXT: [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]], +// CHECK: [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]], +// CHECK-NEXT: [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8* +// CHECK-NEXT: call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]]) +// CHECK: add nsw i32 +// CHECK: store i32 % +#endif diff --git a/test/OpenMP/taskloop_simd_in_reduction_messages.cpp b/test/OpenMP/taskloop_simd_in_reduction_messages.cpp new file mode 100644 index 000000000000..f3b1a855e320 --- /dev/null +++ b/test/OpenMP/taskloop_simd_in_reduction_messages.cpp @@ -0,0 +1,376 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s +// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +void foobar(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp taskloop simd in_reduction(+:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar1(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) +#pragma omp taskloop simd in_reduction(-:ref) + for (int i = 0; i < 10; ++i) + foo(); +} + +#pragma omp declare reduction (red:int:omp_out += omp_in) + +void foobar2(int &ref) { +#pragma omp taskgroup task_reduction(+:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp taskloop simd in_reduction(red:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar3(int &ref) { +#pragma omp taskgroup task_reduction(red:ref) // expected-note {{previously marked as task_reduction with different reduction operation}} +#pragma omp taskloop simd in_reduction(min:ref) // expected-error{{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +void foobar4(int &ref) { +#pragma omp taskloop simd in_reduction(min:ref) // expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} + +struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}} +extern S1 a; +class S2 { + mutable int a; + S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}} + +public: + S2() : a(0) {} + S2(S2 &s2) : a(s2.a) {} + static float S2s; // expected-note 2 {{static data member is predetermined as shared}} + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} +}; +const float S2::S2sc = 0; +S2 b; // expected-note 3 {{'b' defined here}} +const S2 ba[5]; // expected-note 2 {{'ba' defined here}} +class S3 { + int a; + +public: + int b; + S3() : a(0) {} + S3(const S3 &s3) : a(s3.a) {} + S3 operator+(const S3 &arg1) { return arg1; } +}; +int operator+(const S3 &arg1, const S3 &arg2) { return 5; } +S3 c; // expected-note 3 {{'c' defined here}} +const S3 ca[5]; // expected-note 2 {{'ca' defined here}} +extern const int f; // expected-note 4 {{'f' declared here}} +class S4 { + int a; + S4(); // expected-note {{implicitly declared private here}} + S4(const S4 &s4); + S4 &operator+(const S4 &arg) { return (*this); } + +public: + S4(int v) : a(v) {} +}; +S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } +class S5 { + int a; + S5() : a(0) {} // expected-note {{implicitly declared private here}} + S5(const S5 &s5) : a(s5.a) {} + S5 &operator+(const S5 &arg); + +public: + S5(int v) : a(v) {} +}; +class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}} +#if __cplusplus >= 201103L // C++11 or later +// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}} +#endif + int a; + +public: + S6() : a(6) {} + operator int() { return 6; } +} o; + +S3 h, k; +#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} + +template <class T> // expected-note {{declared here}} +T tmain(T argc) { + const T d = T(); // expected-note 4 {{'d' defined here}} + const T da[5] = {T()}; // expected-note 2 {{'da' defined here}} + T qa[5] = {T()}; + T i; + T &j = i; // expected-note 2 {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const T &r = da[(int)i]; // expected-note 2 {{'r' defined here}} + T &q = qa[(int)i]; + T fl; +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp taskloop simd' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:argc) +#pragma omp taskloop simd in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp taskloop simd in_reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) // expected-error {{invalid operands to binary expression ('float' and 'float')}} +#pragma omp taskloop simd in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp taskloop simd in_reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(^ : T) // expected-error {{'T' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp taskloop simd in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified list item cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}} expected-error 2 {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp taskloop simd in_reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : o) // expected-error 2 {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp taskloop simd in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp taskloop simd in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note 2 {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : r) // expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp taskloop simd in_reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +{ +#pragma omp taskloop simd in_reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(*:fl) // expected-note 2 {{previously marked as task_reduction with different reduction operation}} +{ +#pragma omp taskloop simd in_reduction(+ : fl) // expected-error 2 {{in_reduction variable must have the same reduction operation as in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +} +} +#pragma omp parallel +#pragma omp for reduction(- : fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop simd in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); + + return T(); +} + +namespace A { +double x; +#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + const int d = 5; // expected-note 2 {{'d' defined here}} + const int da[5] = {0}; // expected-note {{'da' defined here}} + int qa[5] = {0}; + S4 e(4); + S5 g(5); + int i; + int &j = i; // expected-note {{'j' defined here}} + S3 &p = k; // expected-note 2 {{'p' defined here}} + const int &r = da[i]; // expected-note {{'r' defined here}} + int &q = qa[i]; + float fl; +#pragma omp taskloop simd in_reduction // expected-error {{expected '(' after 'in_reduction'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction + // expected-error {{expected '(' after 'in_reduction'}} expected-warning {{extra tokens at the end of '#pragma omp taskloop simd' are ignored}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(|:argc) +#pragma omp taskloop simd in_reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(~ : argc) // expected-error {{expected unqualified-id}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(&&:argc) +#pragma omp taskloop simd in_reduction(&& : argc) + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(^ : S1) // expected-error {{'S1' does not refer to a value}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:c) +#pragma omp taskloop simd in_reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified list item cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}} expected-error {{in_reduction variable must appear in a task_reduction clause}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'in_reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(max : h.b) // expected-error {{expected variable name, array element or array section}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : ba) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(* : ca) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(- : da) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(&& : S2::S2sc) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:k) +#pragma omp taskloop simd in_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : o) // expected-error {{no viable overloaded '='}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel private(k) +#pragma omp taskloop simd in_reduction(+ : p), in_reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskgroup task_reduction(+:p) +#pragma omp taskloop simd in_reduction(+ : p), in_reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'in_reduction' clause}} expected-note {{previously referenced here}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp taskloop simd in_reduction(+ : r) // expected-error {{const-qualified list item cannot be reduction}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel shared(i) +#pragma omp parallel reduction(min : i) +#pragma omp taskloop simd in_reduction(max : j) // expected-error {{argument of OpenMP clause 'in_reduction' must reference the same object in all threads}} + for (int i = 0; i < 10; ++i) + foo(); +#pragma omp parallel +#pragma omp for private(fl) + for (int i = 0; i < 10; ++i) +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop simd in_reduction(+ : fl) + for (int j = 0; j < 10; ++j) + foo(); +#pragma omp taskgroup task_reduction(+:fl) +#pragma omp taskloop simd in_reduction(+ : fl) + for (int i = 0; i < 10; ++i) + foo(); + static int m; +#pragma omp taskgroup task_reduction(+:m) +#pragma omp taskloop simd in_reduction(+ : m) // OK + for (int i = 0; i < 10; ++i) + m++; + + return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}} +} diff --git a/test/OpenMP/taskloop_simd_lastprivate_messages.cpp b/test/OpenMP/taskloop_simd_lastprivate_messages.cpp index ed1bdf5a6e3e..e5cba6c64b6b 100644 --- a/test/OpenMP/taskloop_simd_lastprivate_messages.cpp +++ b/test/OpenMP/taskloop_simd_lastprivate_messages.cpp @@ -18,9 +18,9 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { diff --git a/test/OpenMP/taskloop_simd_misc_messages.c b/test/OpenMP/taskloop_simd_misc_messages.c index 61c7b107d496..35cc62cad585 100644 --- a/test/OpenMP/taskloop_simd_misc_messages.c +++ b/test/OpenMP/taskloop_simd_misc_messages.c @@ -346,6 +346,10 @@ void test_firstprivate() { #pragma omp taskloop simd lastprivate(x, y, z) firstprivate(x, y, z) for (i = 0; i < 16; ++i) ; +// expected-error@+1 {{the value of 'simdlen' parameter must be less than or equal to the value of the 'safelen' parameter}} +#pragma omp taskloop simd simdlen(64) safelen(8) + for (i = 0; i < 16; ++i) + ; } void test_loop_messages() { diff --git a/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/test/OpenMP/taskloop_simd_reduction_codegen.cpp index b96c8d50a3ff..7e8d54a2a1da 100644 --- a/test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ b/test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s // expected-no-diagnostics struct S { @@ -51,6 +51,7 @@ sum = 0.0; // CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], // CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%ident_t* // CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t], +// CHECK: alloca i32, // CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, // CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, // CHECK: store i32 0, i32* [[RETVAL]], diff --git a/test/OpenMP/taskloop_simd_reduction_messages.cpp b/test/OpenMP/taskloop_simd_reduction_messages.cpp index f9739f708a28..886b685f6325 100644 --- a/test/OpenMP/taskloop_simd_reduction_messages.cpp +++ b/test/OpenMP/taskloop_simd_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_codegen.cpp b/test/OpenMP/teams_codegen.cpp index 8aaa206abc3b..28f6ca0714d4 100644 --- a/test/OpenMP/teams_codegen.cpp +++ b/test/OpenMP/teams_codegen.cpp @@ -21,7 +21,7 @@ int teams_argument_global_local(int a){ int la = 23; float lc = 25.0; - // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) #pragma omp target #pragma omp teams @@ -29,7 +29,7 @@ int teams_argument_global_local(int a){ ++comp; } - // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) #pragma omp target {{{ @@ -39,7 +39,7 @@ int teams_argument_global_local(int a){ } }}} - // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) + // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) @@ -49,7 +49,7 @@ int teams_argument_global_local(int a){ ++comp; } - // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) + // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) @@ -59,7 +59,7 @@ int teams_argument_global_local(int a){ ++comp; } - // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]] // CK1-DAG: [[NTA]] = load i32, i32* @Gbla, @@ -78,7 +78,7 @@ int teams_argument_global_local(int a){ ++comp; } - // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + // CK1-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1 // CK1-DAG: [[NTA]] = load i32, i32* @Gbla, @@ -125,7 +125,7 @@ int teams_template_arg(void) { SS<int> la; SS<long long> lb; - // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0) @@ -141,7 +141,7 @@ int teams_template_arg(void) { ++comp; } - // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + // CK2-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32 // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0), @@ -181,7 +181,7 @@ struct SS{ int foo(void) { int comp = 1; - // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) + // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 @@ -194,7 +194,7 @@ struct SS{ ++comp; } - // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) + // CK3-DAG: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123 // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32 diff --git a/test/OpenMP/teams_distribute_codegen.cpp b/test/OpenMP/teams_distribute_codegen.cpp new file mode 100644 index 000000000000..8a2afbd4341d --- /dev/null +++ b/test/OpenMP/teams_distribute_codegen.cpp @@ -0,0 +1,241 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int a[100]; + +// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( +int teams_argument_global(int n){ + int te = n / 128; + int th = 128; + // discard n_addr + // CK1: alloca i32, + // CK1: [[TE:%.+]] = alloca i32, + // CK1: [[TH:%.+]] = alloca i32, + // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], + // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) + + // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], + #pragma omp target + #pragma omp teams distribute num_teams(te), thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + {{{ + #pragma omp teams distribute + for(int i = 0; i < n; i++) { + a[i] = 0; + } + }}} + + // outlined target regions + // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}}) + // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], + // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], + // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to + // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to + // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], + // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], + // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], + // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], + // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}}) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: define {{.*}}i32 @{{.+}}teams_local_argv( +int teams_local_arg(void) { + int n = 100; + int a[n]; + + // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams distribute + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + // outlined target region + // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}}) + // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK2: ret void + + // CK2: define internal void @[[OUTL1]]({{.+}}) + // CK2: call void @__kmpc_for_static_init_4( + // CK2: call void @__kmpc_for_static_fini( + // CK2: ret void + + return a[0]; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float } + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) + #pragma omp target + #pragma omp teams distribute + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // outlined target region + // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}}) + // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK3: ret void + + // CK3: define internal void @[[OUTL1]]({{.+}}) + // CK3: call void @__kmpc_for_static_init_4( + // CK3: call void @__kmpc_for_static_fini( + // CK3: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK3 + +// Test host codegen. +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 + +#ifdef CK4 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int te = n/128; + int th = 128; +#pragma omp target +#pragma omp teams distribute num_teams(te) thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams distribute + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CK4: call void @[[OFFL1:.+]]({{.+}}) +// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK4: ret + +// CK4: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTL1]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) +// CK4: call void @[[OFFLT:.+]]({{.+}}) +// CK4: ret +// CK4-NEXT: } + +// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}}) +// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], +// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], +// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to +// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to +// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], +// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], +// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], +// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], +// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTLT]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +#endif // CK4 +#endif diff --git a/test/OpenMP/teams_distribute_collapse_codegen.cpp b/test/OpenMP/teams_distribute_collapse_codegen.cpp new file mode 100644 index 000000000000..a65c0a2e63f7 --- /dev/null +++ b/test/OpenMP/teams_distribute_collapse_codegen.cpp @@ -0,0 +1,128 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: [[OMP_UB:%.+]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target + #pragma omp teams distribute collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target + #pragma omp teams distribute collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..8bd8825e00c4 --- /dev/null +++ b/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -0,0 +1,206 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; +#pragma omp target +#pragma omp teams distribute + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams distribute + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_firstprivate_codegen.cpp new file mode 100644 index 000000000000..a126b1d16b77 --- /dev/null +++ b/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -0,0 +1,336 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/teams_distribute_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_firstprivate_messages.cpp index a71080774331..1a2b5f3782f9 100644 --- a/test/OpenMP/teams_distribute_firstprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_firstprivate_messages.cpp @@ -144,8 +144,9 @@ int main(int argc, char **argv) { #pragma omp teams distribute firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute lastprivate(argc), firstprivate(argc) // OK +#pragma omp teams distribute lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_lastprivate_codegen.cpp new file mode 100644 index 000000000000..2eab60b9dd3c --- /dev/null +++ b/test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -0,0 +1,369 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams distribute lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams distribute lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void +#endif diff --git a/test/OpenMP/teams_distribute_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_lastprivate_messages.cpp index b892141d5014..392607ea14ba 100644 --- a/test/OpenMP/teams_distribute_lastprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -255,12 +255,14 @@ int main(int argc, char **argv) { #pragma omp teams distribute lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}} #pragma omp target -#pragma omp teams distribute firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp teams distribute firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute lastprivate(n) firstprivate(n) // OK +#pragma omp teams distribute lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/teams_distribute_loop_messages.cpp b/test/OpenMP/teams_distribute_loop_messages.cpp index ea2604eebf0f..acab1f76a2d4 100644 --- a/test/OpenMP/teams_distribute_loop_messages.cpp +++ b/test/OpenMP/teams_distribute_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -689,12 +689,16 @@ void test_loop_eh() { void g() { throw 0; } }; } + #pragma omp target + #pragma omp teams distribute + f; // expected-error {{use of undeclared identifier 'f'}} } void test_loop_firstprivate_lastprivate() { S s(4); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute lastprivate(s) firstprivate(s) +#pragma omp teams distribute lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp b/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp index ad14794116ed..68bd424ade01 100644 --- a/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_ast_print.cpp @@ -8,6 +8,9 @@ void foo() {} +int x; +#pragma omp threadprivate(x) + struct S { S(): a(0) {} S(int v) : a(v) {} @@ -40,7 +43,7 @@ public: void foo() { int b, argv, d, c, e, f; #pragma omp target -#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) +#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x) for (int k = 0; k < a.a; ++k) ++a.a; } @@ -50,7 +53,7 @@ public: // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute parallel for private(this->a) private(this->a) // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x) class S8 : public S7<S> { S8() {} @@ -74,7 +77,7 @@ public: void bar() { int b, argv, d, c, e, f8; #pragma omp target -#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f8) thread_limit(d) +#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f8) thread_limit(d) copyin(x) for (int k = 0; k < a.a; ++k) ++a.a; } @@ -86,7 +89,7 @@ public: // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute parallel for private(this->a) private(this->a) // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f8) thread_limit(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f8) thread_limit(d) copyin(x) template <class T, int N> T tmain(T argc) { @@ -127,19 +130,19 @@ T tmain(T argc) { // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: foo(); #pragma omp target -#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) +#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute parallel for linear(d) +#pragma omp teams distribute parallel for for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for linear(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return T(); @@ -182,19 +185,19 @@ int main (int argc, char **argv) { // CHECK-NEXT: for (int i = 0; i < 10; ++i) // CHECK-NEXT: foo(); #pragma omp target -#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) +#pragma omp teams distribute parallel for default(none), private(b) firstprivate(argc) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) copyin(x) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for default(none) private(b) firstprivate(argc) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) copyin(x) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute parallel for linear(d) +#pragma omp teams distribute parallel for for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for linear(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; return (0); diff --git a/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp new file mode 100644 index 000000000000..5711ede87c86 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -0,0 +1,247 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int a[100]; + +// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( +int teams_argument_global(int n){ + int te = n / 128; + int th = 128; + // discard n_addr + // CK1: alloca i32, + // CK1: [[TE:%.+]] = alloca i32, + // CK1: [[TH:%.+]] = alloca i32, + // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], + // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) + + // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], + #pragma omp target + #pragma omp teams distribute parallel for num_teams(te), thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = 0; + #pragma omp cancel for + } + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + {{{ + #pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = 0; + } + }}} + + // outlined target regions + // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}}) + // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], + // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], + // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to + // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to + // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], + // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], + // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], + // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], + // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.+}} @__kmpc_fork_call( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}}) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.+}} @__kmpc_fork_call( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: define {{.*}}i32 @{{.+}}teams_local_argv( +int teams_local_arg(void) { + int n = 100; + int a[n]; + + // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + // outlined target region + // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}}) + // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK2: ret void + + // CK2: define internal void @[[OUTL1]]({{.+}}) + // CK2: call void @__kmpc_for_static_init_4( + // CK2: call void {{.+}} @__kmpc_fork_call( + // CK2: call void @__kmpc_for_static_fini( + // CK2: ret void + + return a[0]; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float } + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) + #pragma omp target + #pragma omp teams distribute parallel for + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // outlined target region + // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}}) + // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK3: ret void + + // CK3: define internal void @[[OUTL1]]({{.+}}) + // CK3: call void @__kmpc_for_static_init_4( + // CK3: call void {{.+}} @__kmpc_fork_call( + // CK3: call void @__kmpc_for_static_fini( + // CK3: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK3 + +// Test host codegen. +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 + +#ifdef CK4 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int te = n/128; + int th = 128; +#pragma omp target +#pragma omp teams distribute parallel for num_teams(te) thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CK4: call void @[[OFFL1:.+]]({{.+}}) +// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK4: ret + +// CK4: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTL1]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void {{.+}} @__kmpc_fork_call( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) +// CK4: call void @[[OFFLT:.+]]({{.+}}) +// CK4: ret +// CK4-NEXT: } + +// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}}) +// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], +// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], +// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to +// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to +// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], +// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], +// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], +// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], +// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTLT]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void {{.+}} @__kmpc_fork_call( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +#endif // CK4 +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp new file mode 100644 index 000000000000..7522288f75e2 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp @@ -0,0 +1,144 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target + #pragma omp teams distribute parallel for collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target + #pragma omp teams distribute parallel for collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[TPAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp new file mode 100644 index 000000000000..0202a40002b5 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +int x; +#pragma omp threadprivate(x) + +template <typename T> +T tmain() { + int a[2]; +#pragma omp target +#pragma omp teams distribute parallel for copyin(x) + for (int i = 0; i < 2; ++i) { + a[i] = x; + } + return T(); +} + +int main() { + int a[2]; +#ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams( + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for copyin(x) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]]( + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]( + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + a[i] = x; + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca [2 x i{{[0-9]+}}]*, + // LAMBDA: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + + // LAMBDA: [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]], + // LAMBDA: store {{.+}} [[X_VAL]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + a[i] = x; + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute parallel for copyin(x) + for (int i = 0; i < 2; ++i) { + a[i] = x; + } + return tmain<int>(); + //return 0; +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams( +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[PAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{%.+}} = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, + +// CHECK: [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]], +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]], +// CHECK: store {{.+}} [[X_VAL]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams( +// CHECK: call void @[[TOFFL1:.+]]( +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]( +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// prev lb and ub +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, + +// CHECK: {{%.+}} = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[X_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// iter variables +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, + +// CHECK: [[X_REF:%.+]] = load {{.+}}, {{.+}} [[X_ADDR]], +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: [[X_VAL:%.+]] = load {{.+}}, {{.+}} [[X_REF]], +// CHECK: store {{.+}} [[X_VAL]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp new file mode 100644 index 000000000000..30f3e75bf635 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_copyin_messages.cpp @@ -0,0 +1,113 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s + +void foo() { +} + +bool foobool(int argc) { + return argc; +} + +struct S1; // expected-note {{declared here}} +class S2 { + mutable int a; + +public: + S2() : a(0) {} + S2 &operator=(S2 &s2) { return *this; } +}; +class S3 { + int a; + +public: + S3() : a(0) {} + S3 &operator=(S3 &s3) { return *this; } +}; +class S4 { + int a; + S4(); + S4 &operator=(const S4 &s4); // expected-note {{implicitly declared private here}} + +public: + S4(int v) : a(v) {} +}; +class S5 { + int a; + S5() : a(0) {} + S5 &operator=(const S5 &s5) { return *this; } // expected-note {{implicitly declared private here}} + +public: + S5(int v) : a(v) {} +}; +template <class T> +class ST { +public: + static T s; +}; + +S2 k; +S3 h; +S4 l(3); +S5 m(4); +#pragma omp threadprivate(h, k, l, m) + +namespace A { +double x; +#pragma omp threadprivate(x) +} +namespace B { +using A::x; +} + +int main(int argc, char **argv) { + int i; +#pragma omp target +#pragma omp teams distribute parallel for copyin // expected-error {{expected '(' after 'copyin'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin() // expected-error {{expected expression}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(k // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(h, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(l) // expected-error {{'operator=' is a private member of 'S4'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(S1) // expected-error {{'S1' does not refer to a value}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(argv[1]) // expected-error {{expected variable name}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(i) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(m) // expected-error {{'operator=' is a private member of 'S5'}} + for (i = 0; i < argc; ++i) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for copyin(ST<int>::s, B::x) // expected-error {{copyin variable must be threadprivate}} + for (i = 0; i < argc; ++i) + foo(); + + return 0; +} diff --git a/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..dc48f166b4c0 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -0,0 +1,263 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp new file mode 100644 index 000000000000..fa99f9cfcf1b --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -0,0 +1,496 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute parallel for firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}) + // Skip global and bound tid vars, and prev lb and ub vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP_PRIV:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + + // use of private vars + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP_PRIV]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute parallel for firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[PAR_OUTL]]({{.+}}) +// Skip global and bound tid vars, and prev lb ub vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp index e6314c210e8f..90e78defc5e2 100644 --- a/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_firstprivate_messages.cpp @@ -144,8 +144,9 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for lastprivate(argc), firstprivate(argc) // OK +#pragma omp teams distribute parallel for lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp new file mode 100644 index 000000000000..bc40dca83fbe --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; + +// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test +void gtid_test() { +#pragma omp target +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +#pragma omp teams distribute parallel for + for(int i = 0 ; i < 100; i++) {} + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret +#pragma omp target +#pragma omp teams distribute parallel for if (parallel: false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_1:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @{{.+}}gtid_test + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret + gtid_test(); + } +} + + +template <typename T> +int tmain(T Arg) { +#pragma omp target +#pragma omp teams distribute parallel for if (true) + for(int i = 0 ; i < 100; i++) { + fn1(); + } +#pragma omp target +#pragma omp teams distribute parallel for if (false) + for(int i = 0 ; i < 100; i++) { + fn2(); + } +#pragma omp target +#pragma omp teams distribute parallel for if (parallel: Arg) + for(int i = 0 ; i < 100; i++) { + fn3(); + } + return 0; +} + +// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() +int main() { +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( +// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain +#pragma omp target +#pragma omp teams distribute parallel for if (true) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn4 + // CHECK: call void @__kmpc_for_static_fini( + + fn4(); + } + +#pragma omp target +#pragma omp teams distribute parallel for if (false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_3:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn5 + // CHECK: call void @__kmpc_for_static_fini( + fn5(); + } + +#pragma omp target +#pragma omp teams distribute parallel for if (Arg) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_2]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_4:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn6 + // CHECK: call void @__kmpc_for_static_fini( + fn6(); + } + + return tmain(Arg); +} + +// CHECK-LABEL: define {{.+}} @{{.+}}tmain + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn1 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// CHECK: call void [[T_OUTLINE_FUN_2:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn2 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// call void [[T_OUTLINE_FUN_3:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn3 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp new file mode 100644 index 000000000000..501abb725b31 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -0,0 +1,584 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams distribute parallel for lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams distribute parallel for lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// gbl and bound tid vars, prev lb and ub vars +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( + +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid, and prev lb and ub vars +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], + +// CHECK: call void @__kmpc_for_static_init_4( + +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], + +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp index 5691af71f7cd..b58bac4fdfc3 100644 --- a/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -255,12 +255,14 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}} #pragma omp target -#pragma omp teams distribute parallel for firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp teams distribute parallel for firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for lastprivate(n) firstprivate(n) // OK +#pragma omp teams distribute parallel for lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp deleted file mode 100644 index b14fb2328b2b..000000000000 --- a/test/OpenMP/teams_distribute_parallel_for_linear_messages.cpp +++ /dev/null @@ -1,293 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp %s - -namespace X { - int x; -}; - -struct B { - static int ib; // expected-note {{'B::ib' declared here}} - static int bfoo() { return 8; } -}; - -int bfoo() { return 4; } - -int z; -const int C1 = 1; -const int C2 = 2; -void test_linear_colons() -{ - int B = 0; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B:bfoo()) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B:B::bfoo()) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(X::x : ::z) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B,::z, X::x) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(::z) - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B::bfoo()) // expected-error {{expected variable name}} - for (int i = 0; i < 10; ++i) ; - -#pragma omp target -#pragma omp teams distribute parallel for linear(B::ib,B:C1+C2) - for (int i = 0; i < 10; ++i) ; -} - -template<int L, class T, class N> T test_template(T* arr, N num) { - N i; - T sum = (T)0; - T ind2 = - num * L; // expected-note {{'ind2' defined here}} - -#pragma omp target -#pragma omp teams distribute parallel for linear(ind2:L) // expected-error {{argument of a linear clause should be of integral or pointer type}} - for (i = 0; i < num; ++i) { - T cur = arr[(int)ind2]; - ind2 += L; - sum += cur; - } - return T(); -} - -template<int LEN> int test_warn() { - int ind2 = 0; - #pragma omp target - #pragma omp teams distribute parallel for linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} - for (int i = 0; i < 100; i++) { - ind2 += LEN; - } - return ind2; -} - -struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}} -extern S1 a; -class S2 { - mutable int a; -public: - S2():a(0) { } -}; -const S2 b; // expected-note 2 {{'b' defined here}} -const S2 ba[5]; -class S3 { - int a; -public: - S3():a(0) { } -}; -const S3 ca[5]; -class S4 { - int a; - S4(); -public: - S4(int v):a(v) { } -}; -class S5 { - int a; - S5():a(0) {} -public: - S5(int v):a(v) { } -}; - -S3 h; -#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}} - -template<class I, class C> int foomain(I argc, C **argv) { - I e(4); - I g(5); - int i; - int &j = i; - -#pragma omp target -#pragma omp teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear () // expected-error {{expected expression}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc : 5) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (a, b:B::ib) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(e, g) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target - #pragma omp teams distribute parallel for linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target -#pragma omp teams distribute parallel for linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target -#pragma omp teams distribute parallel for linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target -#pragma omp teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - return 0; -} - -namespace A { -double x; -#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}} -} -namespace C { -using A::x; -} - -int main(int argc, char **argv) { - double darr[100]; - // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}} - test_template<-4>(darr, 4); - // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}} - test_warn<0>(); - - S4 e(4); // expected-note {{'e' defined here}} - S5 g(5); // expected-note {{'g' defined here}} - int i; - int &j = i; - -#pragma omp target -#pragma omp teams distribute parallel for linear // expected-error {{expected '(' after 'linear'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear () // expected-error {{expected expression}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argc) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (S1) // expected-error {{'S1' does not refer to a value}} - for (int k = 0; k < argc; ++k) ++k; - - -#pragma omp target -#pragma omp teams distribute parallel for linear (a, b) // expected-error {{linear variable with incomplete type 'S1'}} expected-error {{const-qualified variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear (argv[1]) // expected-error {{expected variable name}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(e, g) // expected-error {{argument of a linear clause should be of integral or pointer type, not 'S4'}} expected-error {{argument of a linear clause should be of integral or pointer type, not 'S5'}} - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp parallel - { - int i; - #pragma omp target - #pragma omp teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp target - #pragma omp teams distribute parallel for linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } - } - -#pragma omp target -#pragma omp teams distribute parallel for linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for linear(i) - for (int k = 0; k < argc; ++k) ++k; - - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} - return 0; -} - diff --git a/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp index e0c315f513b7..a2f6f679d325 100644 --- a/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -691,8 +691,9 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for lastprivate(s) firstprivate(s) +#pragma omp teams distribute parallel for lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/teams_distribute_parallel_for_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_messages.cpp index c3536cd86167..9843b442e5b4 100644 --- a/test/OpenMP/teams_distribute_parallel_for_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_messages.cpp @@ -9,6 +9,9 @@ static int pvt; #pragma omp teams distribute parallel for // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute parallel for'}} int main(int argc, char **argv) { + #pragma omp target + #pragma omp teams distribute parallel for + f; // expected-error {{use of undeclared identifier 'f'}} #pragma omp target #pragma omp teams distribute parallel for { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute parallel for' are ignored}} for (int i = 0; i < argc; ++i) @@ -34,7 +37,7 @@ int main(int argc, char **argv) { for (int i = 0; i < argc; ++i) foo(); #pragma omp target -#pragma omp teams distribute parallel for +#pragma omp teams distribute parallel for linear(argc) // expected-error {{unexpected OpenMP clause 'linear' in directive '#pragma omp teams distribute parallel for'}} for (int i = 0; i < argc; ++i) foo(); // expected-warning@+2 {{extra tokens at the end of '#pragma omp teams distribute parallel for' are ignored}} @@ -94,7 +97,7 @@ L1: } #pragma omp target -#pragma omp teams distribute parallel for copyin(pvt) // expected-error {{unexpected OpenMP clause 'copyin' in directive '#pragma omp teams distribute parallel for'}} +#pragma omp teams distribute parallel for copyin(pvt) for (int n = 0; n < 100; ++n) {} return 0; @@ -107,3 +110,12 @@ void test_ordered() { ; } +void test_cancel() { +#pragma omp target +#pragma omp teams distribute parallel for + for (int i = 0; i < 16; ++i) { +#pragma omp cancel for + ; + } +} + diff --git a/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp new file mode 100644 index 000000000000..7c0ac461220d --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T, int C> +int tmain() { +#pragma omp target +#pragma omp teams distribute parallel for num_threads(C) + for (int i = 0; i < 100; i++) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for num_threads(T(23)) + for (int i = 0; i < 100; i++) + foo(); + return 0; +} + +int main() { + S s(0); + char a = s; +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]() +// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]() +#pragma omp target + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) +#pragma omp teams distribute parallel for num_threads(2) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } +#pragma omp target + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) +#pragma omp teams distribute parallel for num_threads(a) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, + // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], + // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], + // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } + return a + tmain<char, 5>() + tmain<S, 1>(); +} + +// tmain 5 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]]( + +// tmain 1 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( + +// CHECK: define internal void [[T_OFFLOADING_FUN_0]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*, +// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]], +// CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp new file mode 100644 index 000000000000..80a12738cab4 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -0,0 +1,332 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute parallel for private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}) + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute parallel for private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i32, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[PAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i32, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i32, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// prev lb and ub +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// iter variables +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i32, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp new file mode 100644 index 000000000000..71e92bd5a555 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -0,0 +1,90 @@ +// add -fopenmp-targets + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T> +T tmain() { +#pragma omp target +#pragma omp teams distribute parallel for proc_bind(master) + for(int i = 0; i < 1000; i++) {} + return T(); +} + +int main() { + // CHECK-LABEL: @main +#pragma omp target +#pragma omp teams distribute parallel for proc_bind(spread) + for(int i = 0; i < 1000; i++) {} +#pragma omp target +#pragma omp teams distribute parallel for proc_bind(close) + for(int i = 0; i < 1000; i++) {} + return tmain<int>(); +} + +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL1:@.+]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL2:@.+]]() +// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]() +// CHECK: ret i32 [[CALL_RET]] + +// CHECK: define{{.+}} void [[OFFL1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[OFFL2]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[TMAIN]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL3:@.+]]() + +// CHECK: define{{.+}} [[OFFL3]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp new file mode 100644 index 000000000000..55ccfff99a3c --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp @@ -0,0 +1,345 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams distribute parallel for reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: br + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + + // Skip global and bound tid vars, and prev lb and ub vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // skip loop vars + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: br + + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute parallel for reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars, and prev lb and ub +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// skip loop vars +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars, and prev lb and ub vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// skip loop vars +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: br +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp index 73661618d20c..889efd76cc3c 100644 --- a/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp new file mode 100644 index 000000000000..201a170ccd4a --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -0,0 +1,398 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL4:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for schedule(dynamic) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL5:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for schedule(dynamic, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL4]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL4]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL4]]({{.+}}) + // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, + // CK1: call {{.+}} @__kmpc_dispatch_next_4( + // CK1: ret void + + // CK1: define internal void @[[OFFL5]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL5]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL5]]({{.+}}) + // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, + // CK1: call {{.+}} @__kmpc_dispatch_next_4( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(dynamic) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(dynamic, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(dynamic) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for schedule(dynamic, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL4:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL5:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL4]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL4]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL4]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + + +// CK2: define {{.*}}void @[[OFFL5]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL5]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL5]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT4:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT5:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT4]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT4]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT4]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT5]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT5]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT5]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp index 8edcd3664341..1a1af09aa205 100644 --- a/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_shared_messages.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for shared (S1) // expected-error {{'S1' does not refer to a value}} for (int j=0; j<100; j++) foo(); #pragma omp target - #pragma omp teams distribute parallel for shared (a, b, c, d, f) + #pragma omp teams distribute parallel for shared (a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (int j=0; j<100; j++) foo(); #pragma omp target #pragma omp teams distribute parallel for shared (argv[1]) // expected-error {{expected variable name}} diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp index 26cc15822943..7573fdeafa31 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_ast_print.cpp @@ -29,9 +29,10 @@ public: ++this->a.a; } S7 &operator=(S7 &s) { + int k; #pragma omp target -#pragma omp teams distribute parallel for simd private(a) private(this->a) - for (int k = 0; k < s.a.a; ++k) +#pragma omp teams distribute parallel for simd private(a) private(this->a) linear(k) + for (k = 0; k < s.a.a; ++k) ++s.a.a; foo(); @@ -59,7 +60,7 @@ public: // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a) private(T::a) // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a) +// CHECK-NEXT: #pragma omp teams distribute parallel for simd private(this->a) private(this->a) linear(k) // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute parallel for simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK: #pragma omp target @@ -161,11 +162,11 @@ T tmain(T argc) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute parallel for simd simdlen(clen-1) linear(d) +#pragma omp teams distribute parallel for simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) linear(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target @@ -224,11 +225,11 @@ int main (int argc, char **argv) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute parallel for simd simdlen(clen-1) linear(d) +#pragma omp teams distribute parallel for simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) linear(d) +// CHECK-NEXT: #pragma omp teams distribute parallel for simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp new file mode 100644 index 000000000000..00f2844735c9 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -0,0 +1,263 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int a[100]; + +// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( +int teams_argument_global(int n){ + int te = n / 128; + int th = 128; + // discard n_addr + // CK1: alloca i32, + // CK1: [[TE:%.+]] = alloca i32, + // CK1: [[TH:%.+]] = alloca i32, + // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], + // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + + // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], + #pragma omp target + #pragma omp teams distribute parallel for simd num_teams(te), thread_limit(th) simdlen(64) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + int i; + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + {{{ + #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) + for(i = 0; i < n; i++) { + a[i] = 0; + } + }}} + // outlined target regions + // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}}) + // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], + // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], + // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to + // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to + // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], + // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], + // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], + // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], + // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.+}} @__kmpc_fork_call( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}}) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.+}} @__kmpc_fork_call( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; +} + +// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4} +// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 64} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: define {{.*}}i32 @{{.+}}teams_local_argv( +int teams_local_arg(void) { + int n = 100; + int a[n], i; + + // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // CK2: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) + for(i = 0; i < n; i++) { + a[i] = 0; + } + + // outlined target region + // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}}) + // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK2: ret void + + // CK2: define internal void @[[OUTL1]]({{.+}}) + // CK2: call void @__kmpc_for_static_init_4( + // CK2: call void {{.+}} @__kmpc_fork_call( + // CK2: call void @__kmpc_for_static_fini( + // CK2: ret void + + return a[0]; +} + +// CK2-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CK2-DAG: !{!"llvm.loop.vectorize.width", i32 4} + +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float } + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + int i; + // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) + #pragma omp target + #pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) + for(i = 0; i < X; i++) { + a[i] = (T)0; + } + + // outlined target region + // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}}) + // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK3: ret void + + // CK3: define internal void @[[OUTL1]]({{.+}}) + // CK3: call void @__kmpc_for_static_init_4( + // CK3: call void {{.+}} @__kmpc_fork_call( + // CK3: call void @__kmpc_for_static_fini( + // CK3: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} + +// CK3-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CK3-DAG: !{!"llvm.loop.vectorize.width", i32 4} +#endif // CK3 + +// Test host codegen. +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 + +#ifdef CK4 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int te = n/128; + int th = 128; +#pragma omp target +#pragma omp teams distribute parallel for simd num_teams(te) thread_limit(th) simdlen(64) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n], i; +#pragma omp target +#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i) + for(i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) +// CK4: call void @[[OFFL1:.+]]({{.+}}) +// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK4: ret + +// CK4: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTL1]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void {{.+}} @__kmpc_fork_call( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) +// CK4: call void @[[OFFLT:.+]]({{.+}}) +// CK4: ret +// CK4-NEXT: } + +// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}}) +// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], +// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], +// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to +// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to +// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], +// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], +// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], +// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], +// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTLT]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void {{.+}} @__kmpc_fork_call( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 4} +// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 64} + +#endif // CK4 +#endif + diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp new file mode 100644 index 000000000000..91c451d49a08 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -0,0 +1,149 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} + +// CK4: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target + #pragma omp teams distribute parallel for simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target + #pragma omp teams distribute parallel for simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[TPAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}}, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK4: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..5c1f73066cd4 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -0,0 +1,268 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} + +// CK1: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for simd + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp new file mode 100644 index 000000000000..1219742c752d --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -0,0 +1,501 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for simd firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}) + // Skip global and bound tid vars, and prev lb and ub vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP_PRIV:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + + // use of private vars + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP_PRIV]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; + +// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true} + +#else +#pragma omp target +#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[PAR_OUTL]]({{.+}}) +// Skip global and bound tid vars, and prev lb ub vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp index cbbb313c06b3..668337db454b 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_messages.cpp @@ -144,8 +144,9 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for simd lastprivate(argc), firstprivate(argc) // OK +#pragma omp teams distribute parallel for simd lastprivate(argc), firstprivate(argc) for (i = 0; i < argc; ++i) foo(); return 0; diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp new file mode 100644 index 000000000000..6f2cef88cbbc --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -0,0 +1,187 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; + +// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test +void gtid_test() { +#pragma omp target +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +#pragma omp teams distribute parallel for simd + for(int i = 0 ; i < 100; i++) {} + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_0]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret +#pragma omp target +#pragma omp teams distribute parallel for simd if (parallel: false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_1:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @{{.+}}gtid_test + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: ret + gtid_test(); + } +} + + +template <typename T> +int tmain(T Arg) { +#pragma omp target +#pragma omp teams distribute parallel for simd if (true) + for(int i = 0 ; i < 100; i++) { + fn1(); + } +#pragma omp target +#pragma omp teams distribute parallel for simd if (false) + for(int i = 0 ; i < 100; i++) { + fn2(); + } +#pragma omp target +#pragma omp teams distribute parallel for simd if (parallel: Arg) + for(int i = 0 ; i < 100; i++) { + fn3(); + } + return 0; +} + +// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main() +int main() { +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_2:@.+]]( +// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain +#pragma omp target +#pragma omp teams distribute parallel for simd if (true) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void + // CHECK: call void @__kmpc_for_static_fini( + // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn4 + // CHECK: call void @__kmpc_for_static_fini( + + fn4(); + } + +#pragma omp target +#pragma omp teams distribute parallel for simd if (false) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_3:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn5 + // CHECK: call void @__kmpc_for_static_fini( + fn5(); + } + +#pragma omp target +#pragma omp teams distribute parallel for simd if (Arg) + for(int i = 0 ; i < 100; i++) { + // CHECK: define internal void [[OFFLOADING_FUN_2]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]]( + + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void + // CHECK: call void @__kmpc_serialized_parallel( + // CHECK: call void [[OMP_OUTLINED_4:@.+]]( + // CHECK: call void @__kmpc_end_serialized_parallel( + // CHECK: call void @__kmpc_for_static_fini( + + // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( + // CHECK: call void @__kmpc_for_static_init_4( + // CHECK: call {{.*}}void @{{.+}}fn6 + // CHECK: call void @__kmpc_for_static_fini( + fn6(); + } + + return tmain(Arg); +} + +// CHECK-LABEL: define {{.+}} @{{.+}}tmain + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn1 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// CHECK: call void [[T_OUTLINE_FUN_2:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( +// CHECK: call void @__kmpc_for_static_fini( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn2 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void +// CHECK: call {{.*}}void @__kmpc_serialized_parallel( +// call void [[T_OUTLINE_FUN_3:@.+]]( +// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel( + +// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]] +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call {{.*}}void @{{.+}}fn3 +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp new file mode 100644 index 000000000000..5df893d525f4 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -0,0 +1,599 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams distribute parallel for simd lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // skip final branch + // LAMBDA: br + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + // LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + + // skip 'final' simd branch and block (checked as part of simd) + // LAMBDA: br + + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + +// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true} + + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// gbl and bound tid vars, prev lb and ub vars +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( + +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], + +// CHECK: call void @__kmpc_for_static_fini( + +// skip final branch and block +// CHECK: br + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid, and prev lb and ub vars +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], + +// CHECK: call void @__kmpc_for_static_init_4( + +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], + +// CHECK: call void @__kmpc_for_static_fini( + +// skip final branch and block +// CHECK: br + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp index fce59b924f67..204be4c95e91 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -255,12 +255,14 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}} #pragma omp target -#pragma omp teams distribute parallel for simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp teams distribute parallel for simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for simd lastprivate(n) firstprivate(n) // OK +#pragma omp teams distribute parallel for simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp index 5b91a3c4cccc..c0a9baf916dd 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_linear_messages.cpp @@ -18,34 +18,42 @@ void test_linear_colons() { int B = 0; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(::z) for (int i = 0; i < 10; ++i) ; @@ -54,6 +62,7 @@ void test_linear_colons() #pragma omp teams distribute parallel for simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(B::ib,B:C1+C2) for (int i = 0; i < 10; ++i) ; @@ -76,6 +85,7 @@ template<int L, class T, class N> T test_template(T* arr, N num) { template<int LEN> int test_warn() { int ind2 = 0; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} for (int i = 0; i < 100; i++) { @@ -133,10 +143,12 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; @@ -145,6 +157,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc : 5) for (int k = 0; k < argc; ++k) ++k; @@ -161,6 +174,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute parallel for simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(e, g) for (int k = 0; k < argc; ++k) ++k; @@ -169,32 +183,11 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute parallel for simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear(i) for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target - #pragma omp teams distribute parallel for simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target -#pragma omp teams distribute parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target -#pragma omp teams distribute parallel for simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target -#pragma omp teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -230,10 +223,12 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; @@ -242,6 +237,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute parallel for simd linear (argc) for (int k = 0; k < argc; ++k) ++k; @@ -267,26 +263,6 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int i; - #pragma omp target - #pragma omp teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp target - #pragma omp teams distribute parallel for simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } - } - -#pragma omp target -#pragma omp teams distribute parallel for simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute parallel for simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp index 03325623502f..66361b64eb84 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -693,8 +693,9 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute parallel for simd lastprivate(s) firstprivate(s) +#pragma omp teams distribute parallel for simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp index 7876c38980ee..12ac8f9a9479 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_messages.cpp @@ -9,6 +9,9 @@ static int pvt; #pragma omp teams distribute parallel for simd // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute parallel for simd'}} int main(int argc, char **argv) { + #pragma omp target + #pragma omp teams distribute parallel for simd + f; // expected-error {{use of undeclared identifier 'f'}} #pragma omp target #pragma omp teams distribute parallel for simd { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute parallel for simd' are ignored}} for (int i = 0; i < argc; ++i) diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp new file mode 100644 index 000000000000..93051af6161b --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -0,0 +1,123 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T, int C> +int tmain() { +#pragma omp target +#pragma omp teams distribute parallel for simd num_threads(C) + for (int i = 0; i < 100; i++) + foo(); +#pragma omp target +#pragma omp teams distribute parallel for simd num_threads(T(23)) + for (int i = 0; i < 100; i++) + foo(); + return 0; +} + +int main() { + S s(0); + char a = s; +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOADING_FUN_1:@.+]]( +// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]() +// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]() +#pragma omp target + // CHECK: define internal void [[OFFLOADING_FUN_0]]( + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) +#pragma omp teams distribute parallel for simd num_threads(2) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]]( + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } +#pragma omp target + // CHECK: define internal void [[OFFLOADING_FUN_1]]( + + // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) +#pragma omp teams distribute parallel for simd num_threads(a) + for (int i = 0; i < 100; i++) { + // CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]]( + // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*, + // CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]], + // CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]], + // CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}} + // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]]) + // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + foo(); + } + return a + tmain<char, 5>() + tmain<S, 1>(); +} + +// tmain 5 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]]( + +// tmain 1 +// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]() +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]]( +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( + +// CHECK: define internal void [[T_OFFLOADING_FUN_0]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*, +// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]], +// CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp new file mode 100644 index 000000000000..a6dbd29a5b96 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -0,0 +1,336 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for simd private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}) + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: ret void + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; + +// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true} + +#else +#pragma omp target +#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i32, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[PAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: alloca i32, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// prev lb and ub +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// iter variables +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i32, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp new file mode 100644 index 000000000000..7f95a877ab85 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -0,0 +1,93 @@ +// add -fopenmp-targets + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +typedef __INTPTR_TYPE__ intptr_t; + +// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +void foo(); + +struct S { + intptr_t a, b, c; + S(intptr_t a) : a(a) {} + operator char() { return a; } + ~S() {} +}; + +template <typename T> +T tmain() { +#pragma omp target +#pragma omp teams distribute parallel for simd proc_bind(master) + for(int i = 0; i < 1000; i++) {} + return T(); +} + +int main() { + // CHECK-LABEL: @main +#pragma omp target +#pragma omp teams distribute parallel for simd proc_bind(spread) + for(int i = 0; i < 1000; i++) {} +#pragma omp target +#pragma omp teams distribute parallel for simd proc_bind(close) + for(int i = 0; i < 1000; i++) {} + return tmain<int>(); +} + +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL1:@.+]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL2:@.+]]() +// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]() +// CHECK: ret i32 [[CALL_RET]] + +// CHECK: define{{.+}} void [[OFFL1]]( +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[OFFL2]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: define{{.+}} [[TMAIN]]() +// CHECK: call {{.*}}@__tgt_target_teams({{.+}}) +// CHECK: call void [[OFFL3:@.+]]() + +// CHECK: define{{.+}} [[OFFL3]]() +// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]], +// CHECK: [[GTID_ADDR:%.+]] = alloca i32*, +// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]], +// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]], +// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]], +// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp new file mode 100644 index 000000000000..384ac4b5f32a --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -0,0 +1,351 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams distribute parallel for simd reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute parallel for simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: br + + // LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + + // Skip global and bound tid vars, and prev lb and ub vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // skip loop vars + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: br + + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; + +// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true} + +#else +#pragma omp target +#pragma omp teams distribute parallel for simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars, and prev lb and ub +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// skip loop vars +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars, and prev lb and ub vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// skip loop vars +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: alloca i32, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: br + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp index 47494b94ff0e..b0522e8f69d2 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp new file mode 100644 index 000000000000..71e8ee8e8110 --- /dev/null +++ b/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -0,0 +1,402 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL4:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd schedule(dynamic) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL5:.+]]( + #pragma omp target + #pragma omp teams distribute parallel for simd schedule(dynamic, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33, + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL4]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL4]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL4]]({{.+}}) + // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, + // CK1: call {{.+}} @__kmpc_dispatch_next_4( + // CK1: ret void + + // CK1: define internal void @[[OFFL5]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL5]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[PAR_OUTL5]]({{.+}}) + // CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, + // CK1: call {{.+}} @__kmpc_dispatch_next_4( + // CK1: ret void + + // CK1: !{!"llvm.loop.vectorize.enable", i1 true} + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for simd + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(dynamic) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(dynamic, m) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; + int m = 10; +#pragma omp target +#pragma omp teams distribute parallel for simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd dist_schedule(static, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(dynamic) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute parallel for simd schedule(dynamic, m) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL4:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL5:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL4]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL4]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL4]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + + +// CK2: define {{.*}}void @[[OFFL5]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL5]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTL5]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT4:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT5:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33, +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT4]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT4]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT4]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT5]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT5]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4( +// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define internal void @[[PAR_OUTLT5]]({{.+}}) +// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35, +// CK2: call {{.+}} @__kmpc_dispatch_next_4( +// CK2: ret void + +// CK2: !{!"llvm.loop.vectorize.enable", i1 true} + +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp b/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp index f13146641a38..1089555889f0 100644 --- a/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp +++ b/test/OpenMP/teams_distribute_parallel_for_simd_shared_messages.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute parallel for simd shared (S1) // expected-error {{'S1' does not refer to a value}} for (int j=0; j<100; j++) foo(); #pragma omp target - #pragma omp teams distribute parallel for simd shared (a, b, c, d, f) + #pragma omp teams distribute parallel for simd shared (a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (int j=0; j<100; j++) foo(); #pragma omp target #pragma omp teams distribute parallel for simd shared (argv[1]) // expected-error {{expected variable name}} diff --git a/test/OpenMP/teams_distribute_private_codegen.cpp b/test/OpenMP/teams_distribute_private_codegen.cpp new file mode 100644 index 000000000000..8e4a61cd7cb1 --- /dev/null +++ b/test/OpenMP/teams_distribute_private_codegen.cpp @@ -0,0 +1,236 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}) + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/test/OpenMP/teams_distribute_reduction_codegen.cpp b/test/OpenMP/teams_distribute_reduction_codegen.cpp new file mode 100644 index 000000000000..961891da98d0 --- /dev/null +++ b/test/OpenMP/teams_distribute_reduction_codegen.cpp @@ -0,0 +1,217 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams distribute reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + +#endif diff --git a/test/OpenMP/teams_distribute_reduction_messages.cpp b/test/OpenMP/teams_distribute_reduction_messages.cpp index d0a6d296c0f5..f11236f0ce27 100644 --- a/test/OpenMP/teams_distribute_reduction_messages.cpp +++ b/test/OpenMP/teams_distribute_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_distribute_shared_messages.cpp b/test/OpenMP/teams_distribute_shared_messages.cpp index b9e096ce1c24..ecf6f2e03673 100644 --- a/test/OpenMP/teams_distribute_shared_messages.cpp +++ b/test/OpenMP/teams_distribute_shared_messages.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute shared (S1) // expected-error {{'S1' does not refer to a value}} for (int j=0; j<100; j++) foo(); #pragma omp target - #pragma omp teams distribute shared (a, b, c, d, f) + #pragma omp teams distribute shared (a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (int j=0; j<100; j++) foo(); #pragma omp target #pragma omp teams distribute shared (argv[1]) // expected-error {{expected variable name}} diff --git a/test/OpenMP/teams_distribute_simd_ast_print.cpp b/test/OpenMP/teams_distribute_simd_ast_print.cpp index aaf6745774c5..e751413c4232 100644 --- a/test/OpenMP/teams_distribute_simd_ast_print.cpp +++ b/test/OpenMP/teams_distribute_simd_ast_print.cpp @@ -29,9 +29,10 @@ public: ++this->a.a; } S7 &operator=(S7 &s) { + int k; #pragma omp target -#pragma omp teams distribute simd private(a) private(this->a) - for (int k = 0; k < s.a.a; ++k) +#pragma omp teams distribute simd private(a) private(this->a) linear(k) + for (k = 0; k < s.a.a; ++k) ++s.a.a; return *this; } @@ -56,7 +57,7 @@ public: // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a) private(T::a) // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a) +// CHECK-NEXT: #pragma omp teams distribute simd private(this->a) private(this->a) linear(k) // CHECK: #pragma omp target // CHECK-NEXT: #pragma omp teams distribute simd default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) // CHECK: #pragma omp target @@ -155,11 +156,11 @@ T tmain(T argc) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute simd simdlen(clen-1) linear(d) +#pragma omp teams distribute simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) linear(d) +// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target @@ -218,11 +219,11 @@ int main (int argc, char **argv) { // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target -#pragma omp teams distribute simd simdlen(clen-1) linear(d) +#pragma omp teams distribute simd simdlen(clen-1) for (int k = 0; k < 10; ++k) e += d + argc; // CHECK: #pragma omp target -// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) linear(d) +// CHECK-NEXT: #pragma omp teams distribute simd simdlen(clen - 1) // CHECK-NEXT: for (int k = 0; k < 10; ++k) // CHECK-NEXT: e += d + argc; #pragma omp target diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp new file mode 100644 index 000000000000..cad5854a2f1e --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -0,0 +1,251 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int a[100]; + +// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali( +int teams_argument_global(int n) { + int i; + int te = n / 128; + int th = 128; + // discard n_addr and i + // CK1: alloca i32, + // CK1: alloca i32, + // CK1: [[TE:%.+]] = alloca i32, + // CK1: [[TH:%.+]] = alloca i32, + // CK1: [[TE_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_CAST:%.+]] = alloca i{{32|64}}, + // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], + // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) + + // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], + #pragma omp target + #pragma omp teams distribute simd num_teams(te), thread_limit(th) aligned(a) simdlen(16) linear(i) + for(i = 0; i < n; i++) { + a[i] = 0; + } + + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + {{{ + #pragma omp teams distribute simd safelen(32) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + }}} + + // outlined target regions + // CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], [100 x i{{32|64}}]* {{.+}}, i{{32|64}} {{.+}}, {{.+}}) + // CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}}, + // CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], + // CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], + // CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to + // CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to + // CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], + // CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], + // CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], + // CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], + // CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}}) + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4( + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; +} + +// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 16} +// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true} +// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 32} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2: define {{.*}}i32 @{{.+}}teams_local_argv( +int teams_local_arg(void) { + int n = 100; + int a[n]; + + // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + // outlined target region + // CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}}) + // CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK2: ret void + + // CK2: define internal void @[[OUTL1]]({{.+}}) + // CK2: call void @__kmpc_for_static_init_4( + // CK2: call void @__kmpc_for_static_fini( + // CK2: ret void + + return a[0]; +} +// CK2: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float } + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) + #pragma omp target + #pragma omp teams distribute simd + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // outlined target region + // CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}}) + // CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) + // CK3: ret void + + // CK3: define internal void @[[OUTL1]]({{.+}}) + // CK3: call void @__kmpc_for_static_init_4( + // CK3: call void @__kmpc_for_static_fini( + // CK3: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +// CK3: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK3 + +// Test host codegen. +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32 + +#ifdef CK4 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; + int te = n/128; + int th = 128; +#pragma omp target +#pragma omp teams distribute simd num_teams(te) thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CK4: call void @[[OFFL1:.+]]({{.+}}) +// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK4: ret + +// CK4: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTL1]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) +// CK4: call void @[[OFFLT:.+]]({{.+}}) +// CK4: ret +// CK4-NEXT: } + +// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}}) +// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}}, +// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]], +// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]], +// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to +// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to +// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]], +// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]], +// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]], +// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]], +// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]]) +// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}}) +// CK4: ret void + +// CK4: define internal void @[[OUTLT]]({{.+}}) +// CK4: call void @__kmpc_for_static_init_4( +// CK4: call void @__kmpc_for_static_fini( +// CK4: ret void + +// CK4: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK4 +#endif + diff --git a/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp new file mode 100644 index 000000000000..071d1672fad1 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp @@ -0,0 +1,131 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute simd collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // discard loop variables not needed here + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: = alloca i32, + // CK1: [[OMP_UB:%.+]] = alloca i32, + // CK1: store i32 56087, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} + +// CK1: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target + #pragma omp teams distribute simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target + #pragma omp teams distribute simd collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: [[OMP_UB:%.omp.ub]] = alloca i64, +// CK2: store i64 {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// discard loop variables not needed here +// CK2: [[OMP_UB:%.omp.ub]] = alloca i32, +// CK2: store i32 {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]], +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp new file mode 100644 index 000000000000..599e18f58a16 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -0,0 +1,208 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X]; + float b; + // CK1: define {{.*}}i32 @{{.+}}foo{{.+}}( + int foo(void) { + + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL1:.+]]( + #pragma omp target + #pragma omp teams distribute simd + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL2:.+]]( + #pragma omp target + #pragma omp teams distribute simd dist_schedule(static) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: call i32 @__tgt_target_teams( + // CK1: call void @[[OFFL3:.+]]( + #pragma omp target + #pragma omp teams distribute simd dist_schedule(static, X/2) + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + // CK1: define internal void @[[OFFL1]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL1]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL2]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL2]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + // CK1: define internal void @[[OFFL3]]( + // CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}}, + // CK1: ret void + + // CK1: define internal void @[[OUTL3]]({{.+}}) + // CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 + // CK1: call void @__kmpc_for_static_fini( + // CK1: ret void + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +// CK1: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +template <typename T, int n> +int tmain(T argc) { + T a[n]; +#pragma omp target +#pragma omp teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } +#pragma omp target +#pragma omp teams distribute simd dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams distribute simd + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute simd dist_schedule(static) + for(int i = 0; i < n; i++) { + a[i] = 0; + } +#pragma omp target +#pragma omp teams distribute simd dist_schedule(static, n) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain<int, 10>(argc); +} + +// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFL3:.+]]({{.+}}) +// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) +// CK2: ret + +// CK2: define {{.*}}void @[[OFFL1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFL3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTL3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT1:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT2:.+]]({{.+}}) +// CK2: call i32 @__tgt_target_teams( +// CK2: call void @[[OFFLT3:.+]]({{.+}}) +// CK2: ret +// CK2-NEXT: } + +// CK2: define {{.*}}void @[[OFFLT1]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT1]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT2]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT2]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: define {{.*}}void @[[OFFLT3]]({{.+}}) +// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT3:.+]] to {{.+}}, +// CK2: ret void + +// CK2: define internal void @[[OUTLT3]]({{.+}}) +// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91 +// CK2: call void @__kmpc_for_static_fini( +// CK2: ret void + +// CK2: !{!"llvm.loop.vectorize.enable", i1 true} +#endif // CK2 +#endif // #ifndef HEADER diff --git a/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp new file mode 100644 index 000000000000..e503f5267f19 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -0,0 +1,337 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute simd firstprivate(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute simd firstprivate(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}}) + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{%.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]], + // LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]], + // LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]], + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_TMP:%.+]] = alloca i32*, + // skip loop vars + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]], + // LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]], + // LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to + // LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to + // LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to + // LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]], + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_CONV]], + // LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP]] + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_CONV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute simd firstprivate(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]({{.+}}) +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]], +// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]], +// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]], +// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]], +// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]], + +// T_VAR and SIVAR +// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32* + +// preparation vars +// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}}) +// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}}, + +// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]], +// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]], +// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]], +// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]], + +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// Skip temp vars for loop +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// param copy +// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]], + + +// T_VAR and preparation variables +// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], + +// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}}) + +// firstprivate(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to +// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]], +// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ] +// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]]) +// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]], +// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]], + +// firstprivate(var) +// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]]) +// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]]) +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]] +// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp b/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp index 324672c14129..bcc0e351c66e 100644 --- a/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_firstprivate_messages.cpp @@ -144,6 +144,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd firstprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target #pragma omp teams distribute simd lastprivate(argc), firstprivate(argc) // OK for (i = 0; i < argc; ++i) foo(); diff --git a/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp new file mode 100644 index 000000000000..3a18b75cf405 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -0,0 +1,371 @@ +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; + #pragma omp target + #pragma omp teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +int main() { + static int svar; + volatile double g; + volatile double &g1 = g; + + #ifdef LAMBDA + // LAMBDA-LABEL: @main + // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + static float sfvar; + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( + // LAMBDA: call void [[OFFLOADING_FUN:@.+]]( + + // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]( + // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}}) + #pragma omp target + #pragma omp teams distribute simd lastprivate(g, g1, svar, sfvar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]]) + // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*, + // loop variables + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: {{.+}} = alloca i{{[0-9]+}}, + // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double, + // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*, + // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float, + // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]], + + // init private variables + // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]], + // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]], + // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]], + // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]], + // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]], + g = 1; + g1 = 1; + svar = 3; + sfvar = 4.0; + // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4( + // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]], + // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]], + // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]], + // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]], + // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]], + // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]] + // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]] + // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]]) + // LAMBDA: call {{.*}}void @__kmpc_for_static_fini( + // LAMBDA: store i32 2, i32* % + // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], + // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 + // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + + // LAMBDA: [[OMP_LASTPRIV_BLOCK]]: + // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]], + // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]], + // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]], + // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]], + // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]], + + // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]], + // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]], + // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]], + // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]], + // LAMBDA: br label %[[OMP_LASTPRIV_DONE]] + // LAMBDA: [[OMP_LASTPRIV_DONE]]: + // LAMBDA: ret + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + svar = 4; + sfvar = 8.0; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]], + // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]] + // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 + // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]] + // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]] + }(); + } + }(); + return 0; + #else + S<float> test; + int t_var = 0; + int vec[] = {1, 2}; + S<float> s_arr[] = {1, 2}; + S<float> &var = test; + + #pragma omp target + #pragma omp teams distribute simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + int i; + + return tmain<int>(); + #endif +} + +// CHECK: define{{.*}} i{{[0-9]+}} @main() +// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: ret + +// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}}) +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams( +// CHECK: ret +// +// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]]) +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*, +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*, +// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]], +// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]], +// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]], +// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]], +// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]], +// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]], +// the distribute loop +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]], +// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]], +// CHECK: ret void + +// template tmain +// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]() +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) +// CHECK: call i{{[0-9]+}} @__tgt_target_teams( +// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}}) +// CHECK: ret + + +// CHECK: define internal void [[OFFLOAD_FUN_1]]( +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, +// CHECK: ret + +// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]]) +// skip alloca of global_tid and bound_tid +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: {{.+}} = alloca i{{[0-9]+}}*, +// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*, +// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*, +// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*, +// skip loop variables +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: {{.+}} = alloca i{{[0-9]+}}, +// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*, + +// skip init of bound and global tid +// CHECK: store i{{[0-9]+}}* {{.*}}, +// CHECK: store i{{[0-9]+}}* {{.*}}, +// copy from parameters to local address variables +// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]], + +// load content of local address variables +// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]], +// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]], +// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]], +// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]], +// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: call void @__kmpc_for_static_init_4( +// assignment: vec[i] = t_var; +// CHECK: [[IV_VAL1:%.+]] = +// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}} +// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]], + +// assignment: s_arr[i] = var; +// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]], +// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8* +// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]], +// CHECK: call void @__kmpc_for_static_fini( + +// lastprivates +// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]], +// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0 +// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]] + +// CHECK: [[OMP_LASTPRIV_BLOCK]]: +// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]], +// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]], +// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8* +// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]], +// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]* +// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2 +// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]] +// CHECK: [[S_ARR_COPY_BLOCK]]: +// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}} +// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8* +// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}}) +// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1 +// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}} +// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]] +// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]] +// CHECK: [[S_ARR_COPY_DONE]]: +// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]], +// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8* +// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8* +// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}}) +// CHECK: ret void +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp b/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp index 4c715bf2ce30..51dbfef22933 100644 --- a/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_lastprivate_messages.cpp @@ -18,14 +18,14 @@ public: const S2 &operator =(const S2&) const; S2 &operator =(const S2&); static float S2s; // expected-note {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note {{static data member is predetermined as shared}} }; -const float S2::S2sc = 0; // expected-note {{static data member is predetermined as shared}} +const float S2::S2sc = 0; const S2 b; const S2 ba[5]; class S3 { int a; - S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}} + S3 &operator=(const S3 &s3); // expected-note {{implicitly declared private here}} public: S3() : a(0) {} @@ -52,7 +52,7 @@ public: }; class S6 { int a; - S6() : a(0) {} + S6() : a(0) {} // expected-note {{implicitly declared private here}} public: S6(const S6 &s6) : a(s6.a) {} @@ -255,12 +255,14 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd lastprivate(j) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{firstprivate variable cannot be lastprivate}} expected-note@+2 {{defined as firstprivate}} #pragma omp target -#pragma omp teams distribute simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}} +#pragma omp teams distribute simd firstprivate(m) lastprivate(m) for (i = 0; i < argc; ++i) foo(); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute simd lastprivate(n) firstprivate(n) // OK +#pragma omp teams distribute simd lastprivate(n) firstprivate(n) // expected-error {{calling a private constructor of class 'S6'}} for (i = 0; i < argc; ++i) foo(); static int si; diff --git a/test/OpenMP/teams_distribute_simd_linear_messages.cpp b/test/OpenMP/teams_distribute_simd_linear_messages.cpp index 6bfdb16274a5..a53f0e62dda2 100644 --- a/test/OpenMP/teams_distribute_simd_linear_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_linear_messages.cpp @@ -18,34 +18,42 @@ void test_linear_colons() { int B = 0; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B:bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B::ib:B:bfoo()) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B:ib) // expected-error {{use of undeclared identifier 'ib'; did you mean 'B::ib'}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(z:B:ib) // expected-error {{unexpected ':' in nested name specifier; did you mean '::'?}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B:B::bfoo()) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(X::x : ::z) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 3 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B,::z, X::x) for (int i = 0; i < 10; ++i) ; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(::z) for (int i = 0; i < 10; ++i) ; @@ -54,6 +62,7 @@ void test_linear_colons() #pragma omp teams distribute simd linear(B::bfoo()) // expected-error {{expected variable name}} for (int i = 0; i < 10; ++i) ; +// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(B::ib,B:C1+C2) for (int i = 0; i < 10; ++i) ; @@ -76,6 +85,7 @@ template<int L, class T, class N> T test_template(T* arr, N num) { template<int LEN> int test_warn() { int ind2 = 0; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(ind2:LEN) // expected-warning {{zero linear step (ind2 should probably be const)}} for (int i = 0; i < 100; i++) { @@ -133,10 +143,12 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; @@ -145,6 +157,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc : 5) for (int k = 0; k < argc; ++k) ++k; @@ -161,6 +174,7 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute simd linear (argv[1]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(e, g) for (int k = 0; k < argc; ++k) ++k; @@ -169,32 +183,11 @@ template<class I, class C> int foomain(I argc, C **argv) { #pragma omp teams distribute simd linear(h) // expected-error {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear(i) for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int v = 0; - int i; - #pragma omp target - #pragma omp teams distribute simd linear(v:i) - for (int k = 0; k < argc; ++k) { i = k; v += i; } - } - -#pragma omp target -#pragma omp teams distribute simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - - int v = 0; - -#pragma omp target -#pragma omp teams distribute simd linear(v:j) - for (int k = 0; k < argc; ++k) { ++k; v += j; } - -#pragma omp target -#pragma omp teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; return 0; } @@ -230,10 +223,12 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd linear () // expected-error {{expected expression}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} for (int k = 0; k < argc; ++k) ++k; @@ -242,6 +237,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}} for (int k = 0; k < argc; ++k) ++k; +// expected-error@+2 {{only loop iteration variables are allowed in 'linear' clause in distribute directives}} #pragma omp target #pragma omp teams distribute simd linear (argc) for (int k = 0; k < argc; ++k) ++k; @@ -267,26 +263,6 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}} for (int k = 0; k < argc; ++k) ++k; - #pragma omp parallel - { - int i; - #pragma omp target - #pragma omp teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - - #pragma omp target - #pragma omp teams distribute simd linear(i : 4) - for (int k = 0; k < argc; ++k) { ++k; i += 4; } - } - -#pragma omp target -#pragma omp teams distribute simd linear(j) - for (int k = 0; k < argc; ++k) ++k; - -#pragma omp target -#pragma omp teams distribute simd linear(i) - for (int k = 0; k < argc; ++k) ++k; - foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}} return 0; } diff --git a/test/OpenMP/teams_distribute_simd_loop_messages.cpp b/test/OpenMP/teams_distribute_simd_loop_messages.cpp index 81fbfe860a70..60e07a3db596 100644 --- a/test/OpenMP/teams_distribute_simd_loop_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_loop_messages.cpp @@ -2,7 +2,7 @@ class S { int a; - S() : a(0) {} + S() : a(0) {} // expected-note {{implicitly declared private here}} public: S(int v) : a(v) {} @@ -693,8 +693,9 @@ void test_loop_eh() { void test_loop_firstprivate_lastprivate() { S s(4); +// expected-error@+2 {{lastprivate variable cannot be firstprivate}} expected-note@+2 {{defined as lastprivate}} #pragma omp target -#pragma omp teams distribute simd lastprivate(s) firstprivate(s) +#pragma omp teams distribute simd lastprivate(s) firstprivate(s) // expected-error {{calling a private constructor of class 'S'}} for (int i = 0; i < 16; ++i) ; } diff --git a/test/OpenMP/teams_distribute_simd_messages.cpp b/test/OpenMP/teams_distribute_simd_messages.cpp index 9095caf38787..b00de796b0ed 100644 --- a/test/OpenMP/teams_distribute_simd_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_messages.cpp @@ -9,6 +9,9 @@ static int pvt; #pragma omp teams distribute simd // expected-error {{unexpected OpenMP directive '#pragma omp teams distribute simd'}} int main(int argc, char **argv) { + #pragma omp target + #pragma omp teams distribute simd + f; // expected-error {{use of undeclared identifier 'f'}} #pragma omp target #pragma omp teams distribute simd { // expected-warning {{extra tokens at the end of '#pragma omp teams distribute simd' are ignored}} for (int i = 0; i < argc; ++i) diff --git a/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/test/OpenMP/teams_distribute_simd_private_codegen.cpp new file mode 100644 index 000000000000..ca9a673e2441 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -0,0 +1,238 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + +// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams distribute simd private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333, +int t_var = 333; +// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212, + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute simd private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}) + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}}) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}) + // Skip global, bound tid and loop vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: alloca i32, + // LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}} + // LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + // LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]], + + g = 1; + g1 = 1; + sivar = 2; + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]], + // LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]], + // LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + g = 2; + g1 = 2; + sivar = 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]] + // LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + // LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]] + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute simd private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]]() +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK: {{.+}} = alloca i32, +// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], +// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]], +// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}}, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, +// CHECK: call void @[[TOFFL1:.+]]() +// CHECK: ret + +// CHECK: define {{.*}}void @[[TOFFL1]]() +// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}}) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}) +// Skip global, bound tid and loop vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, +// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], +// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*, + +// private(s_arr) +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], +// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ] +// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]]) +// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]], + +// private(var) +// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]]) +// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]] + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]] +// CHECK-DAG: {{.+}} = {{.+}} [[TMP]] +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp new file mode 100644 index 000000000000..3153cce3a923 --- /dev/null +++ b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -0,0 +1,218 @@ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams distribute simd reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + + // LAMBDA-LABEL: @main + // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call void @[[LOFFL1:.+]]( + // LAMBDA: ret +#pragma omp target +#pragma omp teams distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + // LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to + // LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) + // LAMBDA: ret void + + // LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) + // Skip global and bound tid vars + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: {{.+}} = alloca i32*, + // LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, + // LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, + // LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], + // LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], + // LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] + // LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + + // LAMBDA: call void @__kmpc_for_static_init_4( + // LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA: call void [[INNER_LAMBDA:@.+]]( + // LAMBDA: call void @__kmpc_for_static_fini( + // LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], + // LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to + // LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], + // LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to + // LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) + // LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [ + // LAMBDA: {{.+}}, label %[[CASE1:.+]] + // LAMBDA: {{.+}}, label %[[CASE2:.+]] + // LAMBDA: ] + // LAMBDA: [[CASE1]]: + // LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] + // LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + // LAMBDA: [[CASE2]]: + // LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], + // LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] + // LAMBDA: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) + // LAMBDA: br + + sivar += i; + + [&]() { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + + sivar += 4; + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + + // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]] + // LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]] + // LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4 + // LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]] + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams distribute simd reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + +// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer + +// CHECK: define {{.*}}i{{[0-9]+}} @main() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) +// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// CHECK: ret + +// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]]) +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]], +// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to +// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]], +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + + +// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, +// CHECK: call void @[[TOFFL1:.+]]({{.+}}) +// CHECK: ret + +// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]]) +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}, +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to +// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]]) +// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]]) +// CHECK: ret void + +// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]]) +// Skip global and bound tid vars +// CHECK: {{.+}} = alloca i32*, +// CHECK: {{.+}} = alloca i32*, +// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*, +// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}}, +// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}], +// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]], +// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]] +// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]], + +// CHECK: call void @__kmpc_for_static_init_4( +// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]], +// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to +// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]], +// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to +// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]]) +// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [ +// CHECK: {{.+}}, label %[[CASE1:.+]] +// CHECK: {{.+}}, label %[[CASE2:.+]] +// CHECK: ] +// CHECK: [[CASE1]]: +// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]], +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]] +// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]], +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br +// CHECK: [[CASE2]]: +// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]], +// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]] +// CHECK: call void @__kmpc_end_reduce({{.+}}, {{.+}}, {{.+}} [[RED_VAR]]) +// CHECK: br + +// CHECK: !{!"llvm.loop.vectorize.enable", i1 true} +#endif diff --git a/test/OpenMP/teams_distribute_simd_reduction_messages.cpp b/test/OpenMP/teams_distribute_simd_reduction_messages.cpp index 879ec688dcb5..09d22bd4bd81 100644 --- a/test/OpenMP/teams_distribute_simd_reduction_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_reduction_messages.cpp @@ -19,9 +19,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_distribute_simd_shared_messages.cpp b/test/OpenMP/teams_distribute_simd_shared_messages.cpp index cd963e8469a8..6b06f5b6f203 100644 --- a/test/OpenMP/teams_distribute_simd_shared_messages.cpp +++ b/test/OpenMP/teams_distribute_simd_shared_messages.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { #pragma omp teams distribute simd shared (S1) // expected-error {{'S1' does not refer to a value}} for (int j=0; j<100; j++) foo(); #pragma omp target - #pragma omp teams distribute simd shared (a, b, c, d, f) + #pragma omp teams distribute simd shared (a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} for (int j=0; j<100; j++) foo(); #pragma omp target #pragma omp teams distribute simd shared (argv[1]) // expected-error {{expected variable name}} diff --git a/test/OpenMP/teams_messages.cpp b/test/OpenMP/teams_messages.cpp index 3a42a0e8dd6a..642fe73cac75 100644 --- a/test/OpenMP/teams_messages.cpp +++ b/test/OpenMP/teams_messages.cpp @@ -7,6 +7,9 @@ void foo() { int main(int argc, char **argv) { #pragma omp target + #pragma omp teams + f; // expected-error {{use of undeclared identifier 'f'}} + #pragma omp target #pragma omp teams { // expected-warning {{extra tokens at the end of '#pragma omp teams' are ignored}} foo(); #pragma omp target diff --git a/test/OpenMP/teams_private_codegen.cpp b/test/OpenMP/teams_private_codegen.cpp index 1ba010fa5c3c..7ddf39c9b0ea 100644 --- a/test/OpenMP/teams_private_codegen.cpp +++ b/test/OpenMP/teams_private_codegen.cpp @@ -98,7 +98,7 @@ int main() { // lambda and target region in main // LAMBDA: define {{.+}} [[OUTER_LAMBDA]]([[CAP_TY]]* {{.+}}) - // LAMBDA: call void @[[OMP_OFFLOADING:.+]](i{{[0-9]+}}* {{.+}}, i{{[0-9]+}} {{.+}} + // LAMBDA: call void @[[OMP_OFFLOADING:.+]]() // target region in struct constructor // LAMBDA: define{{.*}} void [[ST_CONSTR:@.+]]([[SS_TY]]* %this, @@ -154,7 +154,7 @@ int main() { #pragma omp target #pragma omp teams private(g, sivar) { - // LAMBDA: define{{.+}} @[[OMP_OFFLOADING]](i{{[0-9]+}}* {{.+}} [[G_IN:%.+]], i{{[0-9]+}} [[SIVAR_IN:%.+]] + // LAMBDA: define{{.+}} @[[OMP_OFFLOADING]]() // LAMBDA: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void // LAMBDA: define {{.+}} [[OMP_OUTLINED_1]](i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}* {{.+}} @@ -196,14 +196,13 @@ int main() { // CHECK: define{{.*}} i{{[0-9]+}} @main() // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: [[OFF_IN:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* {{%.+}}, -// CHECK: call void @[[OMP_OFFLOADING:.+]](i{{[0-9]+}} [[OFF_IN]] +// CHECK: call void @[[OMP_OFFLOADING:.+]]() // CHECK: = call{{.*}} i{{.+}} [[TMAIN_INT:@.+]]() // CHECK: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret // target region in main function -// CHECK: define{{.+}} @[[OMP_OFFLOADING]](i{{[0-9]+}} +// CHECK: define{{.+}} @[[OMP_OFFLOADING]]() // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void // CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) diff --git a/test/OpenMP/teams_reduction_messages.cpp b/test/OpenMP/teams_reduction_messages.cpp index 9974c147b72c..7aef97730d9e 100644 --- a/test/OpenMP/teams_reduction_messages.cpp +++ b/test/OpenMP/teams_reduction_messages.cpp @@ -25,9 +25,9 @@ public: S2() : a(0) {} S2(S2 &s2) : a(s2.a) {} static float S2s; // expected-note 2 {{static data member is predetermined as shared}} - static const float S2sc; + static const float S2sc; // expected-note 2 {{'S2sc' declared here}} }; -const float S2::S2sc = 0; // expected-note 2 {{'S2sc' defined here}} +const float S2::S2sc = 0; S2 b; // expected-note 3 {{'b' defined here}} const S2 ba[5]; // expected-note 2 {{'ba' defined here}} class S3 { diff --git a/test/OpenMP/teams_shared_messages.cpp b/test/OpenMP/teams_shared_messages.cpp index e14ba1e32f47..6b013d099dce 100644 --- a/test/OpenMP/teams_shared_messages.cpp +++ b/test/OpenMP/teams_shared_messages.cpp @@ -84,7 +84,7 @@ int main(int argc, char **argv) { #pragma omp teams shared (S1) // expected-error {{'S1' does not refer to a value}} foo(); #pragma omp target - #pragma omp teams shared (a, b, c, d, f) + #pragma omp teams shared (a, b, c, d, f) // expected-error {{incomplete type 'S1' where a complete type is required}} foo(); #pragma omp target #pragma omp teams shared (argv[1]) // expected-error {{expected variable name}} diff --git a/test/OpenMP/vla_crash.c b/test/OpenMP/vla_crash.c index 50dcf068707b..0dcba5503e4c 100644 --- a/test/OpenMP/vla_crash.c +++ b/test/OpenMP/vla_crash.c @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux-gnu -fopenmp -x c -emit-llvm %s -o - | FileCheck %s -// expected-no-diagnostics int a; @@ -20,3 +19,24 @@ void foo() { // CHECK: define internal void [[OUTLINED]](i32* {{[^,]+}}, i32* {{[^,]+}}, i64 {{[^,]+}}, i32** {{[^,]+}}, i64 {{[^,]+}}, i32**** {{[^,]+}}) +// CHECK-LABEL: bar +void bar(int n, int *a) { + // CHECK: [[N:%.+]] = alloca i32, + // CHECK: [[A:%.+]] = alloca i32*, + // CHECK: [[P:%.+]] = alloca i32*, + // CHECK: @__kmpc_global_thread_num + // CHECK: [[BC:%.+]] = bitcast i32** [[A]] to i32* + // CHECK: store i32* [[BC]], i32** [[P]], + // CHECK: call void @__kmpc_serialized_parallel + // CHECK: call void [[OUTLINED:@[^(]+]](i32* %{{[^,]+}}, i32* %{{[^,]+}}, i64 %{{[^,]+}}, i32** [[P]], i32** [[A]]) + // CHECK: call void @__kmpc_end_serialized_parallel + // CHECK: ret void + // expected-warning@+1 {{incompatible pointer types initializing 'int (*)[n]' with an expression of type 'int **'}} + int(*p)[n] = &a; +#pragma omp parallel if(0) + // expected-warning@+1 {{comparison of distinct pointer types ('int (*)[n]' and 'int **')}} + if (p == &a) { + } +} + +// CHECK: define internal void [[OUTLINED]](i32* {{[^,]+}}, i32* {{[^,]+}}, i64 {{[^,]+}}, i32** {{[^,]+}}, i32** {{[^,]+}}) |