diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 74f5cda469161..e2de87a2e773e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5321,6 +5321,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); + // Original alloca for lifetime markers + Address SRetAlloca = Address::invalid(); bool NeedSRetLifetimeEnd = false; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always @@ -5335,8 +5337,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SRetPtr = ReturnValue.getAddress(); } else { SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); - if (HaveInsertPoint() && ReturnValue.isUnused()) + if (HaveInsertPoint() && ReturnValue.isUnused()) { NeedSRetLifetimeEnd = EmitLifetimeStart(SRetPtr.getBasePointer()); + if (NeedSRetLifetimeEnd) + SRetAlloca = SRetPtr; + } } if (IRFunctionArgs.hasSRetArg()) { // A mismatch between the allocated return value's AS and the target's @@ -5917,8 +5922,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // can't depend on being inside of an ExprWithCleanups, so we need to manually // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. + // Use the original alloca pointer (before any addrspacecast) for the + // lifetime end marker, since lifetime intrinsics must reference the alloca + // address space. if (NeedSRetLifetimeEnd) - pushFullExprCleanup(NormalEHLifetimeMarker, SRetPtr); + pushFullExprCleanup(NormalEHLifetimeMarker, SRetAlloca); llvm::BasicBlock *InvokeDest = CannotThrow ? 
nullptr : getInvokeDest(); diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index b8150a24d45fc..04098bf1b0f7f 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -294,8 +294,27 @@ void AggExprEmitter::withReturnValueSlot( // We need to always provide our own temporary if destruction is required. // Otherwise, EmitCall will emit its own, notice that it's "unused", and end // its lifetime before we have the chance to emit a proper destructor call. + // + // We also need a temporary if the destination is in a different address space + // from the sret AS. Use the target hook to get the actual sret AS for this + // return type. + const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl(); + LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS); + bool CanAggregateCopy = + RD ? (RD->hasTrivialCopyConstructor() || + RD->hasTrivialMoveConstructor() || RD->hasTrivialCopyAssignment() || + RD->hasTrivialMoveAssignment() || RD->hasAttr() || + RD->isUnion()) + : RetTy.isTriviallyCopyableType(CGF.getContext()); + bool DestASMismatch = !Dest.isIgnored() && CanAggregateCopy && + Dest.getAddress() + .getBasePointer() + ->stripPointerCasts() + ->getType() + ->getPointerAddressSpace() != SRetAS; bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() || - (RequiresDestruction && Dest.isIgnored()); + (RequiresDestruction && Dest.isIgnored()) || DestASMismatch; Address RetAddr = Address::invalid(); @@ -303,6 +322,14 @@ void AggExprEmitter::withReturnValueSlot( llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); + if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) { + llvm::Type *SRetPtrTy = + llvm::PointerType::get(CGF.getLLVMContext(), SRetAS); + RetAddr = RetAddr.withPointer( + CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGF, RetAddr.getBasePointer(), 
LangAS::Default, SRetPtrTy), + RetAddr.isKnownNonNull()); + } } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 7dc2eaf1e9f75..e7088ac2befc2 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1372,12 +1372,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // If C++ prohibits us from making a copy, return by address. + // If C++ prohibits us from making a copy, return by address using the target + // hook getSRetAddrSpace to decide the AS. if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 19d9265247119..74aa2165dad80 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1189,9 +1189,10 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); // MSVC always passes `this` before the 
`sret` parameter. FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index d0edae1295094..6d13ba61e1a8d 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -32,6 +32,7 @@ class Value; } namespace clang { +class CXXRecordDecl; class Decl; namespace CodeGen { @@ -344,6 +345,12 @@ class TargetCodeGenInfo { LangAS SrcAddr, llvm::Type *DestTy) const; + /// Get the address space for an indirect (sret) return of the given type. + /// The default falls back to the alloca AS. + virtual LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const { + return getASTAllocaAddressSpace(); + } + /// Get address space of pointer parameter for __cxa_atexit. virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const { return LangAS::Default; diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 0fcbf7e458a34..ad07363541cb1 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "clang/AST/DeclCXX.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -308,6 +309,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { return getLangASFromTargetAS( getABIInfo().getDataLayout().getAllocaAddrSpace()); } + + LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override; + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, @@ -448,6 +452,15 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +LangAS +AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const { + // Types with no viable copy/move must be constructed in-place, so use the + // default AS so the sret pointer matches the "this" convention. 
+ if (RD && !RD->canPassInRegisters()) + return LangAS::Default; + return getASTAllocaAddressSpace(); +} + LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 994282debb0d0..f091a94611c1f 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98 // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11 // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS +// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-SPIRV // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98-ELIDE // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-ELIDE // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS-ELIDE +// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-SPIRV-ELIDE // Reduced from PR12208 class X { @@ -17,7 +19,7 @@ class X { }; // CHECK-LABEL: define{{.*}} void @_Z4Testv( -// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%.*]]) +// 
CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%[^),]+]]) X Test() { X x; @@ -26,16 +28,18 @@ X Test() // sret argument. // CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( - // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr - // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]] + // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]] + // CHECK-CXX11-SPIRV: [[TMP0:%.*]] = addrspacecast ptr [[AGG_RESULT]] to ptr addrspace(4) + // CHECK-CXX11-SPIRV-NEXT: call spir_func addrspace(4) void @_ZN1XC1EOS_(ptr addrspace(4) noundef align 1 dereferenceable{{.*}}(1) [[TMP0]] // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( + // CHECK-CXX11-SPIRV-ELIDE-NOT: call void @_ZN1XC1EOS_( // Make sure that the destructor for X is called. // FIXME: This call is present even in the -ELIDE runs, but is guarded by a // branch that is never taken in those cases. We could generate better IR // here. 
- // CHECK: call void @_ZN1XD1Ev( + // CHECK: call {{.*}}void @_ZN1XD1Ev( return x; } diff --git a/clang/test/CodeGenHIP/placement-new-addrspace.hip b/clang/test/CodeGenHIP/placement-new-addrspace.hip new file mode 100644 index 0000000000000..48a401baf9a78 --- /dev/null +++ b/clang/test/CodeGenHIP/placement-new-addrspace.hip @@ -0,0 +1,63 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel|local_test" --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s +// REQUIRES: amdgpu-registered-target + +// Verify that when a function returning an aggregate via sret is called with a +// destination in a different address space (e.g. global pointer from kernel +// arg), the compiler materialises a temporary in the alloca AS and copies back, +// rather than emitting an invalid addrspacecast of the destination pointer. + +typedef __SIZE_TYPE__ size_t; +__attribute__((device)) void *operator new(size_t, void *p) noexcept { return p; } + +struct Big { + int v[32]; + __attribute__((device)) Big(int x) { + for (int i = 0; i < 32; ++i) + v[i] = x + i; + } +}; + +// CHECK-LABEL: define dso_local void @_Z8make_bigv( +// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret void +// +__attribute__((device)) Big make_big() { return Big(7); } + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big( +// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, 
align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]] +// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false) +// CHECK-NEXT: ret void +// +__attribute__((global)) void kernel(Big *out) { + new (out) Big(make_big()); +} + +// If the destination is ultimately backed by alloca AS (even through cast +// chains), we should pass it directly as sret and avoid an extra temp/copy. 
+// CHECK-LABEL: define dso_local void @_Z10local_testv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[LOCAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr +// CHECK-NEXT: [[LOCAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_ASCAST]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[LOCAL_ASCAST_ASCAST]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// +__attribute__((device)) void local_test() { + Big local = make_big(); +} diff --git a/clang/test/CodeGenHIP/sret-lifetime-markers.cpp b/clang/test/CodeGenHIP/sret-lifetime-markers.cpp new file mode 100644 index 0000000000000..42b1ee2707297 --- /dev/null +++ b/clang/test/CodeGenHIP/sret-lifetime-markers.cpp @@ -0,0 +1,60 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=AMDGCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=SPIRV %s +// REQUIRES: amdgpu-registered-target + +// Verify that lifetime markers for an sret temporary use the alloca address +// space (5), even when the sret pointer itself is in the default address +// space (0). Regression test for an assertion failure in EmitLifetimeEnd +// when the return value of a function returning a non-trivially-copyable +// type is discarded. 
+ +template +struct SuperScalar { + double val[N]; + + SuperScalar() { + for (int i = 0; i < N; i++) val[i] = 0.0; + } + SuperScalar(const SuperScalar& rhs) { + for (int i = 0; i < N; i++) val[i] = rhs.val[i]; + } + SuperScalar& operator=(const SuperScalar& rhs) { + for (int i = 0; i < N; i++) val[i] = rhs.val[i]; + return *this; + } +}; + +template +SuperScalar atomic_fetch_add(SuperScalar* dest, const SuperScalar& val); + +template +void add_functor(SuperScalar* data, int i, SuperScalar& update) { + atomic_fetch_add(&data[i], update); +} + +template void add_functor<4>(SuperScalar<4>*, int, SuperScalar<4>&); +// AMDGCN-LABEL: define weak_odr void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_( +// AMDGCN-SAME: ptr noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8, addrspace(5) +// AMDGCN-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr [[DATA]], i64 [[IDXPROM]] +// AMDGCN-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3:[0-9]+]] +// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// AMDGCN-NEXT: call void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP_ASCAST]], ptr noundef [[ARRAYIDX]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR4:[0-9]+]] +// AMDGCN-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3]] +// AMDGCN-NEXT: ret void +// +// +// SPIRV-LABEL: define weak_odr spir_func void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_( +// SPIRV-SAME: ptr addrspace(4) noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE:%.*]]) 
local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] comdat { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8 +// SPIRV-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +// SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr addrspace(4) [[DATA]], i64 [[IDXPROM]] +// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]] +// SPIRV-NEXT: call spir_func addrspace(4) void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind nonnull writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP]], ptr addrspace(4) noundef [[ARRAYIDX]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR3]] +// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[TMP]]) #[[ATTR3]] +// SPIRV-NEXT: ret void +// diff --git a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip new file mode 100644 index 0000000000000..3ef293d126cf8 --- /dev/null +++ b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=AMDGCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=SPIRV %s +// REQUIRES: amdgpu-registered-target + +// Verify that a non-trivially-copyable type returned via sret into a member +// field uses addrspace(0) for the sret pointer (not addrspace(5)). So +// in-place construction through the addrspace(0) is the only legal option. 
+struct NontrivialPtr { + void *p; + NontrivialPtr() noexcept; + NontrivialPtr(const NontrivialPtr &) = delete; + NontrivialPtr(NontrivialPtr &&) = delete; + ~NontrivialPtr() noexcept; +}; + +NontrivialPtr make() noexcept; + +struct Wrapper { + NontrivialPtr field; + Wrapper() noexcept; + virtual ~Wrapper() noexcept; +}; + +Wrapper::Wrapper() noexcept : field(make()) {} +// AMDGCN-LABEL: define dso_local void @_ZN7WrapperC2Ev( +// AMDGCN-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// AMDGCN-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr [[THIS1]], align 8 +// AMDGCN-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr [[THIS1]], i32 0, i32 1 +// AMDGCN-NEXT: call void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR:%.*]]) align 8 [[FIELD]]) #[[ATTR2:[0-9]+]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @_ZN7WrapperC1Ev( +// AMDGCN-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// AMDGCN-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2]] 
+// AMDGCN-NEXT: ret void +// +// +// SPIRV-LABEL: define spir_func void @_ZN7WrapperC2Ev( +// SPIRV-SAME: ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS:%.*]]) unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] align 2 { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[THIS_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NONTRIVIALPTR:%.*]], align 8 +// SPIRV-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[THIS_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr addrspace(4) [[THIS]], ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[THIS1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr addrspace(4) [[THIS1]], align 8 +// SPIRV-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr addrspace(4) [[THIS1]], i32 0, i32 1 +// SPIRV-NEXT: call spir_func addrspace(4) void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR]]) align 8 [[TMP]]) #[[ATTR3:[0-9]+]] +// SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[FIELD]], ptr align 8 [[TMP]], i64 8, i1 false) +// SPIRV-NEXT: ret void +// +// +// SPIRV-LABEL: define spir_func void @_ZN7WrapperC1Ev( +// SPIRV-SAME: ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS:%.*]]) unnamed_addr addrspace(4) #[[ATTR0]] align 2 { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[THIS_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[THIS_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr addrspace(4) [[THIS]], ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[THIS1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: call spir_func addrspace(4) void 
@_ZN7WrapperC2Ev(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS1]]) #[[ATTR3]] +// SPIRV-NEXT: ret void +// diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip index 6103edba46274..34aca1fadccee 100644 --- a/clang/test/CodeGenHIP/store-addr-space.hip +++ b/clang/test/CodeGenHIP/store-addr-space.hip @@ -2,6 +2,8 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \ // RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=SPIRV --enable-var-scope %s struct Foo { unsigned long long val; @@ -12,19 +14,18 @@ struct Foo { }; // AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo( -// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr // AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr -// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr // AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr -// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4 +// 
AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8 // AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8 -// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] -// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]] +// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8 @@ -33,6 +34,28 @@ struct Foo { // AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // +// SPIRV-LABEL: define spir_func void @_Z3barPK3Foo( +// SPIRV-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[SRC_PTR:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// SPIRV-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8 +// SPIRV-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr [[RESULT_PTR]] to ptr addrspace(4) +// SPIRV-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_PTR_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr [[AGG_RESULT]] to ptr addrspace(4) +// SPIRV-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr [[DST]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr [[AGG_RESULT]], ptr addrspace(4) [[RESULT_PTR_ASCAST]], align 8 +// SPIRV-NEXT: store ptr addrspace(4) [[SRC_PTR]], ptr addrspace(4) [[SRC_PTR_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: call spir_func addrspace(4) void 
@_ZN3FooC1Ev(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] +// SPIRV-NEXT: store ptr addrspace(4) [[AGG_RESULT_ASCAST]], ptr addrspace(4) [[DST_ASCAST]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SRC_PTR_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr addrspace(4) [[TMP0]], i32 0, i32 0 +// SPIRV-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(4) [[VAL]], align 8 +// SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DST_ASCAST]], align 8 +// SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(4) [[TMP2]], i64 0 +// SPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[ARRAYIDX]], align 8 +// SPIRV-NEXT: ret void +// __attribute__((device)) Foo bar(const Foo *const src_ptr) { Foo result; diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index 81d0cce5190e7..c8195b124feb1 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -20,8 +20,7 @@ E::E() noexcept : foo(s()) {} // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[FOO:%.*]] = getelementptr inbounds nuw [[STRUCT_E:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK-NEXT: [[FOO_ASCAST:%.*]] = addrspacecast ptr [[FOO]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // //