From 784b958e0caa570be4e6d4d495583c168fb1433c Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Fri, 6 Mar 2026 12:41:19 -0600 Subject: [PATCH 1/4] [Clang] Fix invalid sret addrspacecast for placement new on HIP (#183639) When a HIP kernel uses placement new with a function returning an aggregate via sret (e.g. `new (out) T(make_t())`), and the placement destination is in global memory (addrspace 1), the sret pointer was addrspacecast'd to addrspace 5 (private), producing an invalid pointer that faults at runtime. Instead of casting the caller's pointer directly, materialise a temporary alloca in the callee's expected address space, pass that as the sret argument, and copy the result back to the original destination after the call. (cherry picked from commit e2f7f8365866945b2eb6763566385be0da255068) --- clang/lib/CodeGen/CGExprAgg.cpp | 23 ++++++- .../CodeGenHIP/placement-new-addrspace.hip | 63 +++++++++++++++++++ clang/test/OpenMP/amdgcn_sret_ctor.cpp | 4 +- 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenHIP/placement-new-addrspace.hip diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index b8150a24d45fc..8e0a026d397a0 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -294,8 +294,21 @@ void AggExprEmitter::withReturnValueSlot( // We need to always provide our own temporary if destruction is required. // Otherwise, EmitCall will emit its own, notice that it's "unused", and end // its lifetime before we have the chance to emit a proper destructor call. + // + // We also need a temporary if the destination is in a different address space + // from the alloca AS, to avoid an invalid addrspacecast on the sret pointer. + // Look through addrspacecasts to avoid unnecessary temps when the + // destination is already in the alloca AS. 
+ unsigned SRetAS = CGF.getContext().getTargetAddressSpace( + CGF.CGM.getASTAllocaAddressSpace()); + bool DestASMismatch = + !Dest.isIgnored() && Dest.getAddress() + .getBasePointer() + ->stripPointerCasts() + ->getType() + ->getPointerAddressSpace() != SRetAS; bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() || - (RequiresDestruction && Dest.isIgnored()); + (RequiresDestruction && Dest.isIgnored()) || DestASMismatch; Address RetAddr = Address::invalid(); @@ -303,6 +316,14 @@ void AggExprEmitter::withReturnValueSlot( llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { RetAddr = Dest.getAddress(); + if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) { + llvm::Type *SRetPtrTy = + llvm::PointerType::get(CGF.getLLVMContext(), SRetAS); + RetAddr = RetAddr.withPointer( + CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGF, RetAddr.getBasePointer(), LangAS::Default, SRetPtrTy), + RetAddr.isKnownNonNull()); + } } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) { diff --git a/clang/test/CodeGenHIP/placement-new-addrspace.hip b/clang/test/CodeGenHIP/placement-new-addrspace.hip new file mode 100644 index 0000000000000..48a401baf9a78 --- /dev/null +++ b/clang/test/CodeGenHIP/placement-new-addrspace.hip @@ -0,0 +1,63 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel|local_test" --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s +// REQUIRES: amdgpu-registered-target + +// Verify that when a function returning an aggregate via sret is called with a +// destination in a different address space (e.g. global pointer from kernel +// arg), the compiler materialises a temporary in the alloca AS and copies back, +// rather than emitting an invalid addrspacecast of the destination pointer. 
+ +typedef __SIZE_TYPE__ size_t; +__attribute__((device)) void *operator new(size_t, void *p) noexcept { return p; } + +struct Big { + int v[32]; + __attribute__((device)) Big(int x) { + for (int i = 0; i < 32; ++i) + v[i] = x + i; + } +}; + +// CHECK-LABEL: define dso_local void @_Z8make_bigv( +// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret void +// +__attribute__((device)) Big make_big() { return Big(7); } + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big( +// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]] +// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false) +// CHECK-NEXT: ret void +// +__attribute__((global)) void kernel(Big *out) { + new 
(out) Big(make_big()); +} + +// If the destination is ultimately backed by alloca AS (even through cast +// chains), we should pass it directly as sret and avoid an extra temp/copy. +// CHECK-LABEL: define dso_local void @_Z10local_testv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5) +// CHECK-NEXT: [[LOCAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr +// CHECK-NEXT: [[LOCAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_ASCAST]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[LOCAL_ASCAST_ASCAST]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// +__attribute__((device)) void local_test() { + Big local = make_big(); +} diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index 81d0cce5190e7..ec1b83ecec4cc 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -16,12 +16,12 @@ E::E() noexcept : foo(s()) {} // CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5) // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[FOO:%.*]] = getelementptr inbounds nuw [[STRUCT_E:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK-NEXT: [[FOO_ASCAST:%.*]] = addrspacecast ptr [[FOO]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) 
dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // From 52a6a1957f241f829e7ec53fb109899e88138889 Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Fri, 6 Mar 2026 14:26:33 -0600 Subject: [PATCH 2/4] =?UTF-8?q?[Clang]=20Fix=20EmitAggregateCopy=20asserti?= =?UTF-8?q?on=20for=20non-trivially-copyable=20sr=E2=80=A6=20(#185091)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …et types Fix for buildbot crash on #183639 The UseTemp path in AggExprEmitter::withReturnValueSlot copies back via EmitAggregateCopy, which asserts that the type has a trivial copy/move constructor or assignment operator. Gate the DestASMismatch condition on isTriviallyCopyableType so that non-trivially-copyable types (e.g. std::exception_ptr) fall through to the addrspacecast path instead. Fix buildbot crash: https://lab.llvm.org/buildbot/#/builders/73/builds/19803 (cherry picked from commit 337fed353d771d3425d06393637fdc956097b51e) --- clang/lib/CodeGen/CGExprAgg.cpp | 13 +++---- .../CodeGenHIP/sret-nontrivial-copyable.hip | 34 +++++++++++++++++++ clang/test/OpenMP/amdgcn_sret_ctor.cpp | 4 +-- 3 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGenHIP/sret-nontrivial-copyable.hip diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 8e0a026d397a0..990bdb21c34a4 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -301,12 +301,13 @@ void AggExprEmitter::withReturnValueSlot( // destination is already in the alloca AS. 
unsigned SRetAS = CGF.getContext().getTargetAddressSpace( CGF.CGM.getASTAllocaAddressSpace()); - bool DestASMismatch = - !Dest.isIgnored() && Dest.getAddress() - .getBasePointer() - ->stripPointerCasts() - ->getType() - ->getPointerAddressSpace() != SRetAS; + bool DestASMismatch = !Dest.isIgnored() && + RetTy.isTriviallyCopyableType(CGF.getContext()) && + Dest.getAddress() + .getBasePointer() + ->stripPointerCasts() + ->getType() + ->getPointerAddressSpace() != SRetAS; bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() || (RequiresDestruction && Dest.isIgnored()) || DestASMismatch; diff --git a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip new file mode 100644 index 0000000000000..ee39104470fa1 --- /dev/null +++ b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip @@ -0,0 +1,34 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck %s +// REQUIRES: amdgpu-registered-target + +// Verify that a non-trivially-copyable type returned via sret into a member +// field does not trigger an EmitAggregateCopy assertion. 
+// fix for a buildbot failure +struct NontrivialPtr { + void *p; + NontrivialPtr() noexcept; + NontrivialPtr(const NontrivialPtr &) noexcept; + NontrivialPtr &operator=(const NontrivialPtr &) noexcept; + ~NontrivialPtr() noexcept; +}; + +NontrivialPtr make() noexcept; + +struct Wrapper { + NontrivialPtr field; + Wrapper() noexcept; + virtual ~Wrapper() noexcept; +}; + +// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev( +// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +Wrapper::Wrapper() noexcept : field(make()) {} diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index ec1b83ecec4cc..81d0cce5190e7 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -16,12 +16,12 @@ E::E() noexcept : foo(s()) {} // CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 1, addrspace(5) // CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[FOO:%.*]] = getelementptr inbounds nuw [[STRUCT_E:%.*]], ptr [[THIS1]], i32 0, i32 0 -// 
CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1 [[TMP]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: [[FOO_ASCAST:%.*]] = addrspacecast ptr [[FOO]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO_ASCAST]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // From 007666f9dca41d2fd6f6beb567cb6d3425c30917 Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Thu, 23 Apr 2026 15:40:33 -0500 Subject: [PATCH 3/4] [Clang] Fix sret AS for non-trivial-copy returns. (#186275) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit classifyReturnType used getAllocaAddrSpace() for sret, which is wrong on targets like AMDGPU where alloca lives in addrspace(5). For types with deleted copy/move constructors, there is no way to construct into a temp and copy out — the sret pointer must point directly to the caller's destination in the default address space. Add a target hook getSRetAddrSpace() so AMDGPU can return LangAS::Default for non-register-passable types. 
Fixes issue #185744 (cherry picked from commit de82b4790943b2a48d9c974ed3c6c1707c3edeb0) --- clang/lib/CodeGen/CGExprAgg.cpp | 19 +++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 10 +-- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 7 +- clang/lib/CodeGen/TargetInfo.h | 7 ++ clang/lib/CodeGen/Targets/AMDGPU.cpp | 13 ++++ .../test/CodeGenCXX/no-elide-constructors.cpp | 12 ++-- .../CodeGenHIP/sret-nontrivial-copyable.hip | 72 ++++++++++++++----- clang/test/CodeGenHIP/store-addr-space.hip | 35 +++++++-- clang/test/OpenMP/amdgcn_sret_ctor.cpp | 3 +- 9 files changed, 136 insertions(+), 42 deletions(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 990bdb21c34a4..04098bf1b0f7f 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -296,13 +296,18 @@ void AggExprEmitter::withReturnValueSlot( // its lifetime before we have the chance to emit a proper destructor call. // // We also need a temporary if the destination is in a different address space - // from the alloca AS, to avoid an invalid addrspacecast on the sret pointer. - // Look through addrspacecasts to avoid unnecessary temps when the - // destination is already in the alloca AS. - unsigned SRetAS = CGF.getContext().getTargetAddressSpace( - CGF.CGM.getASTAllocaAddressSpace()); - bool DestASMismatch = !Dest.isIgnored() && - RetTy.isTriviallyCopyableType(CGF.getContext()) && + // from the sret AS. Use the target hook to get the actual sret AS for this + // return type. + const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl(); + LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS); + bool CanAggregateCopy = + RD ? 
(RD->hasTrivialCopyConstructor() || + RD->hasTrivialMoveConstructor() || RD->hasTrivialCopyAssignment() || + RD->hasTrivialMoveAssignment() || RD->hasAttr() || + RD->isUnion()) + : RetTy.isTriviallyCopyableType(CGF.getContext()); + bool DestASMismatch = !Dest.isIgnored() && CanAggregateCopy && Dest.getAddress() .getBasePointer() ->stripPointerCasts() diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 7dc2eaf1e9f75..e7088ac2befc2 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1372,12 +1372,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // If C++ prohibits us from making a copy, return by address. + // If C++ prohibits us from making a copy, return by address using the target + // hook getSRetAddrSpace to decide the AS. if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 19d9265247119..74aa2165dad80 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1189,9 +1189,10 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), - /*ByVal=*/false); + LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD); + unsigned AS = 
CGM.getContext().getTargetAddressSpace(SRetAS); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index d0edae1295094..6d13ba61e1a8d 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -32,6 +32,7 @@ class Value; } namespace clang { +class CXXRecordDecl; class Decl; namespace CodeGen { @@ -344,6 +345,12 @@ class TargetCodeGenInfo { LangAS SrcAddr, llvm::Type *DestTy) const; + /// Get the address space for an indirect (sret) return of the given type. + /// The default falls back to the alloca AS. + virtual LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const { + return getASTAllocaAddressSpace(); + } + /// Get address space of pointer parameter for __cxa_atexit. virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const { return LangAS::Default; diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 0fcbf7e458a34..ad07363541cb1 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -8,6 +8,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" +#include "clang/AST/DeclCXX.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/AMDGPUAddrSpace.h" @@ -308,6 +309,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { return getLangASFromTargetAS( getABIInfo().getDataLayout().getAllocaAddrSpace()); } + + LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override; + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, @@ -448,6 +452,15 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } +LangAS +AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) 
const { + // Types with no viable copy/move must be constructed in-place , use the + // default AS so the sret pointer matches the "this" convention. + if (RD && !RD->canPassInRegisters()) + return LangAS::Default; + return getASTAllocaAddressSpace(); +} + LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 994282debb0d0..f091a94611c1f 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -1,9 +1,11 @@ // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98 // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11 // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS +// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-SPIRV // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98-ELIDE // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-ELIDE // RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS-ELIDE +// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-SPIRV-ELIDE // Reduced from PR12208 class X { @@ -17,7 +19,7 @@ class X { }; // 
CHECK-LABEL: define{{.*}} void @_Z4Testv( -// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%.*]]) +// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%[^),]+]]) X Test() { X x; @@ -26,16 +28,18 @@ X Test() // sret argument. // CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( - // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr - // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]] + // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]] + // CHECK-CXX11-SPIRV: [[TMP0:%.*]] = addrspacecast ptr [[AGG_RESULT]] to ptr addrspace(4) + // CHECK-CXX11-SPIRV-NEXT: call spir_func addrspace(4) void @_ZN1XC1EOS_(ptr addrspace(4) noundef align 1 dereferenceable{{.*}}(1) [[TMP0]] // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( + // CHECK-CXX11-SPIRV-ELIDE-NOT: call void @_ZN1XC1EOS_( // Make sure that the destructor for X is called. // FIXME: This call is present even in the -ELIDE runs, but is guarded by a // branch that is never taken in those cases. We could generate better IR // here. 
- // CHECK: call void @_ZN1XD1Ev( + // CHECK: call {{.*}}void @_ZN1XD1Ev( return x; } diff --git a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip index ee39104470fa1..3ef293d126cf8 100644 --- a/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip +++ b/clang/test/CodeGenHIP/sret-nontrivial-copyable.hip @@ -1,15 +1,16 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=AMDGCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=SPIRV %s // REQUIRES: amdgpu-registered-target // Verify that a non-trivially-copyable type returned via sret into a member -// field does not trigger an EmitAggregateCopy assertion. -// fix for a buildbot failure +// field uses addrspace(0) for the sret pointer (not addrspace(5)). So +// in-place construction through the addrspace(0) is the only legal option. 
struct NontrivialPtr { void *p; NontrivialPtr() noexcept; - NontrivialPtr(const NontrivialPtr &) noexcept; - NontrivialPtr &operator=(const NontrivialPtr &) noexcept; + NontrivialPtr(const NontrivialPtr &) = delete; + NontrivialPtr(NontrivialPtr &&) = delete; ~NontrivialPtr() noexcept; }; @@ -21,14 +22,53 @@ struct Wrapper { virtual ~Wrapper() noexcept; }; -// CHECK-LABEL: define dso_local void @_ZN7WrapperC1Ev( -// CHECK-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr -// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 -// CHECK-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2:[0-9]+]] -// CHECK-NEXT: ret void -// Wrapper::Wrapper() noexcept : field(make()) {} +// AMDGCN-LABEL: define dso_local void @_ZN7WrapperC2Ev( +// AMDGCN-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// AMDGCN-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr [[THIS1]], align 8 +// AMDGCN-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr [[THIS1]], i32 0, i32 1 +// AMDGCN-NEXT: call void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR:%.*]]) align 
8 [[FIELD]]) #[[ATTR2:[0-9]+]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN-LABEL: define dso_local void @_ZN7WrapperC1Ev( +// AMDGCN-SAME: ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// AMDGCN-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// AMDGCN-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN7WrapperC2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR2]] +// AMDGCN-NEXT: ret void +// +// +// SPIRV-LABEL: define spir_func void @_ZN7WrapperC2Ev( +// SPIRV-SAME: ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS:%.*]]) unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] align 2 { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[THIS_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NONTRIVIALPTR:%.*]], align 8 +// SPIRV-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[THIS_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr addrspace(4) [[THIS]], ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[THIS1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: store ptr addrspace(1) getelementptr inbounds inrange(-16, 16) ({ [4 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV7Wrapper, i32 0, i32 0, i32 2), ptr addrspace(4) [[THIS1]], align 8 +// SPIRV-NEXT: [[FIELD:%.*]] = getelementptr inbounds nuw [[STRUCT_WRAPPER:%.*]], ptr addrspace(4) [[THIS1]], i32 0, i32 1 +// SPIRV-NEXT: call spir_func addrspace(4) void @_Z4makev(ptr dead_on_unwind writable sret([[STRUCT_NONTRIVIALPTR]]) align 8 [[TMP]]) #[[ATTR3:[0-9]+]] +// SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 8 [[FIELD]], ptr align 8 [[TMP]], i64 8, 
i1 false) +// SPIRV-NEXT: ret void +// +// +// SPIRV-LABEL: define spir_func void @_ZN7WrapperC1Ev( +// SPIRV-SAME: ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS:%.*]]) unnamed_addr addrspace(4) #[[ATTR0]] align 2 { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[THIS_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr [[THIS_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr addrspace(4) [[THIS]], ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[THIS1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[THIS_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: call spir_func addrspace(4) void @_ZN7WrapperC2Ev(ptr addrspace(4) noundef align 8 dereferenceable_or_null(16) [[THIS1]]) #[[ATTR3]] +// SPIRV-NEXT: ret void +// diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip index 6103edba46274..34aca1fadccee 100644 --- a/clang/test/CodeGenHIP/store-addr-space.hip +++ b/clang/test/CodeGenHIP/store-addr-space.hip @@ -2,6 +2,8 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \ // RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \ +// RUN: -o - %s | FileCheck --check-prefix=SPIRV --enable-var-scope %s struct Foo { unsigned long long val; @@ -12,19 +14,18 @@ struct Foo { }; // AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo( -// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] { // AMDGCN-NEXT: [[ENTRY:.*:]] -// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// AMDGCN-NEXT: 
[[RESULT_PTR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5) // AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr // AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr -// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr // AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr -// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4 +// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 8 // AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8 -// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] -// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8 +// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT]]) #[[ATTR1:[0-9]+]] +// AMDGCN-NEXT: store ptr [[AGG_RESULT]], ptr [[DST_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8 @@ -33,6 +34,28 @@ struct Foo { // AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8 // AMDGCN-NEXT: ret void // +// SPIRV-LABEL: define spir_func void @_Z3barPK3Foo( +// SPIRV-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[SRC_PTR:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// SPIRV-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// SPIRV-NEXT: 
[[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8 +// SPIRV-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr [[RESULT_PTR]] to ptr addrspace(4) +// SPIRV-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[SRC_PTR_ADDR]] to ptr addrspace(4) +// SPIRV-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr [[AGG_RESULT]] to ptr addrspace(4) +// SPIRV-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr [[DST]] to ptr addrspace(4) +// SPIRV-NEXT: store ptr [[AGG_RESULT]], ptr addrspace(4) [[RESULT_PTR_ASCAST]], align 8 +// SPIRV-NEXT: store ptr addrspace(4) [[SRC_PTR]], ptr addrspace(4) [[SRC_PTR_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: call spir_func addrspace(4) void @_ZN3FooC1Ev(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]] +// SPIRV-NEXT: store ptr addrspace(4) [[AGG_RESULT_ASCAST]], ptr addrspace(4) [[DST_ASCAST]], align 8 +// SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SRC_PTR_ADDR_ASCAST]], align 8 +// SPIRV-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr addrspace(4) [[TMP0]], i32 0, i32 0 +// SPIRV-NEXT: [[TMP1:%.*]] = load i64, ptr addrspace(4) [[VAL]], align 8 +// SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[DST_ASCAST]], align 8 +// SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr addrspace(4) [[TMP2]], i64 0 +// SPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[ARRAYIDX]], align 8 +// SPIRV-NEXT: ret void +// __attribute__((device)) Foo bar(const Foo *const src_ptr) { Foo result; diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index 81d0cce5190e7..c8195b124feb1 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -20,8 +20,7 @@ E::E() noexcept : foo(s()) {} // CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 // CHECK-NEXT: [[FOO:%.*]] = 
getelementptr inbounds nuw [[STRUCT_E:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK-NEXT: [[FOO_ASCAST:%.*]] = addrspacecast ptr [[FOO]] to ptr addrspace(5) -// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @_Z1sv(ptr dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[FOO]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // // From abe1317dd458b601bc41afa2b89b9b0e33e13211 Mon Sep 17 00:00:00 2001 From: Vigneshwar Jayakumar Date: Thu, 23 Apr 2026 19:46:14 -0500 Subject: [PATCH 4/4] [Clang][CodeGen] Fix sret lifetime marker AS mismatch after #186275 (#193850) After #186275, the sret address space can differ from the alloca address space (e.g., AS 0 vs AS 5 on AMDGPU). In CGCall.cpp EmitCall(), when a discarded-value sret temporary is created, SRetPtr is allocated in the alloca AS and a lifetime.start is emitted. The pointer is then addrspacecast'd to match the sret AS, but the CallLifetimeEnd cleanup was using the addrspacecast'd pointer, triggering an assertion in EmitLifetimeEnd ("Pointer should be in alloca address space"). Saves the original alloca pointer before the addrspacecast and uses it for the lifetime-end cleanup. 
Fixes buildbot failure: hip-third-party-libs-tests (cherry picked from commit 528e673fec477b087bf55da282585a9ed20b809a) --- clang/lib/CodeGen/CGCall.cpp | 12 +++- .../test/CodeGenHIP/sret-lifetime-markers.cpp | 60 +++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenHIP/sret-lifetime-markers.cpp diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 74f5cda469161..e2de87a2e773e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5321,6 +5321,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); + // Original alloca for lifetime markers + Address SRetAlloca = Address::invalid(); bool NeedSRetLifetimeEnd = false; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always @@ -5335,8 +5337,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SRetPtr = ReturnValue.getAddress(); } else { SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); - if (HaveInsertPoint() && ReturnValue.isUnused()) + if (HaveInsertPoint() && ReturnValue.isUnused()) { NeedSRetLifetimeEnd = EmitLifetimeStart(SRetPtr.getBasePointer()); + if (NeedSRetLifetimeEnd) + SRetAlloca = SRetPtr; + } } if (IRFunctionArgs.hasSRetArg()) { // A mismatch between the allocated return value's AS and the target's @@ -5917,8 +5922,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // can't depend on being inside of an ExprWithCleanups, so we need to manually // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. 
+  // Use the original alloca pointer (before any addrspacecast) for the
+  // lifetime end marker, since lifetime intrinsics must reference the alloca
+  // address space.
   if (NeedSRetLifetimeEnd)
-    pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr);
+    pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca);
 
   llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();
 
diff --git a/clang/test/CodeGenHIP/sret-lifetime-markers.cpp b/clang/test/CodeGenHIP/sret-lifetime-markers.cpp
new file mode 100644
index 0000000000000..42b1ee2707297
--- /dev/null
+++ b/clang/test/CodeGenHIP/sret-lifetime-markers.cpp
@@ -0,0 +1,60 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=SPIRV %s
+// REQUIRES: amdgpu-registered-target
+
+// Verify that lifetime markers for an sret temporary use the alloca address
+// space (5), even when the sret pointer itself is in the default address
+// space (0). Regression test for an assertion failure in EmitLifetimeEnd
+// when the return value of a function returning a non-trivially-copyable
+// type is discarded.
+
+template <int N>
+struct SuperScalar {
+  double val[N];
+
+  SuperScalar() {
+    for (int i = 0; i < N; i++) val[i] = 0.0;
+  }
+  SuperScalar(const SuperScalar& rhs) {
+    for (int i = 0; i < N; i++) val[i] = rhs.val[i];
+  }
+  SuperScalar& operator=(const SuperScalar& rhs) {
+    for (int i = 0; i < N; i++) val[i] = rhs.val[i];
+    return *this;
+  }
+};
+
+template <int N>
+SuperScalar<N> atomic_fetch_add(SuperScalar<N>* dest, const SuperScalar<N>& val);
+
+template <int N>
+void add_functor(SuperScalar<N>* data, int i, SuperScalar<N>& update) {
+  atomic_fetch_add(&data[i], update);
+}
+
+template void add_functor<4>(SuperScalar<4>*, int, SuperScalar<4>&);
+// AMDGCN-LABEL: define weak_odr void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_(
+// AMDGCN-SAME: ptr noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8, addrspace(5)
+// AMDGCN-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I]] to i64
+// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr [[DATA]], i64 [[IDXPROM]]
+// AMDGCN-NEXT:    call void @llvm.lifetime.start.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3:[0-9]+]]
+// AMDGCN-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// AMDGCN-NEXT:    call void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP_ASCAST]], ptr noundef [[ARRAYIDX]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR4:[0-9]+]]
+// AMDGCN-NEXT:    call void @llvm.lifetime.end.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3]]
+// AMDGCN-NEXT:    ret void
+//
+//
+// SPIRV-LABEL: define weak_odr spir_func void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_(
+// SPIRV-SAME: ptr addrspace(4) noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE:%.*]])
local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] comdat { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8 +// SPIRV-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +// SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr addrspace(4) [[DATA]], i64 [[IDXPROM]] +// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]] +// SPIRV-NEXT: call spir_func addrspace(4) void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind nonnull writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP]], ptr addrspace(4) noundef [[ARRAYIDX]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR3]] +// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[TMP]]) #[[ATTR3]] +// SPIRV-NEXT: ret void +//