Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5321,6 +5321,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// If the call returns a temporary with struct return, create a temporary
// alloca to hold the result, unless one is given to us.
Address SRetPtr = Address::invalid();
// Original alloca pointer (before any addrspacecast), kept for lifetime markers
Address SRetAlloca = Address::invalid();
bool NeedSRetLifetimeEnd = false;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
// For virtual function pointer thunks and musttail calls, we must always
Expand All @@ -5335,8 +5337,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
SRetPtr = ReturnValue.getAddress();
} else {
SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp");
if (HaveInsertPoint() && ReturnValue.isUnused())
if (HaveInsertPoint() && ReturnValue.isUnused()) {
NeedSRetLifetimeEnd = EmitLifetimeStart(SRetPtr.getBasePointer());
if (NeedSRetLifetimeEnd)
SRetAlloca = SRetPtr;
}
}
if (IRFunctionArgs.hasSRetArg()) {
// A mismatch between the allocated return value's AS and the target's
Expand Down Expand Up @@ -5917,8 +5922,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// can't depend on being inside of an ExprWithCleanups, so we need to manually
// pop this cleanup later on. Being eager about this is OK, since this
// temporary is 'invisible' outside of the callee.
// Use the original alloca pointer (before any addrspacecast) for the
// lifetime end marker, since lifetime intrinsics must reference the alloca
// address space.
if (NeedSRetLifetimeEnd)
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr);
pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca);

llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest();

Expand Down
29 changes: 28 additions & 1 deletion clang/lib/CodeGen/CGExprAgg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,42 @@ void AggExprEmitter::withReturnValueSlot(
// We need to always provide our own temporary if destruction is required.
// Otherwise, EmitCall will emit its own, notice that it's "unused", and end
// its lifetime before we have the chance to emit a proper destructor call.
//
// We also need a temporary if the destination is in a different address space
// from the sret AS. Use the target hook to get the actual sret AS for this
// return type.
const CXXRecordDecl *RD = RetTy->getAsCXXRecordDecl();
LangAS SRetLangAS = CGF.CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
unsigned SRetAS = CGF.getContext().getTargetAddressSpace(SRetLangAS);
bool CanAggregateCopy =
RD ? (RD->hasTrivialCopyConstructor() ||
RD->hasTrivialMoveConstructor() || RD->hasTrivialCopyAssignment() ||
RD->hasTrivialMoveAssignment() || RD->hasAttr<TrivialABIAttr>() ||
RD->isUnion())
: RetTy.isTriviallyCopyableType(CGF.getContext());
bool DestASMismatch = !Dest.isIgnored() && CanAggregateCopy &&
Dest.getAddress()
.getBasePointer()
->stripPointerCasts()
->getType()
->getPointerAddressSpace() != SRetAS;
bool UseTemp = Dest.isPotentiallyAliased() || Dest.requiresGCollection() ||
(RequiresDestruction && Dest.isIgnored());
(RequiresDestruction && Dest.isIgnored()) || DestASMismatch;

Address RetAddr = Address::invalid();

EHScopeStack::stable_iterator LifetimeEndBlock;
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
if (!UseTemp) {
RetAddr = Dest.getAddress();
if (RetAddr.isValid() && RetAddr.getAddressSpace() != SRetAS) {
llvm::Type *SRetPtrTy =
llvm::PointerType::get(CGF.getLLVMContext(), SRetAS);
RetAddr = RetAddr.withPointer(
CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
CGF, RetAddr.getBasePointer(), LangAS::Default, SRetPtrTy),
RetAddr.isKnownNonNull());
}
} else {
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
if (CGF.EmitLifetimeStart(RetAddr.getBasePointer())) {
Expand Down
10 changes: 6 additions & 4 deletions clang/lib/CodeGen/ItaniumCXXABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1372,12 +1372,14 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
if (!RD)
return false;

// If C++ prohibits us from making a copy, return by address.
// If C++ prohibits us from making a copy, return by address using the target
// hook getSRetAddrSpace to decide the AS.
if (!RD->canPassInRegisters()) {
auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
FI.getReturnInfo() = ABIArgInfo::getIndirect(
Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
/*ByVal=*/false);
LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS);
FI.getReturnInfo() =
ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false);
return true;
}
return false;
Expand Down
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/MicrosoftCXXABI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,9 +1189,10 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {

if (isIndirectReturn) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
FI.getReturnInfo() = ABIArgInfo::getIndirect(
Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(),
/*ByVal=*/false);
LangAS SRetAS = CGM.getTargetCodeGenInfo().getSRetAddrSpace(RD);
unsigned AS = CGM.getContext().getTargetAddressSpace(SRetAS);
FI.getReturnInfo() =
ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AS, /*ByVal=*/false);

// MSVC always passes `this` before the `sret` parameter.
FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Value;
}

namespace clang {
class CXXRecordDecl;
class Decl;

namespace CodeGen {
Expand Down Expand Up @@ -344,6 +345,12 @@ class TargetCodeGenInfo {
LangAS SrcAddr,
llvm::Type *DestTy) const;

/// Get the address space for an indirect (sret) return of the given type.
/// \p RD is the returned class type, or null for non-class returns. The
/// default implementation falls back to the AST alloca address space;
/// targets whose sret pointers may live in a different address space
/// (e.g. AMDGPU) override this hook.
virtual LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const {
return getASTAllocaAddressSpace();
}

/// Get address space of pointer parameter for __cxa_atexit.
virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const {
return LangAS::Default;
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/CodeGen/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "clang/AST/DeclCXX.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

Expand Down Expand Up @@ -308,6 +309,9 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}

LangAS getSRetAddrSpace(const CXXRecordDecl *RD) const override;

LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
Expand Down Expand Up @@ -448,6 +452,15 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}

LangAS
AMDGPUTargetCodeGenInfo::getSRetAddrSpace(const CXXRecordDecl *RD) const {
// Types with no viable copy/move must be constructed in place through the
// sret pointer; use the default AS so the sret pointer matches the address
// space of the "this" pointer the constructor expects.
if (RD && !RD->canPassInRegisters())
return LangAS::Default;
// Otherwise the sret temporary can live in the alloca address space.
return getASTAllocaAddressSpace();
}

LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
Expand Down
12 changes: 8 additions & 4 deletions clang/test/CodeGenCXX/no-elide-constructors.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98
// RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11
// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS
// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-SPIRV
// RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98-ELIDE
// RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-ELIDE
// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS-ELIDE
// RUN: %clang_cc1 -std=c++11 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-SPIRV-ELIDE

// Reduced from PR12208
class X {
Expand All @@ -17,7 +19,7 @@ class X {
};

// CHECK-LABEL: define{{.*}} void @_Z4Testv(
// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%.*]])
// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%[^),]+]])
X Test()
{
X x;
Expand All @@ -26,16 +28,18 @@ X Test()
// sret argument.
// CHECK-CXX98: call void @_ZN1XC1ERKS_(
// CHECK-CXX11: call void @_ZN1XC1EOS_(
// CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]]
// CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_RESULT]]
// CHECK-CXX11-SPIRV: [[TMP0:%.*]] = addrspacecast ptr [[AGG_RESULT]] to ptr addrspace(4)
// CHECK-CXX11-SPIRV-NEXT: call spir_func addrspace(4) void @_ZN1XC1EOS_(ptr addrspace(4) noundef align 1 dereferenceable{{.*}}(1) [[TMP0]]
// CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_(
// CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_(
// CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_(
// CHECK-CXX11-SPIRV-ELIDE-NOT: call void @_ZN1XC1EOS_(

// Make sure that the destructor for X is called.
// FIXME: This call is present even in the -ELIDE runs, but is guarded by a
// branch that is never taken in those cases. We could generate better IR
// here.
// CHECK: call void @_ZN1XD1Ev(
// CHECK: call {{.*}}void @_ZN1XD1Ev(
return x;
}
63 changes: 63 additions & 0 deletions clang/test/CodeGenHIP/placement-new-addrspace.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "make_big|kernel|local_test" --version 5
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -disable-llvm-passes -x hip -std=c++17 %s -o - | FileCheck %s
// REQUIRES: amdgpu-registered-target

// Verify that when a function returning an aggregate via sret is called with a
// destination in a different address space (e.g. global pointer from kernel
// arg), the compiler materialises a temporary in the alloca AS and copies back,
// rather than emitting an invalid addrspacecast of the destination pointer.

typedef __SIZE_TYPE__ size_t;
__attribute__((device)) void *operator new(size_t, void *p) noexcept { return p; }

// 128-byte aggregate, large enough that it is returned indirectly via an
// sret pointer (see the CHECK lines below).
struct Big {
int v[32];
// Device-side constructor: fills v with x, x+1, ..., x+31.
__attribute__((device)) Big(int x) {
for (int i = 0; i < 32; ++i)
v[i] = x + i;
}
};

// CHECK-LABEL: define dso_local void @_Z8make_bigv(
// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_BIG:%.*]]) align 4 [[AGG_RESULT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-NEXT: call void @_ZN3BigC1Ei(ptr noundef nonnull align 4 dereferenceable(128) [[AGG_RESULT_ASCAST]], i32 noundef 7) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
__attribute__((device)) Big make_big() { return Big(7); }

// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z6kernelP3Big(
// CHECK-SAME: ptr addrspace(1) noundef [[OUT_COERCE:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[OUT:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
// CHECK-NEXT: [[OUT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT]] to ptr
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT_COERCE]], ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: [[OUT1:%.*]] = load ptr, ptr [[OUT_ASCAST]], align 8
// CHECK-NEXT: store ptr [[OUT1]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[TMP]]) #[[ATTR3]]
// CHECK-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP0]], ptr addrspace(5) align 4 [[TMP]], i64 128, i1 false)
// CHECK-NEXT: ret void
//
__attribute__((global)) void kernel(Big *out) {
new (out) Big(make_big());
}

// If the destination is ultimately backed by alloca AS (even through cast
// chains), we should pass it directly as sret and avoid an extra temp/copy.
// CHECK-LABEL: define dso_local void @_Z10local_testv(
// CHECK-SAME: ) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_BIG:%.*]], align 4, addrspace(5)
// CHECK-NEXT: [[LOCAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr
// CHECK-NEXT: [[LOCAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_ASCAST]] to ptr addrspace(5)
// CHECK-NEXT: call void @_Z8make_bigv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_BIG]]) align 4 [[LOCAL_ASCAST_ASCAST]]) #[[ATTR3]]
// CHECK-NEXT: ret void
//
__attribute__((device)) void local_test() {
Big local = make_big();
}
60 changes: 60 additions & 0 deletions clang/test/CodeGenHIP/sret-lifetime-markers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions ".*" --include-generated-funcs --version 6
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=AMDGCN %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -O1 -x c++ -std=c++2b %s -o - | FileCheck --check-prefix=SPIRV %s
// REQUIRES: amdgpu-registered-target

// Verify that lifetime markers for an sret temporary use the alloca address
// space (5), even when the sret pointer itself is in the default address
// space (0). Regression test for an assertion failure in EmitLifetimeEnd
// when the return value of a function returning a non-trivially-copyable
// type is discarded.

// Aggregate with user-provided copy constructor / copy assignment, so it is
// not trivially copyable and must be returned indirectly via sret.
template <int N>
struct SuperScalar {
double val[N];

// Zero-initialize all elements.
SuperScalar() {
for (int i = 0; i < N; i++) val[i] = 0.0;
}
// User-provided copy ctor: element-wise copy.
SuperScalar(const SuperScalar& rhs) {
for (int i = 0; i < N; i++) val[i] = rhs.val[i];
}
// User-provided copy assignment: element-wise copy.
SuperScalar& operator=(const SuperScalar& rhs) {
for (int i = 0; i < N; i++) val[i] = rhs.val[i];
return *this;
}
};

template <int N>
SuperScalar<N> atomic_fetch_add(SuperScalar<N>* dest, const SuperScalar<N>& val);

// Discards the sret return value of atomic_fetch_add, exercising the
// lifetime-marker path for an unused sret temporary (see the CHECK lines
// for llvm.lifetime.start/end below).
template <int N>
void add_functor(SuperScalar<N>* data, int i, SuperScalar<N>& update) {
atomic_fetch_add(&data[i], update);
}

template void add_functor<4>(SuperScalar<4>*, int, SuperScalar<4>&);
// AMDGCN-LABEL: define weak_odr void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_(
// AMDGCN-SAME: ptr noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr [[DATA]], i64 [[IDXPROM]]
// AMDGCN-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3:[0-9]+]]
// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// AMDGCN-NEXT: call void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP_ASCAST]], ptr noundef [[ARRAYIDX]], ptr noundef nonnull align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR4:[0-9]+]]
// AMDGCN-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[TMP]]) #[[ATTR3]]
// AMDGCN-NEXT: ret void
//
//
// SPIRV-LABEL: define weak_odr spir_func void @_Z11add_functorILi4EEvP11SuperScalarIXT_EEiRS1_(
// SPIRV-SAME: ptr addrspace(4) noundef [[DATA:%.*]], i32 noundef [[I:%.*]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] comdat {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[TMP:%.*]] = alloca [[STRUCT_SUPERSCALAR:%.*]], align 8
// SPIRV-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64
// SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SUPERSCALAR]], ptr addrspace(4) [[DATA]], i64 [[IDXPROM]]
// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]]
// SPIRV-NEXT: call spir_func addrspace(4) void @_Z16atomic_fetch_addILi4EE11SuperScalarIXT_EEPS1_RKS1_(ptr dead_on_unwind nonnull writable sret([[STRUCT_SUPERSCALAR]]) align 8 [[TMP]], ptr addrspace(4) noundef [[ARRAYIDX]], ptr addrspace(4) noundef align 8 dereferenceable(32) [[UPDATE]]) #[[ATTR3]]
// SPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(ptr nonnull [[TMP]]) #[[ATTR3]]
// SPIRV-NEXT: ret void
//
Loading