Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2547,12 +2547,14 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
ArrayRef<int> Mask;
Value *C1, *T1, *F1, *C2, *T2, *F2;
if (!match(&I, m_Shuffle(
m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))),
m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))),
m_Mask(Mask))))
if (!match(&I, m_Shuffle(m_Select(m_Value(C1), m_Value(T1), m_Value(F1)),
m_Select(m_Value(C2), m_Value(T2), m_Value(F2)),
m_Mask(Mask))))
return false;

auto *Sel1 = cast<Instruction>(I.getOperand(0));
auto *Sel2 = cast<Instruction>(I.getOperand(1));

auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
Expand All @@ -2570,11 +2572,14 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
auto *DstVecTy = cast<FixedVectorType>(I.getType());
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
auto SelOp = Instruction::Select;
InstructionCost OldCost = TTI.getCmpSelInstrCost(

InstructionCost CostSel1 = TTI.getCmpSelInstrCost(
SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
OldCost +=
InstructionCost CostSel2 = TTI.getCmpSelInstrCost(
SelOp, SrcVecTy, C2VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);

InstructionCost OldCost =
CostSel1 + CostSel2 +
TTI.getShuffleCost(SK, DstVecTy, SrcVecTy, Mask, CostKind, 0, nullptr,
{I.getOperand(0), I.getOperand(1)}, &I);

Expand All @@ -2590,6 +2595,11 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);

if (!Sel1->hasOneUse())
NewCost += CostSel1;
if (!Sel2->hasOneUse())
NewCost += CostSel2;

LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
<< "\n");
Expand Down
84 changes: 84 additions & 0 deletions llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll
Original file line number Diff line number Diff line change
Expand Up @@ -637,3 +637,87 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
ret <4 x i32> %res
}

; Multi-use on the first shuffle operand: %select.xz feeds both the
; shufflevector and a store to %p, while %select.yx is used only by the
; shufflevector. The mask <0, 1, 2, 3> concatenates the two 2-element selects.
; The expected output below keeps both selects and the shuffle as written.
define <4 x i32> @src_v2tov4_i32_multiuse_sel1(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel1(
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
; CHECK-NEXT: store <2 x i32> [[SELECT_XZ]], ptr [[P]], align 8
; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
; Second use of %select.xz, in addition to the shufflevector below.
store <2 x i32> %select.xz, ptr %p
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %res
}

; Multi-use on the second shuffle operand: %select.yx feeds both the
; shufflevector and a store to %p, while %select.xz is used only by the
; shufflevector. The mask <0, 1, 2, 3> concatenates the two 2-element selects.
; The expected output below keeps both selects and the shuffle as written.
define <4 x i32> @src_v2tov4_i32_multiuse_sel2(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel2(
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
; CHECK-NEXT: store <2 x i32> [[SELECT_YX]], ptr [[P]], align 8
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
; Second use of %select.yx, in addition to the shufflevector below.
store <2 x i32> %select.yx, ptr %p
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %res
}

; Multi-use on both shuffle operands: %select.xz is also stored to %p1 and
; %select.yx is also stored to %p2, so each select has a use besides the
; shufflevector. The mask <0, 1, 2, 3> concatenates the two 2-element selects.
; The expected output below keeps both selects and the shuffle as written.
define <4 x i32> @src_v2tov4_i32_multiuse_both(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p1, ptr %p2) {
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_both(
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
; CHECK-NEXT: store <2 x i32> [[SELECT_XZ]], ptr [[P1]], align 8
; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
; CHECK-NEXT: store <2 x i32> [[SELECT_YX]], ptr [[P2]], align 8
; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z
; Second use of %select.xz.
store <2 x i32> %select.xz, ptr %p1
%select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x
; Second use of %select.yx.
store <2 x i32> %select.yx, ptr %p2
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %res
}

; Wide-to-narrow multi-use case: two <1024 x i32> selects are shuffled down to
; <2 x i32> with mask <0, 1024>, i.e. lane 0 of each select. %select.xz is
; also stored to %p1, so it has a use besides the shufflevector; %select.yx is
; used only by the shufflevector.
;
; The expectations differ per check prefix. Under the SSE and AVX2 prefixes the
; IR is left as written. Under the AVX512 prefix the shuffle of selects is
; rewritten into three narrow shuffles (conditions, true values, false values)
; feeding a single <2 x i1> select; %select.xz is still emitted there because
; the store uses it, and %select.yx no longer appears.
;
; NOTE(review): %p2 is declared in the signature but never referenced in the
; body; presumably left over from a sibling test. Confirm before removing,
; since the autogenerated SAME lines would need regenerating.
define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(<1024 x i1> %a, <1024 x i1> %b, <1024 x i32> %x, <1024 x i32> %y, <1024 x i32> %z, ptr %p1, ptr %p2) {
; SSE-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
; SSE-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
; SSE-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
; SSE-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]]
; SSE-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> <i32 0, i32 1024>
; SSE-NEXT: ret <2 x i32> [[RES]]
;
; AVX2-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
; AVX2-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
; AVX2-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
; AVX2-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
; AVX2-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]]
; AVX2-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> <i32 0, i32 1024>
; AVX2-NEXT: ret <2 x i32> [[RES]]
;
; AVX512-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
; AVX512-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
; AVX512-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <1024 x i1> [[A]], <1024 x i1> [[B]], <2 x i32> <i32 0, i32 1024>
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <1024 x i32> [[X]], <1024 x i32> [[Y]], <2 x i32> <i32 0, i32 1024>
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <1024 x i32> [[Z]], <1024 x i32> [[X]], <2 x i32> <i32 0, i32 1024>
; AVX512-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]]
; AVX512-NEXT: ret <2 x i32> [[RES]]
;
%select.xz = select <1024 x i1> %a, <1024 x i32> %x, <1024 x i32> %z
; Second use of %select.xz, in addition to the shufflevector below.
store <1024 x i32> %select.xz, ptr %p1
%select.yx = select <1024 x i1> %b, <1024 x i32> %y, <1024 x i32> %x
%res = shufflevector <1024 x i32> %select.xz, <1024 x i32> %select.yx, <2 x i32> <i32 0, i32 1024>
ret <2 x i32> %res
}