diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 9239cb1b989b2..e581c225aec6f 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2547,12 +2547,14 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) { bool VectorCombine::foldShuffleOfSelects(Instruction &I) { ArrayRef Mask; Value *C1, *T1, *F1, *C2, *T2, *F2; - if (!match(&I, m_Shuffle( - m_OneUse(m_Select(m_Value(C1), m_Value(T1), m_Value(F1))), - m_OneUse(m_Select(m_Value(C2), m_Value(T2), m_Value(F2))), - m_Mask(Mask)))) + if (!match(&I, m_Shuffle(m_Select(m_Value(C1), m_Value(T1), m_Value(F1)), + m_Select(m_Value(C2), m_Value(T2), m_Value(F2)), + m_Mask(Mask)))) return false; + auto *Sel1 = cast(I.getOperand(0)); + auto *Sel2 = cast(I.getOperand(1)); + auto *C1VecTy = dyn_cast(C1->getType()); auto *C2VecTy = dyn_cast(C2->getType()); if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy) @@ -2570,11 +2572,14 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) { auto *DstVecTy = cast(I.getType()); auto SK = TargetTransformInfo::SK_PermuteTwoSrc; auto SelOp = Instruction::Select; - InstructionCost OldCost = TTI.getCmpSelInstrCost( + + InstructionCost CostSel1 = TTI.getCmpSelInstrCost( SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); - OldCost += TTI.getCmpSelInstrCost(SelOp, SrcVecTy, C2VecTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - OldCost += + InstructionCost CostSel2 = TTI.getCmpSelInstrCost( + SelOp, SrcVecTy, C2VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); + + InstructionCost OldCost = + CostSel1 + CostSel2 + TTI.getShuffleCost(SK, DstVecTy, SrcVecTy, Mask, CostKind, 0, nullptr, {I.getOperand(0), I.getOperand(1)}, &I); @@ -2590,6 +2595,11 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) { NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind); + if (!Sel1->hasOneUse()) + NewCost += CostSel1; + if (!Sel2->hasOneUse()) + NewCost += CostSel2; + LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost << "\n"); diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll index 7883eb42aefac..cf57a503c2197 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll @@ -637,3 +637,87 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> ret <4 x i32> %res } + +define <4 x i32> @src_v2tov4_i32_multiuse_sel1(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel1( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: store <2 x i32> [[SELECT_XZ]], ptr [[P]], align 8 +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + store <2 x i32> %select.xz, ptr %p + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <4 x i32> @src_v2tov4_i32_multiuse_sel2(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel2( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: store <2 x i32> [[SELECT_YX]], ptr [[P]], align 8 +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + store <2 x i32> %select.yx, ptr %p + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <4 x i32> @src_v2tov4_i32_multiuse_both(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p1, ptr %p2) { +; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_both( +; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]] +; CHECK-NEXT: store <2 x i32> [[SELECT_XZ]], ptr [[P1]], align 8 +; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]] +; CHECK-NEXT: store <2 x i32> [[SELECT_YX]], ptr [[P2]], align 8 +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[SELECT_XZ]], <2 x i32> [[SELECT_YX]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %select.xz = select <2 x i1> %a, <2 x i32> %x, <2 x i32> %z + store <2 x i32> %select.xz, ptr %p1 + %select.yx = select <2 x i1> %b, <2 x i32> %y, <2 x i32> %x + store <2 x i32> %select.yx, ptr %p2 + %res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> + ret <4 x i32> %res +} + +define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(<1024 x i1> %a, <1024 x i1> %b, <1024 x i32> %x, <1024 x i32> %y, <1024 x i32> %z, ptr %p1, ptr %p2) { +; SSE-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1( +; SSE-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]] +; SSE-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096 +; SSE-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]] +; SSE-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> +; SSE-NEXT: ret <2 x i32> [[RES]] +; +; AVX2-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1( +; AVX2-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; AVX2-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]] +; AVX2-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096 +; AVX2-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]] +; AVX2-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> +; AVX2-NEXT: ret <2 x i32> [[RES]] +; +; AVX512-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1( +; AVX512-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] { +; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]] +; AVX512-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096 +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <1024 x i1> [[A]], <1024 x i1> [[B]], <2 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <1024 x i32> [[X]], <1024 x i32> [[Y]], <2 x i32> +; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <1024 x i32> [[Z]], <1024 x i32> [[X]], <2 x i32> +; AVX512-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]] +; AVX512-NEXT: ret <2 x i32> [[RES]] +; + %select.xz = select <1024 x i1> %a, <1024 x i32> %x, <1024 x i32> %z + store <1024 x i32> %select.xz, ptr %p1 + %select.yx = select <1024 x i1> %b, <1024 x i32> %y, <1024 x i32> %x + %res = shufflevector <1024 x i32> %select.xz, <1024 x i32> %select.yx, <2 x i32> + ret <2 x i32> %res +}