Skip to content

Commit 472db88

Browse files
committed
Add test case with optimization applied
1 parent a0430f2 commit 472db88

File tree

2 files changed: 45 additions and 15 deletions.

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2552,8 +2552,8 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
25522552
m_Mask(Mask))))
25532553
return false;
25542554

2555-
auto *Sel0 = cast<Instruction>(I.getOperand(0));
2556-
auto *Sel1 = cast<Instruction>(I.getOperand(1));
2555+
auto *Sel1 = cast<Instruction>(I.getOperand(0));
2556+
auto *Sel2 = cast<Instruction>(I.getOperand(1));
25572557

25582558
auto *C1VecTy = dyn_cast<FixedVectorType>(C1->getType());
25592559
auto *C2VecTy = dyn_cast<FixedVectorType>(C2->getType());
@@ -2573,13 +2573,12 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
25732573
auto SK = TargetTransformInfo::SK_PermuteTwoSrc;
25742574
auto SelOp = Instruction::Select;
25752575

2576-
InstructionCost CostSel0 = TTI.getCmpSelInstrCost(
2577-
SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
25782576
InstructionCost CostSel1 = TTI.getCmpSelInstrCost(
2577+
SelOp, SrcVecTy, C1VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
2578+
InstructionCost CostSel2 = TTI.getCmpSelInstrCost(
25792579
SelOp, SrcVecTy, C2VecTy, CmpInst::BAD_ICMP_PREDICATE, CostKind);
25802580

2581-
InstructionCost OldCost = CostSel0 + CostSel1;
2582-
OldCost +=
2581+
InstructionCost OldCost = CostSel1 + CostSel2 +
25832582
TTI.getShuffleCost(SK, DstVecTy, SrcVecTy, Mask, CostKind, 0, nullptr,
25842583
{I.getOperand(0), I.getOperand(1)}, &I);
25852584

@@ -2595,10 +2594,10 @@ bool VectorCombine::foldShuffleOfSelects(Instruction &I) {
25952594
NewCost += TTI.getCmpSelInstrCost(SelOp, DstVecTy, C1C2ShuffledVecTy,
25962595
CmpInst::BAD_ICMP_PREDICATE, CostKind);
25972596

2598-
if (!Sel0->hasOneUse())
2599-
NewCost += CostSel0;
26002597
if (!Sel1->hasOneUse())
26012598
NewCost += CostSel1;
2599+
if (!Sel2->hasOneUse())
2600+
NewCost += CostSel2;
26022601

26032602
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two selects: " << I
26042603
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost

llvm/test/Transforms/VectorCombine/X86/shuffle-of-selects.ll

Lines changed: 38 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -638,9 +638,8 @@ define <4 x i32> @src_v2tov4_i32_change_to_other_vector(<2 x i1> %a, <2 x i1> %b
638638
ret <4 x i32> %res
639639
}
640640

641-
; Multi-use tests - first select has multiple uses
642-
define <4 x i32> @src_v2tov4_i32_multiuse_sel0(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
643-
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel0(
641+
define <4 x i32> @src_v2tov4_i32_multiuse_sel1(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
642+
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel1(
644643
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
645644
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
646645
; CHECK-NEXT: store <2 x i32> [[SELECT_XZ]], ptr [[P]], align 8
@@ -655,9 +654,8 @@ define <4 x i32> @src_v2tov4_i32_multiuse_sel0(<2 x i1> %a, <2 x i1> %b, <2 x i3
655654
ret <4 x i32> %res
656655
}
657656

658-
; Multi-use tests - second select has multiple uses
659-
define <4 x i32> @src_v2tov4_i32_multiuse_sel1(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
660-
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel1(
657+
define <4 x i32> @src_v2tov4_i32_multiuse_sel2(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p) {
658+
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_sel2(
661659
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
662660
; CHECK-NEXT: [[SELECT_XZ:%.*]] = select <2 x i1> [[A]], <2 x i32> [[X]], <2 x i32> [[Z]]
663661
; CHECK-NEXT: [[SELECT_YX:%.*]] = select <2 x i1> [[B]], <2 x i32> [[Y]], <2 x i32> [[X]]
@@ -672,7 +670,6 @@ define <4 x i32> @src_v2tov4_i32_multiuse_sel1(<2 x i1> %a, <2 x i1> %b, <2 x i3
672670
ret <4 x i32> %res
673671
}
674672

675-
; Multi-use tests - both selects have multiple uses
676673
define <4 x i32> @src_v2tov4_i32_multiuse_both(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z, ptr %p1, ptr %p2) {
677674
; CHECK-LABEL: define <4 x i32> @src_v2tov4_i32_multiuse_both(
678675
; CHECK-SAME: <2 x i1> [[A:%.*]], <2 x i1> [[B:%.*]], <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
@@ -690,3 +687,37 @@ define <4 x i32> @src_v2tov4_i32_multiuse_both(<2 x i1> %a, <2 x i1> %b, <2 x i3
690687
%res = shufflevector <2 x i32> %select.xz, <2 x i32> %select.yx, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
691688
ret <4 x i32> %res
692689
}
690+
691+
define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(<1024 x i1> %a, <1024 x i1> %b, <1024 x i32> %x, <1024 x i32> %y, <1024 x i32> %z, ptr %p1, ptr %p2) {
692+
; SSE-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
693+
; SSE-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
694+
; SSE-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
695+
; SSE-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
696+
; SSE-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]]
697+
; SSE-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> <i32 0, i32 1024>
698+
; SSE-NEXT: ret <2 x i32> [[RES]]
699+
;
700+
; AVX2-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
701+
; AVX2-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
702+
; AVX2-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
703+
; AVX2-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
704+
; AVX2-NEXT: [[SELECT_YX:%.*]] = select <1024 x i1> [[B]], <1024 x i32> [[Y]], <1024 x i32> [[X]]
705+
; AVX2-NEXT: [[RES:%.*]] = shufflevector <1024 x i32> [[SELECT_XZ]], <1024 x i32> [[SELECT_YX]], <2 x i32> <i32 0, i32 1024>
706+
; AVX2-NEXT: ret <2 x i32> [[RES]]
707+
;
708+
; AVX512-LABEL: define <2 x i32> @src_v1024tov2_i32_multiuse_sel1(
709+
; AVX512-SAME: <1024 x i1> [[A:%.*]], <1024 x i1> [[B:%.*]], <1024 x i32> [[X:%.*]], <1024 x i32> [[Y:%.*]], <1024 x i32> [[Z:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
710+
; AVX512-NEXT: [[SELECT_XZ:%.*]] = select <1024 x i1> [[A]], <1024 x i32> [[X]], <1024 x i32> [[Z]]
711+
; AVX512-NEXT: store <1024 x i32> [[SELECT_XZ]], ptr [[P1]], align 4096
712+
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <1024 x i1> [[A]], <1024 x i1> [[B]], <2 x i32> <i32 0, i32 1024>
713+
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <1024 x i32> [[X]], <1024 x i32> [[Y]], <2 x i32> <i32 0, i32 1024>
714+
; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <1024 x i32> [[Z]], <1024 x i32> [[X]], <2 x i32> <i32 0, i32 1024>
715+
; AVX512-NEXT: [[RES:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]]
716+
; AVX512-NEXT: ret <2 x i32> [[RES]]
717+
;
718+
%select.xz = select <1024 x i1> %a, <1024 x i32> %x, <1024 x i32> %z
719+
store <1024 x i32> %select.xz, ptr %p1
720+
%select.yx = select <1024 x i1> %b, <1024 x i32> %y, <1024 x i32> %x
721+
%res = shufflevector <1024 x i32> %select.xz, <1024 x i32> %select.yx, <2 x i32> <i32 0, i32 1024>
722+
ret <2 x i32> %res
723+
}

0 commit comments

Comments
 (0)