diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 837393b0cbdcd..42094e93d8d65 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5894,6 +5894,17 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern) { if (Pattern == AArch64SVEPredPattern::all) return DAG.getConstant(1, DL, VT); + + // When the number of active elements of a pattern matches the scalable vector + // length, we can upgrade the pattern to ALL and emit a splat instead. + if (unsigned PatNumElts = getNumElementsFromSVEPredPattern(Pattern)) { + const AArch64Subtarget &Subtarget = DAG.getSubtarget(); + unsigned NumElts = VT.getVectorMinNumElements(); + unsigned VScale = Subtarget.getSVEVectorSizeInBits() / 128; + if (PatNumElts == (NumElts * VScale)) + return DAG.getConstant(1, DL, VT); + } + return DAG.getNode(AArch64ISD::PTRUE, DL, VT, DAG.getTargetConstant(Pattern, DL, MVT::i32)); } @@ -15336,20 +15347,6 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) { N.getConstantOperandVal(0) == AArch64SVEPredPattern::all) return N.getValueType().getVectorMinNumElements() >= NumElts; - // If we're compiling for a specific vector-length, we can check if the - // pattern's VL equals that of the scalable vector at runtime. - if (N.getOpcode() == AArch64ISD::PTRUE) { - const auto &Subtarget = DAG.getSubtarget(); - unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits(); - unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits(); - if (MaxSVESize && MinSVESize == MaxSVESize) { - unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock; - unsigned PatNumElts = - getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0)); - return PatNumElts == (NumElts * VScale); - } - } - return false; } @@ -30326,16 +30323,6 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, getSVEPredPatternFromNumElements(VT.getVectorNumElements()); assert(PgPattern && "Unexpected element count for SVE predicate"); - // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use - // AArch64SVEPredPattern::all, which can enable the use of unpredicated - // variants of instructions when available. - const auto &Subtarget = DAG.getSubtarget(); - unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits(); - unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits(); - if (MaxSVESize && MinSVESize == MaxSVESize && - MaxSVESize == VT.getSizeInBits()) - PgPattern = AArch64SVEPredPattern::all; - MVT MaskVT; switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { default: diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll index 879dd4c12c0ba..b77e90f6fdc45 100644 --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -453,7 +453,7 @@ entry: define @lane_mask_nxv16i1_imm256() vscale_range(16, 16) { ; CHECK-LABEL: lane_mask_nxv16i1_imm256: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ret entry: %active.lane.mask = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256) diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll index c3322ca38f9e5..d0026db0176e1 100644 --- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -208,7 +208,7 @@ define @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr % ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.d, vl8 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll index 72d839a21a29f..2aef74a91c056 100644 --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -563,7 +563,7 @@ define @splice_nxv16i8_neg128( %a, @splice_nxv16i8_neg256( %a, %b) vscale_range(16,16) #0 { ; CHECK-LABEL: splice_nxv16i8_neg256: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: rev p0.b, p0.b ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll index a82998473fe68..4005e7d99400d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll @@ -110,7 +110,7 @@ define @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { define @whilele_b_vl_maximum() vscale_range(16, 16) { ; CHECK-LABEL: whilele_b_vl_maximum: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255) ret %out @@ -245,7 +245,7 @@ define @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { define @whilelo_b_vl_maximum() vscale_range(16, 16) { ; CHECK-LABEL: whilelo_b_vl_maximum: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256) ret %out @@ -370,7 +370,7 @@ define @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { define @whilels_b_ii_vl_maximum() vscale_range(16, 16) { ; CHECK-LABEL: whilels_b_ii_vl_maximum: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255) ret %out @@ -504,7 +504,7 @@ define @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { define @whilelt_b_ii_vl_maximum() vscale_range(16, 16) { ; CHECK-LABEL: whilelt_b_ii_vl_maximum: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256) ret %out