diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4fe900e9421f8..d23441a4a8129 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1349,6 +1349,42 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     return nullptr;
   };
 
+  // Special case for reconstructing across a select:
+  // (Cond ? V1 : (X & Mask)) |
+  // zext (Cond ? V2 : trunc X)
+  // -> (Cond ? (V1 | zext V2) : ((X & Mask) | zext trunc X))
+  // Both arms are rebuilt with CreateOr when they do not simplify, so the
+  // fold is restricted to 'or'; any other opcode would be miscompiled.
+  auto foldReconstruction = [&](Value *V1, Value *Masked,
+                                Value *ZExtSel) -> Value * {
+    if (Opcode != Instruction::Or)
+      return nullptr;
+
+    Value *X;
+    if (!match(Masked, m_OneUse(m_And(m_Value(X), m_Constant()))))
+      return nullptr;
+
+    Value *V2, *Trunc;
+    if (!match(ZExtSel, m_ZExt(m_OneUse(m_Select(m_Specific(Cond), m_Value(V2),
+                                                 m_Value(Trunc))))))
+      return nullptr;
+
+    if (!match(Trunc, m_Trunc(m_Specific(X))))
+      return nullptr;
+
+    Value *ZExtTrue = Builder.CreateZExt(V2, V1->getType());
+    Value *True;
+    if (!(True = simplifyBinOp(Opcode, V1, ZExtTrue, FMF, Q)))
+      True = Builder.CreateOr(V1, ZExtTrue);
+
+    Value *ZExtFalse = Builder.CreateZExt(Trunc, V1->getType());
+    Value *False;
+    if (!(False = simplifyBinOp(Opcode, Masked, ZExtFalse, FMF, Q)))
+      False = Builder.CreateOr(Masked, ZExtFalse);
+
+    return Builder.CreateSelect(Cond, True, False, I.getName());
+  };
+
   if (LHSIsSelect && RHSIsSelect && A == D) {
     // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
     Cond = A;
@@ -1368,6 +1404,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
     if (Value *NewSel = foldAddNegate(B, C, RHS))
       return NewSel;
+    if (Value *NewSel = foldReconstruction(B, C, RHS))
+      return NewSel;
   } else if (RHSIsSelect && RHS->hasOneUse()) {
     // X op (D ? E : F) -> D ? (X op E) : (X op F)
     Cond = D;
@@ -1375,6 +1413,8 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
     False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
     if (Value *NewSel = foldAddNegate(E, F, LHS))
       return NewSel;
+    if (Value *NewSel = foldReconstruction(E, F, LHS))
+      return NewSel;
   }
 
   if (!True || !False)
diff --git a/llvm/test/Transforms/InstCombine/select-reconstruction.ll b/llvm/test/Transforms/InstCombine/select-reconstruction.ll
new file mode 100644
index 0000000000000..eb918ed4f40d0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/select-reconstruction.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i40 @select_reconstruction_i40(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_i40(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, 2
+  %3 = and i40 %arg0, -256
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+define i40 @select_reconstruction_any_cmp_val(i40 %arg0, i8 %arg1) {
+; CHECK-LABEL: define i40 @select_reconstruction_any_cmp_val(
+; CHECK-SAME: i40 [[ARG0:%.*]], i8 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[ARG1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, %arg1
+  %3 = and i40 %arg0, -256
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+define i40 @select_reconstruction_257_mask(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_257_mask(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = and i40 [[ARG0]], -257
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP2]], i40 0, i40 [[TMP3]]
+; CHECK-NEXT:    ret i40 [[TMP4]]
+;
+  %1 = trunc i40 %arg0 to i8
+  %2 = icmp eq i8 %1, 2
+  %3 = and i40 %arg0, -257
+  %4 = select i1 %2, i8 0, i8 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i8 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+define i40 @select_reconstruction_i16_mask(i40 %arg0) {
+; CHECK-LABEL: define i40 @select_reconstruction_i16_mask(
+; CHECK-SAME: i40 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[ARG0]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i40 0, i40 [[ARG0]]
+; CHECK-NEXT:    ret i40 [[TMP3]]
+;
+  %1 = trunc i40 %arg0 to i16
+  %2 = icmp eq i16 %1, 2
+  %3 = and i40 %arg0, -65356
+  %4 = select i1 %2, i16 0, i16 %1
+  %5 = select i1 %2, i40 0, i40 %3
+  %6 = zext i16 %4 to i40
+  %7 = or disjoint i40 %5, %6
+  ret i40 %7
+}
+
+define <2 x i32> @select_reconstruction_vec_any_cmp_val(<2 x i32> %arg0, <2 x i8> %arg1) {
+; CHECK-LABEL: define <2 x i32> @select_reconstruction_vec_any_cmp_val(
+; CHECK-SAME: <2 x i32> [[ARG0:%.*]], <2 x i8> [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[ARG0]] to <2 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[ARG1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> zeroinitializer, <2 x i32> [[ARG0]]
+; CHECK-NEXT:    ret <2 x i32> [[TMP7]]
+;
+  %1 = trunc <2 x i32> %arg0 to <2 x i8>
+  %2 = icmp eq <2 x i8> %1, %arg1
+  %3 = and <2 x i32> %arg0, <i32 -256, i32 -256>
+  %4 = select <2 x i1> %2, <2 x i8> <i8 0, i8 0>, <2 x i8> %1
+  %5 = select <2 x i1> %2, <2 x i32> <i32 0, i32 0>, <2 x i32> %3
+  %6 = zext <2 x i8> %4 to <2 x i32>
+  %7 = or <2 x i32> %5, %6
+  ret <2 x i32> %7
+}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy