-
Notifications
You must be signed in to change notification settings - Fork 15.6k
InstCombine: Basic insertelement support for SimplifyDemandedFPClass #174100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/instcombine/simplify-demanded-fp-class-fix-defining-undef-vector-elts
Are you sure you want to change the base?
Conversation
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) Changes: Eventually this should pull up the known elements logic from computeKnownFPClass. Full diff: https://github.com/llvm/llvm-project/pull/174100.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 39b95c45cb6e1..87b8664760b09 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -2303,6 +2303,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
return I;
break;
}
+ case Instruction::InsertElement: {
+ KnownFPClass KnownInserted, KnownVec;
+ if (SimplifyDemandedFPClass(I, 1, DemandedMask, KnownInserted, Depth + 1) ||
+ SimplifyDemandedFPClass(I, 0, DemandedMask, KnownVec, Depth + 1))
+ return I;
+
+ // Any result lane may come from the vector operand or the inserted scalar.
+ // TODO: Use demanded elements logic from computeKnownFPClass
+ Known = KnownVec | KnownInserted;
+ break;
+ }
default:
Known = computeKnownFPClass(I, DemandedMask, CxtI, Depth + 1);
break;
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-insertelement.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-insertelement.ll
new file mode 100644
index 0000000000000..36d8766d967aa
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass-insertelement.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare nofpclass(inf zero sub norm) <4 x half> @returns_nan()
+declare nofpclass(nan zero sub norm) <4 x half> @returns_inf()
+declare nofpclass(inf zero sub norm) half @returns_nan_f16()
+declare nofpclass(inf zero sub norm) <vscale x 4 x half> @returns_nan_nxv4f16()
+
+define nofpclass(inf zero sub norm) <4 x half> @ret_only_nan__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(inf zero sub norm) <4 x half> @ret_only_nan__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(qnan inf zero sub norm) <4 x half> @ret_only_snan__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(qnan inf zero sub norm) <4 x half> @ret_only_snan__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(snan inf zero sub norm) <4 x half> @ret_only_qnan__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(snan inf zero sub norm) <4 x half> @ret_only_qnan__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan zero sub norm) <4 x half> @ret_only_inf__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan zero sub norm) <4 x half> @ret_only_inf__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan ninf zero sub norm) <4 x half> @ret_only_pinf__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan ninf zero sub norm) <4 x half> @ret_only_pinf__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: ret <4 x half> splat (half 0xH7C00)
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan pinf zero sub norm) <4 x half> @ret_only_ninf__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan pinf zero sub norm) <4 x half> @ret_only_ninf__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: ret <4 x half> splat (half 0xHFC00)
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan inf sub norm) <4 x half> @ret_only_zero__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan inf sub norm) <4 x half> @ret_only_zero__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan inf nzero sub norm) <4 x half> @ret_only_pzero__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan inf nzero sub norm) <4 x half> @ret_only_pzero__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: ret <4 x half> zeroinitializer
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+define nofpclass(nan inf pzero sub norm) <4 x half> @ret_only_nzero__insert_unknown_unknown(<4 x half> %vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan inf pzero sub norm) <4 x half> @ret_only_nzero__insert_unknown_unknown(
+; CHECK-SAME: <4 x half> [[VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: ret <4 x half> splat (half 0xH8000)
+;
+ %insert = insertelement <4 x half> %vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+; Remove select
+define nofpclass(nan) <4 x half> @ret_nonan__insert__select_unknown_nan__unknown(i1 %cond, <4 x half> %unknown.vec, half %elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan) <4 x half> @ret_nonan__insert__select_unknown_nan__unknown(
+; CHECK-SAME: i1 [[COND:%.*]], <4 x half> [[UNKNOWN_VEC:%.*]], half [[ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_VEC:%.*]] = call <4 x half> @returns_nan()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[UNKNOWN_VEC]], half [[ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %nan.vec = call <4 x half> @returns_nan()
+ %select.vec = select i1 %cond, <4 x half> %unknown.vec, <4 x half> %nan.vec
+ %insert = insertelement <4 x half> %select.vec, half %elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+; Remove select
+define nofpclass(nan) <4 x half> @ret_nonan__insert__unknown__select_unknown_nan(i1 %cond, <4 x half> %unknown.vec, half %unknown.elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan) <4 x half> @ret_nonan__insert__unknown__select_unknown_nan(
+; CHECK-SAME: i1 [[COND:%.*]], <4 x half> [[UNKNOWN_VEC:%.*]], half [[UNKNOWN_ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_ELT:%.*]] = call half @returns_nan_f16()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[UNKNOWN_VEC]], half [[UNKNOWN_ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %nan.elt = call half @returns_nan_f16()
+ %select.elt = select i1 %cond, half %unknown.elt, half %nan.elt
+ %insert = insertelement <4 x half> %unknown.vec, half %select.elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+; Remove selects
+define nofpclass(nan) <4 x half> @ret_nonan__insert__select_unknown_nan__select_unknown_nan(i1 %cond, <4 x half> %unknown.vec, half %unknown.elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan) <4 x half> @ret_nonan__insert__select_unknown_nan__select_unknown_nan(
+; CHECK-SAME: i1 [[COND:%.*]], <4 x half> [[UNKNOWN_VEC:%.*]], half [[UNKNOWN_ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_VEC:%.*]] = call <4 x half> @returns_nan()
+; CHECK-NEXT: [[NAN_ELT:%.*]] = call half @returns_nan_f16()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[UNKNOWN_VEC]], half [[UNKNOWN_ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <4 x half> [[INSERT]]
+;
+ %nan.vec = call <4 x half> @returns_nan()
+ %nan.elt = call half @returns_nan_f16()
+ %select.elt = select i1 %cond, half %unknown.elt, half %nan.elt
+ %select.vec = select i1 %cond, <4 x half> %unknown.vec, <4 x half> %nan.vec
+ %insert = insertelement <4 x half> %select.vec, half %select.elt, i32 %idx
+ ret <4 x half> %insert
+}
+
+; Remove selects
+define nofpclass(nan) <vscale x 4 x half> @ret_nonan__insert__select_unknown_nan__select_unknown_nan__scalable(i1 %cond, <vscale x 4 x half> %unknown.vec, half %unknown.elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(nan) <vscale x 4 x half> @ret_nonan__insert__select_unknown_nan__select_unknown_nan__scalable(
+; CHECK-SAME: i1 [[COND:%.*]], <vscale x 4 x half> [[UNKNOWN_VEC:%.*]], half [[UNKNOWN_ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_VEC:%.*]] = call <vscale x 4 x half> @returns_nan_nxv4f16()
+; CHECK-NEXT: [[NAN_ELT:%.*]] = call half @returns_nan_f16()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <vscale x 4 x half> [[UNKNOWN_VEC]], half [[UNKNOWN_ELT]], i32 [[IDX]]
+; CHECK-NEXT: ret <vscale x 4 x half> [[INSERT]]
+;
+ %nan.vec = call <vscale x 4 x half> @returns_nan_nxv4f16()
+ %nan.elt = call half @returns_nan_f16()
+ %select.elt = select i1 %cond, half %unknown.elt, half %nan.elt
+ %select.vec = select i1 %cond, <vscale x 4 x half> %unknown.vec, <vscale x 4 x half> %nan.vec
+ %insert = insertelement <vscale x 4 x half> %select.vec, half %select.elt, i32 %idx
+ ret <vscale x 4 x half> %insert
+}
+
+; We cannot prove the source of the exp is nan due to the inserted
+; element, so it won't be rewritten to fadd
+define nofpclass(snan) <4 x half> @insert_unknown_taints_known_nan_vector_input(half %unknown.elt, i32 %idx) {
+; CHECK-LABEL: define nofpclass(snan) <4 x half> @insert_unknown_taints_known_nan_vector_input(
+; CHECK-SAME: half [[UNKNOWN_ELT:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_VEC:%.*]] = call <4 x half> @returns_nan()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[NAN_VEC]], half [[UNKNOWN_ELT]], i32 [[IDX]]
+; CHECK-NEXT: [[EXP:%.*]] = call <4 x half> @llvm.exp.v4f16(<4 x half> [[INSERT]])
+; CHECK-NEXT: ret <4 x half> [[EXP]]
+;
+ %nan.vec = call <4 x half> @returns_nan()
+ %insert = insertelement <4 x half> %nan.vec, half %unknown.elt, i32 %idx
+ %exp = call <4 x half> @llvm.exp.v4f16(<4 x half> %insert)
+ ret <4 x half> %exp
+}
+
+; Do not rewrite exp to fadd
+define nofpclass(snan) <4 x half> @insert_unknown_vector_taints_known_nan_vector_insert(<4 x half> %unknown.vec, i32 %idx) {
+; CHECK-LABEL: define nofpclass(snan) <4 x half> @insert_unknown_vector_taints_known_nan_vector_insert(
+; CHECK-SAME: <4 x half> [[UNKNOWN_VEC:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[NAN_ELT:%.*]] = call half @returns_nan_f16()
+; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> [[UNKNOWN_VEC]], half [[NAN_ELT]], i32 [[IDX]]
+; CHECK-NEXT: [[EXP:%.*]] = call <4 x half> @llvm.exp.v4f16(<4 x half> [[INSERT]])
+; CHECK-NEXT: ret <4 x half> [[EXP]]
+;
+ %nan.elt = call half @returns_nan_f16()
+ %insert = insertelement <4 x half> %unknown.vec, half %nan.elt, i32 %idx
+ %exp = call <4 x half> @llvm.exp.v4f16(<4 x half> %insert)
+ ret <4 x half> %exp
+}
+
|
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
Eventually this should pull up the known elements logic from computeKnownFPClass.
a3fec0e to
984582e
Compare
fd1d31d to
7050ca2
Compare

Eventually this should pull up the known elements logic from
computeKnownFPClass.