diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 18300ca3465..78193f2d0b3 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -858,14 +858,15 @@ class methodHandle; "Ljava/lang/Object;ILjdk/internal/vm/vector/VectorSupport$StoreVectorOperation;)V") \ do_name(vector_store_op_name, "store") \ \ - do_intrinsic(_VectorStoreMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_store_masked_op_name, vector_store_masked_op_sig, F_S) \ - do_signature(vector_store_masked_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \ + do_intrinsic(_VectorStoreMaskedOp, jdk_internal_vm_vector_VectorSupport, vector_store_masked_op_name, vector_store_masked_op_sig, F_S) \ + do_signature(vector_store_masked_op_sig, "(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;" \ "Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljava/lang/Object;I" \ "Ljdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)V") \ do_name(vector_store_masked_op_name, "storeMasked") \ \ - do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S) \ - do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljava/util/function/Function;)J") \ + do_intrinsic(_VectorReductionCoerced, jdk_internal_vm_vector_VectorSupport, vector_reduction_coerced_name, vector_reduction_coerced_sig, F_S)\ + do_signature(vector_reduction_coerced_sig, "(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;Ljava/lang/Object;" \ + "Ljdk/internal/vm/vector/VectorSupport$ReductionOperation;)J") \ do_name(vector_reduction_coerced_name, "reductionCoerced") \ \ do_intrinsic(_VectorTest, jdk_internal_vm_vector_VectorSupport, vector_test_name, vector_test_sig, F_S) \ diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 5a914a4d60c..d83afca8595 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -1270,16 +1270,17 @@ bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) { return true; } -// > -// long reductionCoerced(int oprId, Class vectorClass, Class elementType, int vlen, -// V v, -// Function defaultImpl) - +// +// long reductionCoerced(int oprId, Class vectorClass, Class maskClass, +// Class elementType, int length, V v, M m, +// ReductionOperation defaultImpl) { +// bool LibraryCallKit::inline_vector_reduction() { const TypeInt* opr = gvn().type(argument(0))->isa_int(); const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr(); - const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr(); - const TypeInt* vlen = gvn().type(argument(3))->isa_int(); + const TypeInstPtr* mask_klass = gvn().type(argument(2))->isa_instptr(); + const TypeInstPtr* elem_klass = gvn().type(argument(3))->isa_instptr(); + const TypeInt* vlen = gvn().type(argument(4))->isa_int(); if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL || !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) { @@ -1287,8 +1288,8 @@ bool LibraryCallKit::inline_vector_reduction() { tty->print_cr(" ** missing constant: opr=%s vclass=%s etype=%s vlen=%s", NodeClassNames[argument(0)->Opcode()], NodeClassNames[argument(1)->Opcode()], - NodeClassNames[argument(2)->Opcode()], - NodeClassNames[argument(3)->Opcode()]); + NodeClassNames[argument(3)->Opcode()], + NodeClassNames[argument(4)->Opcode()]); } return false; // not enough info for intrinsification } @@ -1305,16 +1306,51 @@ bool LibraryCallKit::inline_vector_reduction() { } return false; // should be primitive type } + + const Type* vmask_type = gvn().type(argument(6)); + bool is_masked_op = vmask_type != TypePtr::NULL_PTR; + if (is_masked_op) { + if (mask_klass == NULL || mask_klass->const_oop() == NULL) { + if (C->print_intrinsics()) { + tty->print_cr(" ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]); + } + return false; // not enough info for intrinsification + } + + if (!is_klass_initialized(mask_klass)) { + if (C->print_intrinsics()) { + tty->print_cr(" ** mask klass argument not initialized"); + } + return false; + } + + if (vmask_type->maybe_null()) { + if (C->print_intrinsics()) { + tty->print_cr(" ** null mask values are not allowed for masked op"); + } + return false; + } + } + BasicType elem_bt = elem_type->basic_type(); int num_elem = vlen->get_con(); - int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt); int sopc = ReductionNode::opcode(opc, elem_bt); - // TODO When mask usage is supported, VecMaskNotUsed needs to be VecMaskUseLoad. - if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed)) { + // When using mask, mask use type needs to be VecMaskUseLoad. + if (!arch_supports_vector(sopc, num_elem, elem_bt, is_masked_op ? VecMaskUseLoad : VecMaskNotUsed)) { + if (C->print_intrinsics()) { + tty->print_cr(" ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=%d", + sopc, num_elem, type2name(elem_bt), is_masked_op ? 1 : 0); + } + return false; + } + + // Return true if current platform has implemented the masked operation with predicate feature. + bool use_predicate = is_masked_op && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred); + if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) { if (C->print_intrinsics()) { - tty->print_cr(" ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s ismask=no", + tty->print_cr(" ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=1", sopc, num_elem, type2name(elem_bt)); } return false; @@ -1323,33 +1359,64 @@ bool LibraryCallKit::inline_vector_reduction() { ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass(); const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass); - Node* opd = unbox_vector(argument(4), vbox_type, elem_bt, num_elem); + Node* opd = unbox_vector(argument(5), vbox_type, elem_bt, num_elem); if (opd == NULL) { return false; // operand unboxing failed } + Node* mask = NULL; + if (is_masked_op) { + ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass(); + assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class"); + const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass); + mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem); + if (mask == NULL) { + if (C->print_intrinsics()) { + tty->print_cr(" ** unbox failed mask=%s", + NodeClassNames[argument(6)->Opcode()]); + } + return false; + } + } + Node* init = ReductionNode::make_reduction_input(gvn(), opc, elem_bt); - Node* rn = gvn().transform(ReductionNode::make(opc, NULL, init, opd, elem_bt)); + Node* value = NULL; + if (mask == NULL) { + assert(!is_masked_op, "Masked op needs the mask value never null"); + value = ReductionNode::make(opc, NULL, init, opd, elem_bt); + } else { + if (use_predicate) { + if (C->print_intrinsics()) { + tty->print_cr(" ** predicate feature is not supported on current platform!"); + } + return false; + } else { + Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt))); + value = gvn().transform(new VectorBlendNode(reduce_identity, opd, mask)); + value = ReductionNode::make(opc, NULL, init, value, elem_bt); + } + } + value = gvn().transform(value); Node* bits = NULL; switch (elem_bt) { case T_BYTE: case T_SHORT: case T_INT: { - bits = gvn().transform(new ConvI2LNode(rn)); + bits = gvn().transform(new ConvI2LNode(value)); break; } case T_FLOAT: { - rn = gvn().transform(new MoveF2INode(rn)); - bits = gvn().transform(new ConvI2LNode(rn)); + value = gvn().transform(new MoveF2INode(value)); + bits = gvn().transform(new ConvI2LNode(value)); break; } case T_DOUBLE: { - bits = gvn().transform(new MoveD2LNode(rn)); + bits = gvn().transform(new MoveD2LNode(value)); break; } case T_LONG: { - bits = rn; // no conversion needed + bits = value; // no conversion needed break; } default: fatal("%s", type2name(elem_bt)); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index b51b0a51680..e955bfc5367 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1076,7 +1076,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) case Op_MinReductionV: switch (bt) { case T_BYTE: + return gvn.makecon(TypeInt::make(max_jbyte)); case T_SHORT: + return gvn.makecon(TypeInt::make(max_jshort)); case T_INT: return gvn.makecon(TypeInt::MAX); case T_LONG: @@ -1091,7 +1093,9 @@ Node* ReductionNode::make_reduction_input(PhaseGVN& gvn, int opc, BasicType bt) case Op_MaxReductionV: switch (bt) { case T_BYTE: + return gvn.makecon(TypeInt::make(min_jbyte)); case T_SHORT: + return gvn.makecon(TypeInt::make(min_jshort)); case T_INT: return gvn.makecon(TypeInt::MIN); case T_LONG: diff --git a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java index 626a264f5a9..15a1cdac56b 100644 --- a/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java +++ b/src/java.base/share/classes/jdk/internal/vm/vector/VectorSupport.java @@ -212,14 +212,19 @@ V indexVector(Class vClass, Class E, int length, @IntrinsicCandidate public static - > - long reductionCoerced(int oprId, Class vectorClass, Class elementType, int length, - V v, - Function defaultImpl) { + + long reductionCoerced(int oprId, Class vectorClass, Class maskClass, + Class elementType, int length, V v, M m, + ReductionOperation defaultImpl) { assert isNonCapturingLambda(defaultImpl) : defaultImpl; - return defaultImpl.apply(v); + return defaultImpl.apply(v, m); } + public interface ReductionOperation { + long apply(V v, M mask); + } + + /* ============================================================================ */ public interface VecExtractOp { diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java index 1407b6d8c67..845379eaffa 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte128Vector.java @@ -236,8 +236,8 @@ Byte128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - byte rOp(byte v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + byte rOp(byte v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final byte reduceLanes(VectorOperators.Associative op) { @ForceInline public final byte reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Byte128Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Byte128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java index 75c67bb9035..a06f5ca518b 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte256Vector.java @@ -236,8 +236,8 @@ Byte256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - byte rOp(byte v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + byte rOp(byte v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final byte reduceLanes(VectorOperators.Associative op) { @ForceInline public final byte reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Byte256Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Byte256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java index 433558cf4fd..ba5b303768c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte512Vector.java @@ -236,8 +236,8 @@ Byte512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - byte rOp(byte v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + byte rOp(byte v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final byte reduceLanes(VectorOperators.Associative op) { @ForceInline public final byte reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Byte512Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Byte512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java index 9238934838b..3b30a8d5f3f 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Byte64Vector.java @@ -236,8 +236,8 @@ Byte64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - byte rOp(byte v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + byte rOp(byte v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final byte reduceLanes(VectorOperators.Associative op) { @ForceInline public final byte reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Byte64Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Byte64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java index e7344d198f5..1382e4a8888 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteMaxVector.java @@ -236,8 +236,8 @@ ByteMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - byte rOp(byte v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + byte rOp(byte v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final byte reduceLanes(VectorOperators.Associative op) { @ForceInline public final byte reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, ByteMaxMask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, ByteMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java index 3d8cc1b46b3..66676e1faa8 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ByteVector.java @@ -287,7 +287,22 @@ ByteVector tOpTemplate(Vector o1, /*package-private*/ abstract - byte rOp(byte v, FBinOp f); + byte rOp(byte v, VectorMask m, FBinOp f); + + @ForceInline + final + byte rOpTemplate(byte v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + byte[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final byte rOpTemplate(byte v, FBinOp f) { @@ -2532,9 +2547,18 @@ public abstract byte reduceLanes(VectorOperators.Associative op, @ForceInline final byte reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - ByteVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + ByteVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, byte.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations))); } /*package-private*/ @@ -2549,30 +2573,34 @@ byte reduceLanesTemplate(VectorOperators.Associative op) { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), byte.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((byte)1, (i, a, b) -> (byte)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (byte) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (byte) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((byte)-1, (i, a, b) -> (byte)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((byte)0, (i, a, b) -> (byte)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, byte.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, ByteVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, ByteVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, ByteVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((byte)1, m, (i, a, b) -> (byte)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (byte) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (byte) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((byte)-1, m, (i, a, b) -> (byte)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((byte)0, m, (i, a, b) -> (byte)(a ^ b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java index 10084ccf83f..e626b706808 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double128Vector.java @@ -236,8 +236,8 @@ Double128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final double reduceLanes(VectorOperators.Associative op) { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double128Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java index 3d511ac336d..2d4514ebc98 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double256Vector.java @@ -236,8 +236,8 @@ Double256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final double reduceLanes(VectorOperators.Associative op) { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double256Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java index 1d452e88dd0..aa14a66a467 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double512Vector.java @@ -236,8 +236,8 @@ Double512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final double reduceLanes(VectorOperators.Associative op) { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double512Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java index ba40a8145aa..8482ddb8407 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Double64Vector.java @@ -236,8 +236,8 @@ Double64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final double reduceLanes(VectorOperators.Associative op) { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Double64Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Double64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java index 46c95507b29..ccba22fe2b2 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleMaxVector.java @@ -236,8 +236,8 @@ DoubleMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - double rOp(double v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + double rOp(double v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final double reduceLanes(VectorOperators.Associative op) { @ForceInline public final double reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, DoubleMaxMask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, DoubleMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java index 531e8125020..a4f3ac5a8b9 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/DoubleVector.java @@ -287,7 +287,22 @@ DoubleVector tOpTemplate(Vector o1, /*package-private*/ abstract - double rOp(double v, FBinOp f); + double rOp(double v, VectorMask m, FBinOp f); + + @ForceInline + final + double rOpTemplate(double v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + double[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final double rOpTemplate(double v, FBinOp f) { @@ -2389,9 +2404,18 @@ public abstract double reduceLanes(VectorOperators.Associative op, @ForceInline final double reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - DoubleVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + DoubleVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, double.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, DoubleVector::reductionOperations))); } /*package-private*/ @@ -2406,24 +2430,28 @@ public abstract double reduceLanes(VectorOperators.Associative op, } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), double.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((double)0, (i, a, b) -> (double)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((double)1, (i, a, b) -> (double)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (double) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (double) Math.max(a, b))); - default: return null; - }}))); + opc, getClass(), null, double.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, DoubleVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, DoubleVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, DoubleVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((double)0, m, (i, a, b) -> (double)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((double)1, m, (i, a, b) -> (double)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (double) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (double) Math.max(a, b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java index 2a8d7ec17bd..3ac4f7bc269 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float128Vector.java @@ -236,8 +236,8 @@ Float128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final float reduceLanes(VectorOperators.Associative op) { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float128Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java index 3e6ee24fa8e..5a25e5054f1 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float256Vector.java @@ -236,8 +236,8 @@ Float256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final float reduceLanes(VectorOperators.Associative op) { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float256Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java index 54dbfabeb55..b4fea785dca 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float512Vector.java @@ -236,8 +236,8 @@ Float512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final float reduceLanes(VectorOperators.Associative op) { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float512Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java index 08dc6acb341..bf1bf909ced 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Float64Vector.java @@ -236,8 +236,8 @@ Float64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final float reduceLanes(VectorOperators.Associative op) { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Float64Mask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Float64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java index 2545228fd1d..8918d6182e5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatMaxVector.java @@ -236,8 +236,8 @@ FloatMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - float rOp(float v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + float rOp(float v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -328,7 +328,7 @@ public final float reduceLanes(VectorOperators.Associative op) { @ForceInline public final float reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, FloatMaxMask.class, m); // specialized } @Override @@ -341,7 +341,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, FloatMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java index 0da08805a05..474cf7c5d84 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/FloatVector.java @@ -287,7 +287,22 @@ FloatVector tOpTemplate(Vector o1, /*package-private*/ abstract - float rOp(float v, FBinOp f); + float rOp(float v, VectorMask m, FBinOp f); + + @ForceInline + final + float rOpTemplate(float v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + float[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final float rOpTemplate(float v, FBinOp f) { @@ -2409,9 +2424,18 @@ public abstract float reduceLanes(VectorOperators.Associative op, @ForceInline final float reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - FloatVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + FloatVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, float.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations))); } /*package-private*/ @@ -2426,24 +2450,28 @@ float reduceLanesTemplate(VectorOperators.Associative op) { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), float.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((float)0, (i, a, b) -> (float)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((float)1, (i, a, b) -> (float)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (float) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (float) Math.max(a, b))); - default: return null; - }}))); + opc, getClass(), null, float.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, FloatVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, FloatVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, FloatVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((float)0, m, (i, a, b) -> (float)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((float)1, m, (i, a, b) -> (float)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (float) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (float) Math.max(a, b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java index 32a92f0813e..3ae78e2594d 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int128Vector.java @@ -236,8 +236,8 @@ Int128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final int reduceLanes(VectorOperators.Associative op) { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int128Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java index 59e07b32913..2f7a4088d5e 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int256Vector.java @@ -236,8 +236,8 @@ Int256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final int reduceLanes(VectorOperators.Associative op) { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int256Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java index 91566126983..4ef56453ec8 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int512Vector.java @@ -236,8 +236,8 @@ Int512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final int reduceLanes(VectorOperators.Associative op) { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int512Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java index e1dc0c69e87..51549a09845 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Int64Vector.java @@ -236,8 +236,8 @@ Int64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final int reduceLanes(VectorOperators.Associative op) { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Int64Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Int64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java index aba1ef6af55..4a33df9b0d7 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntMaxVector.java @@ -236,8 +236,8 @@ IntMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - int rOp(int v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + int rOp(int v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final int reduceLanes(VectorOperators.Associative op) { @ForceInline public final int reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, IntMaxMask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, IntMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java index 156dafcc0cf..d2cc8e613d7 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/IntVector.java @@ -287,7 +287,22 @@ IntVector tOpTemplate(Vector o1, /*package-private*/ abstract - int rOp(int v, FBinOp f); + int rOp(int v, VectorMask m, FBinOp f); + + @ForceInline + final + int rOpTemplate(int v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + int[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final int rOpTemplate(int v, FBinOp f) { @@ -2531,9 +2546,18 @@ public abstract int reduceLanes(VectorOperators.Associative op, @ForceInline final int reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - IntVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + IntVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, int.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, IntVector::reductionOperations))); } /*package-private*/ @@ -2548,30 +2572,34 @@ int reduceLanesTemplate(VectorOperators.Associative op) { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), int.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((int)1, (i, a, b) -> (int)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (int) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (int) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((int)-1, (i, a, b) -> (int)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((int)0, (i, a, b) -> (int)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, int.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, IntVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, IntVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, IntVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((int)1, m, (i, a, b) -> (int)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (int) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (int) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((int)-1, m, (i, a, b) -> (int)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((int)0, m, (i, a, b) -> (int)(a ^ b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java index e65a8ffbba2..12ad9211668 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long128Vector.java @@ -231,8 +231,8 @@ Long128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -329,7 +329,7 @@ public final long reduceLanes(VectorOperators.Associative op) { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long128Mask.class, m); // specialized } @Override @@ -342,7 +342,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java index 72c4fc31336..f95bab8fcac 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long256Vector.java @@ -231,8 +231,8 @@ Long256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -329,7 +329,7 @@ public final long reduceLanes(VectorOperators.Associative op) { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long256Mask.class, m); // specialized } @Override @@ -342,7 +342,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java index e34f91063b7..2a047181fa5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long512Vector.java @@ -231,8 +231,8 @@ Long512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -329,7 +329,7 @@ public final long reduceLanes(VectorOperators.Associative op) { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long512Mask.class, m); // specialized } @Override @@ -342,7 +342,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java index 752ed385ce5..52f13bc91c9 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Long64Vector.java @@ -231,8 +231,8 @@ Long64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -329,7 +329,7 @@ public final long reduceLanes(VectorOperators.Associative op) { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Long64Mask.class, m); // specialized } @Override @@ -342,7 +342,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Long64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java index e1641d27306..651428b3b27 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongMaxVector.java @@ -231,8 +231,8 @@ LongMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - long rOp(long v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + long rOp(long v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -329,7 +329,7 @@ public final long reduceLanes(VectorOperators.Associative op) { @ForceInline public final long reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, LongMaxMask.class, m); // specialized } @Override @@ -342,7 +342,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, LongMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java index c44760a4a53..56a7a58cae9 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/LongVector.java @@ -287,7 +287,22 @@ LongVector tOpTemplate(Vector o1, /*package-private*/ abstract - long rOp(long v, FBinOp f); + long rOp(long v, VectorMask m, FBinOp f); + + @ForceInline + final + long rOpTemplate(long v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + long[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final long rOpTemplate(long v, FBinOp f) { @@ -2397,9 +2412,18 @@ public abstract long reduceLanes(VectorOperators.Associative op, @ForceInline final long reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - LongVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + LongVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, long.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, LongVector::reductionOperations))); } /*package-private*/ @@ -2414,30 +2438,34 @@ long reduceLanesTemplate(VectorOperators.Associative op) { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), long.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((long)1, (i, a, b) -> (long)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (long) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (long) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((long)-1, (i, a, b) -> (long)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((long)0, (i, a, b) -> (long)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, long.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, LongVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, LongVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, LongVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((long)1, m, (i, a, b) -> (long)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (long) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (long) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((long)-1, m, (i, a, b) -> (long)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((long)0, m, (i, a, b) -> (long)(a ^ b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java index 77277538d21..7cceece588c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short128Vector.java @@ -236,8 +236,8 @@ Short128Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final short reduceLanes(VectorOperators.Associative op) { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short128Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short128Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java index 643c2da2d51..ade7ab8bc79 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short256Vector.java @@ -236,8 +236,8 @@ Short256Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final short reduceLanes(VectorOperators.Associative op) { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short256Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short256Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java index b81563aa0e9..6b4f4a11796 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short512Vector.java @@ -236,8 +236,8 @@ Short512Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final short reduceLanes(VectorOperators.Associative op) { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short512Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short512Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java index f35858f0ce8..2b47fbfeacc 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/Short64Vector.java @@ -236,8 +236,8 @@ Short64Vector tOp(Vector v1, Vector v2, @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final short reduceLanes(VectorOperators.Associative op) { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, Short64Mask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, Short64Mask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java index 8c92a66edbe..b204fe48711 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortMaxVector.java @@ -236,8 +236,8 @@ ShortMaxVector tOp(Vector v1, Vector v2, @ForceInline final @Override - short rOp(short v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + short rOp(short v, VectorMask m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -334,7 +334,7 @@ public final short reduceLanes(VectorOperators.Associative op) { @ForceInline public final short reduceLanes(VectorOperators.Associative op, VectorMask m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, ShortMaxMask.class, m); // specialized } @Override @@ -347,7 +347,7 @@ public final long reduceLanesToLong(VectorOperators.Associative op) { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, ShortMaxMask.class, m); // specialized } @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java index 03867ac7320..681d160c2af 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/ShortVector.java @@ -287,7 +287,22 @@ ShortVector tOpTemplate(Vector o1, /*package-private*/ abstract - short rOp(short v, FBinOp f); + short rOp(short v, VectorMask m, FBinOp f); + + @ForceInline + final + short rOpTemplate(short v, VectorMask m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + short[] vec = vec(); + boolean[] mbits = ((AbstractMask)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final short rOpTemplate(short v, FBinOp f) { @@ -2532,9 +2547,18 @@ public abstract short reduceLanes(VectorOperators.Associative op, @ForceInline final short reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask m) { - ShortVector v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + ShortVector v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, short.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations))); } /*package-private*/ @@ -2549,30 +2573,34 @@ short reduceLanesTemplate(VectorOperators.Associative op) { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), short.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp((short)1, (i, a, b) -> (short)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> (short) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> (short) Math.max(a, b))); - case VECTOR_OP_AND: return v -> - toBits(v.rOp((short)-1, (i, a, b) -> (short)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp((short)0, (i, a, b) -> (short)(a ^ b))); - default: return null; - }}))); + opc, getClass(), null, short.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, ShortVector::reductionOperations))); } + private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, ShortVector.class); + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, ShortVector.class); + + private static ReductionOperation> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp((short)1, m, (i, a, b) -> (short)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> (short) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> (short) Math.max(a, b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp((short)-1, m, (i, a, b) -> (short)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp((short)0, m, (i, a, b) -> (short)(a ^ b))); + default: return null; + } + } private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template index 306c1eb8cb7..0b1bfe82de5 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-Vector.java.template @@ -291,7 +291,22 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { /*package-private*/ abstract - $type$ rOp($type$ v, FBinOp f); + $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f); + + @ForceInline + final + $type$ rOpTemplate($type$ v, VectorMask<$Boxtype$> m, FBinOp f) { + if (m == null) { + return rOpTemplate(v, f); + } + $type$[] vec = vec(); + boolean[] mbits = ((AbstractMask<$Boxtype$>)m).getBits(); + for (int i = 0; i < vec.length; i++) { + v = mbits[i] ? f.apply(i, v, vec[i]) : v; + } + return v; + } + @ForceInline final $type$ rOpTemplate($type$ v, FBinOp f) { @@ -2948,9 +2963,18 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { @ForceInline final $type$ reduceLanesTemplate(VectorOperators.Associative op, + Class> maskClass, VectorMask<$Boxtype$> m) { - $abstractvectortype$ v = reduceIdentityVector(op).blend(this, m); - return v.reduceLanesTemplate(op); + m.check(maskClass, this); + if (op == FIRST_NONZERO) { + $abstractvectortype$ v = reduceIdentityVector(op).blend(this, m); + return v.reduceLanesTemplate(op); + } + int opc = opCode(op); + return fromBits(VectorSupport.reductionCoerced( + opc, getClass(), maskClass, $type$.class, length(), + this, m, + REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); } /*package-private*/ @@ -2965,32 +2989,36 @@ public abstract class $abstractvectortype$ extends AbstractVector<$Boxtype$> { } int opc = opCode(op); return fromBits(VectorSupport.reductionCoerced( - opc, getClass(), $type$.class, length(), - this, - REDUCE_IMPL.find(op, opc, (opc_) -> { - switch (opc_) { - case VECTOR_OP_ADD: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a + b))); - case VECTOR_OP_MUL: return v -> - toBits(v.rOp(($type$)1, (i, a, b) -> ($type$)(a * b))); - case VECTOR_OP_MIN: return v -> - toBits(v.rOp(MAX_OR_INF, (i, a, b) -> ($type$) Math.min(a, b))); - case VECTOR_OP_MAX: return v -> - toBits(v.rOp(MIN_OR_INF, (i, a, b) -> ($type$) Math.max(a, b))); + opc, getClass(), null, $type$.class, length(), + this, null, + REDUCE_IMPL.find(op, opc, $abstractvectortype$::reductionOperations))); + } + + private static final + ImplCache>> + REDUCE_IMPL = new ImplCache<>(Associative.class, $Type$Vector.class); + + private static ReductionOperation<$abstractvectortype$, VectorMask<$Boxtype$>> reductionOperations(int opc_) { + switch (opc_) { + case VECTOR_OP_ADD: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a + b))); + case VECTOR_OP_MUL: return (v, m) -> + toBits(v.rOp(($type$)1, m, (i, a, b) -> ($type$)(a * b))); + case VECTOR_OP_MIN: return (v, m) -> + toBits(v.rOp(MAX_OR_INF, m, (i, a, b) -> ($type$) Math.min(a, b))); + case VECTOR_OP_MAX: return (v, m) -> + toBits(v.rOp(MIN_OR_INF, m, (i, a, b) -> ($type$) Math.max(a, b))); #if[BITWISE] - case VECTOR_OP_AND: return v -> - toBits(v.rOp(($type$)-1, (i, a, b) -> ($type$)(a & b))); - case VECTOR_OP_OR: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a | b))); - case VECTOR_OP_XOR: return v -> - toBits(v.rOp(($type$)0, (i, a, b) -> ($type$)(a ^ b))); + case VECTOR_OP_AND: return (v, m) -> + toBits(v.rOp(($type$)-1, m, (i, a, b) -> ($type$)(a & b))); + case VECTOR_OP_OR: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a | b))); + case VECTOR_OP_XOR: return (v, m) -> + toBits(v.rOp(($type$)0, m, (i, a, b) -> ($type$)(a ^ b))); #end[BITWISE] - default: return null; - }}))); + default: return null; + } } - private static final - ImplCache> REDUCE_IMPL - = new ImplCache<>(Associative.class, $Type$Vector.class); private @ForceInline diff --git a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template index 54b50fb7cbd..a5fe360f21c 100644 --- a/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template +++ b/src/jdk.incubator.vector/share/classes/jdk/incubator/vector/X-VectorBits.java.template @@ -238,8 +238,8 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline final @Override - $type$ rOp($type$ v, FBinOp f) { - return super.rOpTemplate(v, f); // specialize + $type$ rOp($type$ v, VectorMask<$Boxtype$> m, FBinOp f) { + return super.rOpTemplate(v, m, f); // specialize } @Override @@ -338,7 +338,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public final $type$ reduceLanes(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { - return super.reduceLanesTemplate(op, m); // specialized + return super.reduceLanesTemplate(op, $masktype$.class, m); // specialized } @Override @@ -351,7 +351,7 @@ final class $vectortype$ extends $abstractvectortype$ { @ForceInline public final long reduceLanesToLong(VectorOperators.Associative op, VectorMask<$Boxtype$> m) { - return (long) super.reduceLanesTemplate(op, m); // specialized + return (long) super.reduceLanesTemplate(op, $masktype$.class, m); // specialized } @ForceInline