diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 7dbfb862ac264..a5846ee3a19ba 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -189,6 +189,8 @@ class methodHandle; do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \ do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \ do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \ + do_intrinsic(_maxL, java_lang_Math, max_name, long2_long_signature, F_S) \ + do_intrinsic(_minL, java_lang_Math, min_name, long2_long_signature, F_S) \ do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \ do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \ do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 790512d310d27..3effa8eee0498 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -633,6 +633,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_max: case vmIntrinsics::_min_strict: case vmIntrinsics::_max_strict: + case vmIntrinsics::_maxL: + case vmIntrinsics::_minL: case vmIntrinsics::_arraycopy: case vmIntrinsics::_arraySort: case vmIntrinsics::_arrayPartition: diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 05efda3c64b74..6315583571b47 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -691,17 +691,17 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_max: case vmIntrinsics::_min_strict: case vmIntrinsics::_max_strict: - return inline_min_max(intrinsic_id()); - - case vmIntrinsics::_maxF: + case vmIntrinsics::_minL: + case vmIntrinsics::_maxL: case 
vmIntrinsics::_minF: - case vmIntrinsics::_maxD: + case vmIntrinsics::_maxF: case vmIntrinsics::_minD: - case vmIntrinsics::_maxF_strict: + case vmIntrinsics::_maxD: case vmIntrinsics::_minF_strict: - case vmIntrinsics::_maxD_strict: + case vmIntrinsics::_maxF_strict: case vmIntrinsics::_minD_strict: - return inline_fp_min_max(intrinsic_id()); + case vmIntrinsics::_maxD_strict: + return inline_min_max(intrinsic_id()); case vmIntrinsics::_VectorUnaryOp: return inline_vector_nary_operation(1); @@ -1942,7 +1942,78 @@ bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) { //----------------------------inline_min_max----------------------------------- bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) { - set_result(generate_min_max(id, argument(0), argument(1))); + Node* a = nullptr; + Node* b = nullptr; + Node* n = nullptr; + switch (id) { + case vmIntrinsics::_min: + case vmIntrinsics::_max: + case vmIntrinsics::_minF: + case vmIntrinsics::_maxF: + case vmIntrinsics::_minF_strict: + case vmIntrinsics::_maxF_strict: + case vmIntrinsics::_min_strict: + case vmIntrinsics::_max_strict: + assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each."); + a = argument(0); + b = argument(1); + break; + case vmIntrinsics::_minD: + case vmIntrinsics::_maxD: + case vmIntrinsics::_minD_strict: + case vmIntrinsics::_maxD_strict: + assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each."); + a = round_double_node(argument(0)); + b = round_double_node(argument(2)); + break; + case vmIntrinsics::_minL: + case vmIntrinsics::_maxL: + assert(callee()->signature()->size() == 4, "minL/maxL has 2 parameters of size 2 each."); + a = argument(0); + b = argument(2); + break; + default: + fatal_unexpected_iid(id); + break; + } + + switch (id) { + case vmIntrinsics::_min: + case vmIntrinsics::_min_strict: + n = new MinINode(a, b); + break; + case vmIntrinsics::_max: + case vmIntrinsics::_max_strict: + n = new MaxINode(a, b); + 
break; + case vmIntrinsics::_minF: + case vmIntrinsics::_minF_strict: + n = new MinFNode(a, b); + break; + case vmIntrinsics::_maxF: + case vmIntrinsics::_maxF_strict: + n = new MaxFNode(a, b); + break; + case vmIntrinsics::_minD: + case vmIntrinsics::_minD_strict: + n = new MinDNode(a, b); + break; + case vmIntrinsics::_maxD: + case vmIntrinsics::_maxD_strict: + n = new MaxDNode(a, b); + break; + case vmIntrinsics::_minL: + n = new MinLNode(_gvn.C, a, b); + break; + case vmIntrinsics::_maxL: + n = new MaxLNode(_gvn.C, a, b); + break; + default: + fatal_unexpected_iid(id); + break; + } + + set_result(_gvn.transform(n)); return true; } @@ -2021,25 +2092,6 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() { return true; } -Node* -LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) { - Node* result_val = nullptr; - switch (id) { - case vmIntrinsics::_min: - case vmIntrinsics::_min_strict: - result_val = _gvn.transform(new MinINode(x0, y0)); - break; - case vmIntrinsics::_max: - case vmIntrinsics::_max_strict: - result_val = _gvn.transform(new MaxINode(x0, y0)); - break; - default: - fatal_unexpected_iid(id); - break; - } - return result_val; -} - inline int LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type) { const TypePtr* base_type = TypePtr::NULL_PTR; @@ -4456,7 +4508,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { if (!stopped()) { // How many elements will we copy from the original? // The answer is MinI(orig_tail, length). - Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); + Node* moved = _gvn.transform(new MinINode(orig_tail, length)); // Generate a direct call to the right arraycopy function(s). 
// We know the copy is disjoint but we might not know if the @@ -8477,91 +8529,6 @@ bool LibraryCallKit::inline_character_compare(vmIntrinsics::ID id) { return true; } -//------------------------------inline_fp_min_max------------------------------ -bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) { -/* DISABLED BECAUSE METHOD DATA ISN'T COLLECTED PER CALL-SITE, SEE JDK-8015416. - - // The intrinsic should be used only when the API branches aren't predictable, - // the last one performing the most important comparison. The following heuristic - // uses the branch statistics to eventually bail out if necessary. - - ciMethodData *md = callee()->method_data(); - - if ( md != nullptr && md->is_mature() && md->invocation_count() > 0 ) { - ciCallProfile cp = caller()->call_profile_at_bci(bci()); - - if ( ((double)cp.count()) / ((double)md->invocation_count()) < 0.8 ) { - // Bail out if the call-site didn't contribute enough to the statistics. - return false; - } - - uint taken = 0, not_taken = 0; - - for (ciProfileData *p = md->first_data(); md->is_valid(p); p = md->next_data(p)) { - if (p->is_BranchData()) { - taken = ((ciBranchData*)p)->taken(); - not_taken = ((ciBranchData*)p)->not_taken(); - } - } - - double balance = (((double)taken) - ((double)not_taken)) / ((double)md->invocation_count()); - balance = balance < 0 ? -balance : balance; - if ( balance > 0.2 ) { - // Bail out if the most important branch is predictable enough. 
- return false; - } - } -*/ - - Node *a = nullptr; - Node *b = nullptr; - Node *n = nullptr; - switch (id) { - case vmIntrinsics::_maxF: - case vmIntrinsics::_minF: - case vmIntrinsics::_maxF_strict: - case vmIntrinsics::_minF_strict: - assert(callee()->signature()->size() == 2, "minF/maxF has 2 parameters of size 1 each."); - a = argument(0); - b = argument(1); - break; - case vmIntrinsics::_maxD: - case vmIntrinsics::_minD: - case vmIntrinsics::_maxD_strict: - case vmIntrinsics::_minD_strict: - assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each."); - a = round_double_node(argument(0)); - b = round_double_node(argument(2)); - break; - default: - fatal_unexpected_iid(id); - break; - } - switch (id) { - case vmIntrinsics::_maxF: - case vmIntrinsics::_maxF_strict: - n = new MaxFNode(a, b); - break; - case vmIntrinsics::_minF: - case vmIntrinsics::_minF_strict: - n = new MinFNode(a, b); - break; - case vmIntrinsics::_maxD: - case vmIntrinsics::_maxD_strict: - n = new MaxDNode(a, b); - break; - case vmIntrinsics::_minD: - case vmIntrinsics::_minD_strict: - n = new MinDNode(a, b); - break; - default: - fatal_unexpected_iid(id); - break; - } - set_result(_gvn.transform(n)); - return true; -} - bool LibraryCallKit::inline_profileBoolean() { Node* counts = argument(1); const TypeAryPtr* ary = nullptr; diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index afc8d329228e4..790c03be7ca51 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -223,7 +223,6 @@ class LibraryCallKit : public GraphKit { bool inline_math_subtractExactL(bool is_decrement); bool inline_min_max(vmIntrinsics::ID id); bool inline_notify(vmIntrinsics::ID id); - Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. 
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type); Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false); @@ -354,7 +353,6 @@ class LibraryCallKit : public GraphKit { bool inline_vectorizedMismatch(); bool inline_fma(vmIntrinsics::ID id); bool inline_character_compare(vmIntrinsics::ID id); - bool inline_fp_min_max(vmIntrinsics::ID id); bool inline_galoisCounterMode_AESCrypt(); Node* inline_galoisCounterMode_AESCrypt_predicate(); diff --git a/src/java.base/share/classes/java/lang/Math.java b/src/java.base/share/classes/java/lang/Math.java index 6403524a49edc..5f664b54cdf5b 100644 --- a/src/java.base/share/classes/java/lang/Math.java +++ b/src/java.base/share/classes/java/lang/Math.java @@ -2033,6 +2033,7 @@ public static int max(int a, int b) { * @param b another argument. * @return the larger of {@code a} and {@code b}. */ + @IntrinsicCandidate public static long max(long a, long b) { return (a >= b) ? a : b; } @@ -2128,6 +2129,7 @@ public static int min(int a, int b) { * @param b another argument. * @return the smaller of {@code a} and {@code b}. */ + @IntrinsicCandidate public static long min(long a, long b) { return (a <= b) ? a : b; } diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestMinMaxIdentities.java b/test/hotspot/jtreg/compiler/c2/irTests/TestMinMaxIdentities.java index ef2a47ee61413..f97ce602e5979 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestMinMaxIdentities.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestMinMaxIdentities.java @@ -112,9 +112,11 @@ public int intMaxMax(int a, int b) { // Longs - // As Math.min/max(LL) is not intrinsified, it first needs to be transformed into CMoveL and then MinL/MaxL before + // As Math.min/max(LL) is not intrinsified in the backend, it first needs to be transformed into CMoveL and then MinL/MaxL before // the identity can be matched. However, the outer min/max is not transformed into CMove because of the CMove cost model. 
- // As JDK-8307513 adds intrinsics for the methods, the tests will be updated then. + // JDK-8307513 adds intrinsics for the methods such that MinL/MaxL replace the ternary operations, + // and this enables identities to be matched. + // Note that before JDK-8307513 MinL/MaxL nodes were already present before macro expansion. @Test @IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "1" }) @@ -123,13 +125,13 @@ public long longMinMin(long a, long b) { } @Test - @IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "1" }) + @IR(failOn = { IRNode.MIN_L, IRNode.MAX_L }) public long longMinMax(long a, long b) { return Math.min(a, Math.max(a, b)); } @Test - @IR(applyIfPlatform = { "riscv64", "false" }, phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MAX_L, "1" }) + @IR(failOn = { IRNode.MIN_L, IRNode.MAX_L }) public long longMaxMin(long a, long b) { return Math.max(a, Math.min(a, b)); } diff --git a/test/hotspot/jtreg/compiler/intrinsics/math/TestMinMaxInlining.java b/test/hotspot/jtreg/compiler/intrinsics/math/TestMinMaxInlining.java new file mode 100644 index 0000000000000..f85d0cdb9f7e2 --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/math/TestMinMaxInlining.java @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2025, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8307513 + * @summary Test min and max IR inlining decisions + * @library /test/lib / + * @run driver compiler.intrinsics.math.TestMinMaxInlining + */ + +package compiler.intrinsics.math; + +import compiler.lib.ir_framework.Argument; +import compiler.lib.ir_framework.Arguments; +import compiler.lib.ir_framework.Check; +import compiler.lib.ir_framework.CompilePhase; +import compiler.lib.ir_framework.IR; +import compiler.lib.ir_framework.IRNode; +import compiler.lib.ir_framework.Test; +import compiler.lib.ir_framework.TestFramework; + +public class TestMinMaxInlining { + public static void main(String[] args) { + TestFramework.run(); + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MIN_I, "1" }) + private static int testIntMin(int a, int b) { + return Math.min(a, b); + } + + @Check(test = "testIntMin") + public static void checkTestIntMin(int result) { + if (result != -42) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MAX_I, "1" }) + private static int testIntMax(int a, int b) { + return Math.max(a, b); + } + + @Check(test = "testIntMax") + public static void checkTestIntMax(int result) { + if (result != 42) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + // JDK-8307513 does not change the way 
MinL/MaxL nodes are intrinsified in the backend. + // So they are still transformed into CmpL + CMoveL nodes after macro expansion. + // This is the reason for the different before/after macro expansion assertions below. + + // MinL is not implemented in the backend, so at macro expansion it gets transformed into a CMoveL. + // The IR asserts verify that before macro expansion MinL exists, + // but after macro expansion the node disappears. + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "1" }) + @IR(phase = { CompilePhase.AFTER_MACRO_EXPANSION }, counts = { IRNode.MIN_L, "0" }) + private static long testLongMin(long a, long b) { + return Math.min(a, b); + } + + @Check(test = "testLongMin") + public static void checkTestLongMin(long result) { + if (result != -42L) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + // MaxL is not implemented in the backend, so at macro expansion it gets transformed into a CMoveL. + // The IR asserts verify that before macro expansion MaxL exists, + // but after macro expansion the node disappears.
+ @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(phase = { CompilePhase.BEFORE_MACRO_EXPANSION }, counts = { IRNode.MAX_L, "1" }) + @IR(phase = { CompilePhase.AFTER_MACRO_EXPANSION }, counts = { IRNode.MAX_L, "0" }) + private static long testLongMax(long a, long b) { + return Math.max(a, b); + } + + @Check(test = "testLongMax") + public static void checkTestLongMax(long result) { + if (result != 42L) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MIN_F, "1" }, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static float testFloatMin(float a, float b) { + return Math.min(a, b); + } + + @Check(test = "testFloatMin") + public static void checkTestFloatMin(float result) { + if (result != -42f) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MAX_F, "1" }, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static float testFloatMax(float a, float b) { + return Math.max(a, b); + } + + @Check(test = "testFloatMax") + public static void checkTestFloatMax(float result) { + if (result != 42f) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MIN_D, "1" }, + applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"}) + private static double testDoubleMin(double a, double b) { + return Math.min(a, b); + } + + @Check(test = "testDoubleMin") + public static void checkTestDoubleMin(double result) { + if (result != -42D) { + throw new RuntimeException("Incorrect result: " + result); + } + } + + @Test + @Arguments(values = { Argument.NUMBER_MINUS_42, Argument.NUMBER_42 }) + @IR(counts = { IRNode.MAX_D, "1" }, + applyIfCPUFeatureOr = 
{"avx", "true", "asimd", "true"}) + private static double testDoubleMax(double a, double b) { + return Math.max(a, b); + } + + @Check(test = "testDoubleMax") + public static void checkTestDoubleMax(double result) { + if (result != 42D) { + throw new RuntimeException("Incorrect result: " + result); + } + } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/MinMaxRed_Long.java b/test/hotspot/jtreg/compiler/loopopts/superword/MinMaxRed_Long.java new file mode 100644 index 0000000000000..9c3e2c7bc5d35 --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/MinMaxRed_Long.java @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2025, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 8307513 + * @summary [SuperWord] MaxReduction and MinReduction should vectorize for long + * @library /test/lib / + * @run driver compiler.loopopts.superword.MinMaxRed_Long + */ + +package compiler.loopopts.superword; + +import compiler.lib.ir_framework.*; +import jdk.test.lib.Utils; + +import java.util.Arrays; +import java.util.Random; +import java.util.stream.LongStream; + +public class MinMaxRed_Long { + + private static final Random random = Utils.getRandomInstance(); + + public static void main(String[] args) throws Exception { + TestFramework framework = new TestFramework(); + framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", + "-XX:LoopUnrollLimit=250", + "-XX:CompileThresholdScaling=0.1"); + framework.start(); + } + + @Run(test = {"maxReductionImplement"}, + mode = RunMode.STANDALONE) + public void runMaxTest() { + runMaxTest(50); + runMaxTest(80); + runMaxTest(100); + } + + private static void runMaxTest(int probability) { + long[] longs = reductionInit(probability); + long res = 0; + for (int j = 0; j < 2000; j++) { + res = maxReductionImplement(longs, res); + } + if (res == 11 * Arrays.stream(longs).max().getAsLong()) { + System.out.println("Success"); + } else { + throw new AssertionError("Failed"); + } + } + + @Run(test = {"minReductionImplement"}, + mode = RunMode.STANDALONE) + public void runMinTest() { + runMinTest(50); + runMinTest(80); + runMinTest(100); + } + + private static void runMinTest(int probability) { + long[] longs = reductionInit(probability); + // Negating the values generated for controlling max branching + // allows same logic to be used for min tests. 
+ longs = negate(longs); + long res = 0; + for (int j = 0; j < 2000; j++) { + res = minReductionImplement(longs, res); + } + if (res == 11 * Arrays.stream(longs).min().getAsLong()) { + System.out.println("Success"); + } else { + throw new AssertionError("Failed"); + } + } + + static long[] negate(long[] nums) { + return LongStream.of(nums).map(l -> -l).toArray(); + } + + public static long[] reductionInit(int probability) { + int aboveCount, abovePercent; + long[] longs = new long[1024]; + + // Generates an array of numbers such that as the array is iterated + // there is P probability of finding a new max value, + // and 100-P probability of not finding a new max value. + // The algorithm loops around if the distribution does not match the probability, + // but it approximates the probability as the array sizes increase. + // The worst case of this algorithm is when the desired array size is 100 + // and the aim is to get 50% of probability, which can only be satisfied + // with 50 elements being a new max. This situation can take 15 rounds. + // As sizes increase, say 10'000 elements, + // the number of elements that have to satisfy 50% increases, + // so the algorithm will stop, for example, when 5027 elements are new max values. + // Also, probability values in the edges will achieve their objective quicker, + // with 0% or 100% probability doing it in a single loop. + // To support the same algorithm for min calculations, + // negating the array elements achieves the same objective.
+ do { + long max = random.nextLong(10); + longs[0] = max; + + aboveCount = 0; + for (int i = 1; i < longs.length; i++) { + long value; + if (random.nextLong(101) <= probability) { + long increment = random.nextLong(10); + value = max + increment; + aboveCount++; + } else { + // Decrement by at least 1 + long diffToMax = random.nextLong(10) + 1; + value = max - diffToMax; + } + longs[i] = value; + max = Math.max(max, value); + } + + abovePercent = ((aboveCount + 1) * 100) / longs.length; + } while (abovePercent != probability); + + return longs; + } + + @Test + @IR(applyIfAnd = {"SuperWordReductions", "true", "MaxVectorSize", ">=32"}, + applyIfCPUFeatureOr = {"avx512", "true", "asimd" , "true"}, + counts = {IRNode.MIN_REDUCTION_V, " > 0"}) + public static long minReductionImplement(long[] a, long res) { + for (int i = 0; i < a.length; i++) { + final long v = 11 * a[i]; + res = Math.min(res, v); + } + return res; + } + + @Test + @IR(applyIfAnd = {"SuperWordReductions", "true", "MaxVectorSize", ">=32"}, + applyIfCPUFeatureOr = {"avx512", "true", "asimd" , "true"}, + counts = {IRNode.MAX_REDUCTION_V, " > 0"}) + public static long maxReductionImplement(long[] a, long res) { + for (int i = 0; i < a.length; i++) { + final long v = 11 * a[i]; + res = Math.max(res, v); + } + return res; + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/MinMaxVector.java b/test/micro/org/openjdk/bench/java/lang/MinMaxVector.java new file mode 100644 index 0000000000000..fa42d0be83439 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/MinMaxVector.java @@ -0,0 +1,300 @@ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; 
+import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.Arrays; +import java.util.IntSummaryStatistics; +import java.util.LongSummaryStatistics; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.stream.LongStream; + +@Warmup(iterations = 3, time = 5) +@Measurement(iterations = 4, time = 5) +@Fork(2) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@BenchmarkMode(Mode.Throughput) +public class MinMaxVector +{ + @State(Scope.Thread) + public static class LoopState { + @Param({"2048"}) + int size; + + /** + * Probability of one of the min/max branches being taken. + * For max, this value represents the percentage of branches in which + * the value will be bigger or equal than the current max. + * For min, this value represents the percentage of branches in which + * the value will be smaller or equal than the current min. + */ + @Param({"50", "80", "100"}) + int probability; + + int[] minIntA; + int[] minIntB; + long[] minLongA; + long[] minLongB; + int[] maxIntA; + int[] maxIntB; + long[] maxLongA; + long[] maxLongB; + int[] resultIntArray; + long[] resultLongArray; + + @Setup + public void setup() { + final long[][] longs = distributeLongRandomIncrement(size, probability); + maxLongA = longs[0]; + maxLongB = longs[1]; + maxIntA = toInts(maxLongA); + maxIntB = toInts(maxLongB); + minLongA = negate(maxLongA); + minLongB = negate(maxLongB); + minIntA = toInts(minLongA); + minIntB = toInts(minLongB); + resultIntArray = new int[size]; + resultLongArray = new long[size]; + } + + static long[] negate(long[] nums) { + return LongStream.of(nums).map(l -> -l).toArray(); + } + + static int[] toInts(long[] nums) { + return Arrays.stream(nums).mapToInt(i -> (int) i).toArray(); + } + + static long[][] distributeLongRandomIncrement(int size, int probability) { + long[][] result; + int aboveCount, abovePercent; + + // 
This algorithm generates 2 arrays of numbers. + // The first array is created such that as the array is iterated, + // there is P probability of finding a new min/max value, + // and 100-P probability of not finding a new min/max value. + // This first array is used on its own for tests that iterate an array to reduce it to a single value, + // e.g. the min or max value in the array. + // The second array is loaded with values relative to the first array, + // such that when the values in the same index are compared for min/max, + // the probability that a new min/max value is found has the probability P. + do { + long max = ThreadLocalRandom.current().nextLong(10); + result = new long[2][size]; + result[0][0] = max; + result[1][0] = max - 1; + + aboveCount = 0; + for (int i = 1; i < result[0].length; i++) { + long value; + if (ThreadLocalRandom.current().nextLong(101) <= probability) { + long increment = ThreadLocalRandom.current().nextLong(10); + value = max + increment; + aboveCount++; + } else { + // Decrement by at least 1 + long diffToMax = ThreadLocalRandom.current().nextLong(10) + 1; + value = max - diffToMax; + } + result[0][i] = value; + result[1][i] = max; + max = Math.max(max, value); + } + + abovePercent = ((aboveCount + 1) * 100) / size; + } while (abovePercent != probability); + + return result; + } + } + + @State(Scope.Thread) + public static class RangeState + { + @Param({"1000"}) + int size; + + /** + * Define range of values to clip as a percentage. + * For example, if value is 100, then all values are considered in the range, + * and so the highest value would be the max value and the lowest value the min value in the array. + * If the value is 90, then highest would be 10% lower than the max value, + * and the min value would be 10% higher than the min value. 
+ */ + @Param({"90", "100"}) + int range; + + @Param("0") + int seed; + + int[] ints; + int[] resultInts; + long[] longs; + long[] resultLongs; + int highestInt; + int lowestInt; + long highestLong; + long lowestLong; + Random r = new Random(seed); + + @Setup + public void setup() { + ints = new int[size]; + resultInts = new int[size]; + longs = new long[size]; + resultLongs = new long[size]; + + for (int i = 0; i < size; i++) { + ints[i] = r.nextInt(); + longs[i] = r.nextLong(); + } + + final IntSummaryStatistics intStats = Arrays.stream(ints).summaryStatistics(); + highestInt = (intStats.getMax() * range) / 100; + lowestInt = intStats.getMin() + (intStats.getMax() - highestInt); + + final LongSummaryStatistics longStats = Arrays.stream(longs).summaryStatistics(); + highestLong = (longStats.getMax() * range) / 100; + lowestLong = longStats.getMin() + (longStats.getMax() - highestLong); + } + } + + @Benchmark + public int[] intClippingRange(RangeState state) { + for (int i = 0; i < state.size; i++) { + state.resultInts[i] = Math.min(Math.max(state.ints[i], state.lowestInt), state.highestInt); + } + return state.resultInts; + } + + @Benchmark + public int[] intLoopMin(LoopState state) { + for (int i = 0; i < state.size; i++) { + state.resultIntArray[i] = Math.min(state.minIntA[i], state.minIntB[i]); + } + return state.resultIntArray; + } + + @Benchmark + public int[] intLoopMax(LoopState state) { + for (int i = 0; i < state.size; i++) { + state.resultIntArray[i] = Math.max(state.maxIntA[i], state.maxIntB[i]); + } + return state.resultIntArray; + } + + @Benchmark + public int intReductionMultiplyMin(LoopState state) { + int result = 0; + for (int i = 0; i < state.size; i++) { + final int v = 11 * state.minIntA[i]; + result = Math.min(result, v); + } + return result; + } + + @Benchmark + public int intReductionSimpleMin(LoopState state) { + int result = 0; + for (int i = 0; i < state.size; i++) { + final int v = state.minIntA[i]; + result = Math.min(result, v); + } + 
return result; + } + + @Benchmark + public int intReductionMultiplyMax(LoopState state) { + int result = 0; + for (int i = 0; i < state.size; i++) { + final int v = 11 * state.maxIntA[i]; + result = Math.max(result, v); + } + return result; + } + + @Benchmark + public int intReductionSimpleMax(LoopState state) { + int result = 0; + for (int i = 0; i < state.size; i++) { + final int v = state.maxIntA[i]; + result = Math.max(result, v); + } + return result; + } + + @Benchmark + public long[] longClippingRange(RangeState state) { + for (int i = 0; i < state.size; i++) { + state.resultLongs[i] = Math.min(Math.max(state.longs[i], state.lowestLong), state.highestLong); + } + return state.resultLongs; + } + + @Benchmark + public long[] longLoopMin(LoopState state) { + for (int i = 0; i < state.size; i++) { + state.resultLongArray[i] = Math.min(state.minLongA[i], state.minLongB[i]); + } + return state.resultLongArray; + } + + @Benchmark + public long[] longLoopMax(LoopState state) { + for (int i = 0; i < state.size; i++) { + state.resultLongArray[i] = Math.max(state.maxLongA[i], state.maxLongB[i]); + } + return state.resultLongArray; + } + + @Benchmark + public long longReductionMultiplyMin(LoopState state) { + long result = 0; + for (int i = 0; i < state.size; i++) { + final long v = 11 * state.minLongA[i]; + result = Math.min(result, v); + } + return result; + } + + @Benchmark + public long longReductionSimpleMin(LoopState state) { + long result = 0; + for (int i = 0; i < state.size; i++) { + final long v = state.minLongA[i]; + result = Math.min(result, v); + } + return result; + } + + @Benchmark + public long longReductionMultiplyMax(LoopState state) { + long result = 0; + for (int i = 0; i < state.size; i++) { + final long v = 11 * state.maxLongA[i]; + result = Math.max(result, v); + } + return result; + } + + @Benchmark + public long longReductionSimpleMax(LoopState state) { + long result = 0; + for (int i = 0; i < state.size; i++) { + final long v = 
state.maxLongA[i]; + result = Math.max(result, v); + } + return result; + } +}