diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 7a4d7c6d6f340..958626c20c2ed 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -2867,6 +2867,17 @@ void Assembler::mov(Register dst, Register src) { movq(dst, src); } +void Assembler::movapd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_rex_vex_w_reverted(); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8(0x28); + emit_operand(dst, src, 0); +} + void Assembler::movapd(XMMRegister dst, XMMRegister src) { int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -8070,6 +8081,14 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector emit_operand(dst, src, 0); } +void Assembler::orpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x56, (0xC0 | encode)); +} + void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); diff --git 
a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 719334701a5a6..10e6264160ce7 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -950,6 +950,7 @@ class Assembler : public AbstractAssembler { // New cpus require use of movaps and movapd to avoid partial register stall // when moving between registers. void movaps(XMMRegister dst, XMMRegister src); + void movapd(XMMRegister dst, Address src); void movapd(XMMRegister dst, XMMRegister src); // End avoid using directly @@ -2450,6 +2451,9 @@ class Assembler : public AbstractAssembler { void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Bitwise Logical OR of Packed Floating-Point Values + void orpd(XMMRegister dst, XMMRegister src); + void unpckhpd(XMMRegister dst, XMMRegister src); void unpcklpd(XMMRegister dst, XMMRegister src); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 60ce3419dfb42..3ea2e99fe570f 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -720,7 +720,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || - x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh + x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh || + x->id() == vmIntrinsics::_dcbrt ) { do_LibmIntrinsic(x); return; @@ -807,7 +808,7 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { } break; case vmIntrinsics::_dpow: - if (StubRoutines::dpow() != nullptr) { + if (StubRoutines::dpow() != nullptr) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } else { __ 
call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); @@ -828,18 +829,24 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { } break; case vmIntrinsics::_dtan: - if (StubRoutines::dtan() != nullptr) { + if (StubRoutines::dtan() != nullptr) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); } else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); } break; case vmIntrinsics::_dtanh: - assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found"); - if (StubRoutines::dtanh() != nullptr) { + assert(StubRoutines::dtanh() != nullptr, "tanh intrinsic not found"); + if (StubRoutines::dtanh() != nullptr) { __ call_runtime_leaf(StubRoutines::dtanh(), getThreadTemp(), result_reg, cc->args()); } break; + case vmIntrinsics::_dcbrt: + assert(StubRoutines::dcbrt() != nullptr, "cbrt intrinsic not found"); + if (StubRoutines::dcbrt() != nullptr) { + __ call_runtime_leaf(StubRoutines::dcbrt(), getThreadTemp(), result_reg, cc->args()); + } + break; default: ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 35e461b601f0f..38dc569d652e2 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -2250,6 +2250,16 @@ void MacroAssembler::evmovdqaq(XMMRegister dst, AddressLiteral src, int vector_l } } +void MacroAssembler::movapd(XMMRegister dst, AddressLiteral src, Register rscratch) { + assert(rscratch != noreg || always_reachable(src), "missing"); + + if (reachable(src)) { + Assembler::movapd(dst, as_Address(src)); + } else { + lea(rscratch, src); + Assembler::movapd(dst, Address(rscratch, 0)); + } +} void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); diff --git 
a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index efd1a4c154f1c..f7ac6fb4297ea 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -995,6 +995,8 @@ class MacroAssembler: public Assembler { void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void andnpd(XMMRegister dst, XMMRegister src) { Assembler::andnpd(dst, src); } + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } void andps(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); @@ -1007,6 +1009,8 @@ class MacroAssembler: public Assembler { void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + void orpd(XMMRegister dst, XMMRegister src) { Assembler::orpd(dst, src); } + void cmp32_mxcsr_std(Address mxcsr_save, Register tmp, Register rscratch = noreg); void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(AddressLiteral src, Register rscratch = noreg); @@ -1241,6 +1245,9 @@ class MacroAssembler: public Assembler { void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); void evmovdqaq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register rscratch = noreg); + using Assembler::movapd; + void movapd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); + // Move Aligned 
Double Quadword void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); } void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 1a16416787d6e..0f7090b0cb9b2 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3692,6 +3692,9 @@ void StubGenerator::generate_libm_stubs() { if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtanh)) { StubRoutines::_dtanh = generate_libmTanh(); // from stubGenerator_x86_64_tanh.cpp } + if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcbrt)) { + StubRoutines::_dcbrt = generate_libmCbrt(); // from stubGenerator_x86_64_cbrt.cpp + } if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { StubRoutines::_dexp = generate_libmExp(); // from stubGenerator_x86_64_exp.cpp } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index c08b0168796e4..2cf35ea60152a 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -553,6 +553,7 @@ class StubGenerator: public StubCodeGenerator { address generate_libmCos(); address generate_libmTan(); address generate_libmTanh(); + address generate_libmCbrt(); address generate_libmExp(); address generate_libmPow(); address generate_libmLog(); diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp new file mode 100644 index 0000000000000..da60a9be27633 --- /dev/null +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_cbrt.cpp @@ -0,0 +1,366 @@ +/* + * Copyright (c) 2025, Intel Corporation. All rights reserved. + * Intel Math Library (LIBM) Source Code + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "macroAssembler_x86.hpp" +#include "stubGenerator_x86_64.hpp" + +/******************************************************************************/ +// ALGORITHM DESCRIPTION +// --------------------- +// +// x=2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52 +// Let r=(x*2^{-3k-j} - 1.b1 b2 ... b5 1)* rcp[b1 b2 ..b5], +// where rcp[b1 b2 .. b5]=1/(1.b1 b2 b3 b4 b5 1) in double precision +// cbrt(2^j * 1. b1 b2 .. 
b5 1) is approximated as T[j][b1..b5]+D[j][b1..b5] +// (T stores the high 53 bits, D stores the low order bits) +// Result=2^k*T+(2^k*T*r)*P+2^k*D +// where P=p1+p2*r+..+p8*r^7 +// +// Special cases: +// cbrt(NaN) = quiet NaN +// cbrt(+/-INF) = +/-INF +// cbrt(+/-0) = +/-0 +// +/******************************************************************************/ + +ATTRIBUTE_ALIGNED(4) static const juint _SIG_MASK[] = +{ + 0, 1032192 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MASK[] = +{ + 0, 3220176896 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK2[] = +{ + 0, 3220193280 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _EXP_MSK3[] = +{ + 4294967295, 1048575 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _SCALE63[] = +{ + 0, 1138753536 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _ZERON[] = +{ + 0, 2147483648 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _INF[] = +{ + 0, 2146435072 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _NEG_INF[] = +{ + 0, 4293918720 +}; + +ATTRIBUTE_ALIGNED(16) static const juint _coeff_table[] = +{ + 1553778919, 3213899486, 3534952507, 3215266280, 1646371399, + 3214412045, 477218588, 3216798151, 3582521621, 1066628362, + 1007461464, 1068473053, 889629714, 1067378449, 1431655765, + 1070945621 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _rcp_table[] = +{ + 528611360, 3220144632, 2884679527, 3220082993, 1991868891, 3220024928, + 2298714891, 3219970134, 58835168, 3219918343, 3035110223, 3219869313, + 1617585086, 3219822831, 2500867033, 3219778702, 4241943008, 3219736752, + 258732970, 3219696825, 404232216, 3219658776, 2172167368, 3219622476, + 1544257904, 3219587808, 377579543, 3219554664, 1616385542, 3219522945, + 813783277, 3219492562, 3940743189, 3219463431, 2689777499, 3219435478, + 1700977147, 3219408632, 3169102082, 3219382828, 327235604, 3219358008, + 1244336319, 3219334115, 1300311200, 3219311099, 3095471925, 3219288912, + 2166487928, 3219267511, 2913108253, 3219246854, 293672978, 3219226904, + 288737297, 
3219207624, 1810275472, 3219188981, 174592167, 3219170945, + 3539053052, 3219153485, 2164392968, 3219136576 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _cbrt_table[] = +{ + 572345495, 1072698681, 1998204467, 1072709382, 3861501553, 1072719872, + 2268192434, 1072730162, 2981979308, 1072740260, 270859143, 1072750176, + 2958651392, 1072759916, 313113243, 1072769490, 919449400, 1072778903, + 2809328903, 1072788162, 2222981587, 1072797274, 2352530781, 1072806244, + 594152517, 1072815078, 1555767199, 1072823780, 4282421314, 1072832355, + 2355578597, 1072840809, 1162590619, 1072849145, 797864051, 1072857367, + 431273680, 1072865479, 2669831148, 1072873484, 733477752, 1072881387, + 4280220604, 1072889189, 801961634, 1072896896, 2915370760, 1072904508, + 1159613482, 1072912030, 2689944798, 1072919463, 1248687822, 1072926811, + 2967951030, 1072934075, 630170432, 1072941259, 3760898254, 1072948363, + 0, 1072955392, 2370273294, 1072962345, 1261754802, 1072972640, + 546334065, 1072986123, 1054893830, 1072999340, 1571187597, 1073012304, + 1107975175, 1073025027, 3606909377, 1073037519, 1113616747, 1073049792, + 4154744632, 1073061853, 3358931423, 1073073713, 4060702372, 1073085379, + 747576176, 1073096860, 3023138255, 1073108161, 1419988548, 1073119291, + 1914185305, 1073130255, 294389948, 1073141060, 3761802570, 1073151710, + 978281566, 1073162213, 823148820, 1073172572, 2420954441, 1073182792, + 3815449908, 1073192878, 2046058587, 1073202835, 1807524753, 1073212666, + 2628681401, 1073222375, 3225667357, 1073231966, 1555307421, 1073241443, + 3454043099, 1073250808, 1208137896, 1073260066, 3659916772, 1073269218, + 1886261264, 1073278269, 3593647839, 1073287220, 3086012205, 1073296075, + 2769796922, 1073304836, 888716057, 1073317807, 2201465623, 1073334794, + 164369365, 1073351447, 3462666733, 1073367780, 2773905457, 1073383810, + 1342879088, 1073399550, 2543933975, 1073415012, 1684477781, 1073430209, + 3532178543, 1073445151, 1147747300, 1073459850, 1928031793, 1073474314, + 
2079717015, 1073488553, 4016765315, 1073502575, 3670431139, 1073516389, + 3549227225, 1073530002, 11637607, 1073543422, 588220169, 1073556654, + 2635407503, 1073569705, 2042029317, 1073582582, 1925128962, 1073595290, + 4136375664, 1073607834, 759964600, 1073620221, 4257606771, 1073632453, + 297278907, 1073644538, 3655053093, 1073656477, 2442253172, 1073668277, + 1111876799, 1073679941, 3330973139, 1073691472, 3438879452, 1073702875, + 3671565478, 1073714153, 1317849547, 1073725310, 1642364115, 1073736348 +}; + +ATTRIBUTE_ALIGNED(4) static const juint _D_table[] = +{ + 4050900474, 1014427190, 1157977860, 1016444461, 1374568199, 1017271387, + 2809163288, 1016882676, 3742377377, 1013168191, 3101606597, 1017541672, + 65224358, 1017217597, 2691591250, 1017266643, 4020758549, 1017689313, + 1316310992, 1018030788, 1031537856, 1014090882, 3261395239, 1016413641, + 886424999, 1016313335, 3114776834, 1014195875, 1681120620, 1017825416, + 1329600273, 1016625740, 465474623, 1017097119, 4251633980, 1017169077, + 1986990133, 1017710645, 752958613, 1017159641, 2216216792, 1018020163, + 4282860129, 1015924861, 1557627859, 1016039538, 3889219754, 1018086237, + 3684996408, 1017353275, 723532103, 1017717141, 2951149676, 1012528470, + 831890937, 1017830553, 1031212645, 1017387331, 2741737450, 1017604974, + 2863311531, 1003776682, 4276736099, 1013153088, 4111778382, 1015673686, + 1728065769, 1016413986, 2708718031, 1018078833, 1069335005, 1015291224, + 700037144, 1016482032, 2904566452, 1017226861, 4074156649, 1017622651, + 25019565, 1015245366, 3601952608, 1015771755, 3267129373, 1017904664, + 503203103, 1014921629, 2122011730, 1018027866, 3927295461, 1014189456, + 2790625147, 1016024251, 1330460186, 1016940346, 4033568463, 1015538390, + 3695818227, 1017509621, 257573361, 1017208868, 3227697852, 1017337964, + 234118548, 1017169577, 4009025803, 1017278524, 1948343394, 1017749310, + 678398162, 1018144239, 3083864863, 1016669086, 2415453452, 1017890370, + 175467344, 1017330033, 
3197359580, 1010339928, 2071276951, 1015941358, + 268372543, 1016737773, 938132959, 1017389108, 1816750559, 1017337448, + 4119203749, 1017152174, 2578653878, 1013108497, 2470331096, 1014678606, + 123855735, 1016553320, 1265650889, 1014782687, 3414398172, 1017182638, + 1040773369, 1016158401, 3483628886, 1016886550, 4140499405, 1016191425, + 3893477850, 1016964495, 3935319771, 1009634717, 2978982660, 1015027112, + 2452709923, 1017990229, 3190365712, 1015835149, 4237588139, 1015832925, + 2610678389, 1017962711, 2127316774, 1017405770, 824267502, 1017959463, + 2165924042, 1017912225, 2774007076, 1013257418, 4123916326, 1017582284, + 1976417958, 1016959909, 4092806412, 1017711279, 119251817, 1015363631, + 3475418768, 1017675415, 1972580503, 1015470684, 815541017, 1017517969, + 2429917451, 1017397776, 4062888482, 1016749897, 68284153, 1017925678, + 2207779246, 1016320298, 1183466520, 1017408657, 143326427, 1017060403 +}; + +#define __ _masm-> + +address StubGenerator::generate_libmCbrt() { + StubGenStubId stub_id = StubGenStubId::dcbrt_id; + StubCodeMark mark(this, stub_id); + address start = __ pc(); + + Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1; + Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1; + Label B1_1, B1_2, B1_4; + + address SIG_MASK = (address)_SIG_MASK; + address EXP_MASK = (address)_EXP_MASK; + address EXP_MSK2 = (address)_EXP_MSK2; + address EXP_MSK3 = (address)_EXP_MSK3; + address SCALE63 = (address)_SCALE63; + address ZERON = (address)_ZERON; + address INF = (address)_INF; + address NEG_INF = (address)_NEG_INF; + address coeff_table = (address)_coeff_table; + address rcp_table = (address)_rcp_table; + address cbrt_table = (address)_cbrt_table; + address D_table = (address)_D_table; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ bind(B1_1); + __ subq(rsp, 24); + __ movsd(Address(rsp), xmm0); + + __ bind(B1_2); + __ movq(xmm7, xmm0); + __ movl(rdx, 524032); 
+ __ movsd(xmm5, ExternalAddress(EXP_MSK3), r11 /*rscratch*/); + __ movsd(xmm3, ExternalAddress(EXP_MSK2), r11 /*rscratch*/); + __ psrlq(xmm7, 44); + __ pextrw(rcx, xmm7, 0); + __ movdl(rax, xmm7); + __ movsd(xmm1, ExternalAddress(EXP_MASK), r11 /*rscratch*/); + __ movsd(xmm2, ExternalAddress(SIG_MASK), r11 /*rscratch*/); + __ andl(rcx, 248); + __ lea(r8, ExternalAddress(rcp_table)); + __ movsd(xmm4, Address(rcx, r8, Address::times_1)); + __ movq(r9, rax); + __ andl(rdx, rax); + __ cmpl(rdx, 0); + __ jcc(Assembler::equal, L_2TAG_PACKET_0_0_1); // Branch only if |x| is denormalized + __ cmpl(rdx, 524032); + __ jcc(Assembler::equal, L_2TAG_PACKET_1_0_1); // Branch only if |x| is INF or NaN + __ shrl(rdx, 8); + __ shrq(r9, 8); + __ andpd(xmm2, xmm0); + __ andpd(xmm0, xmm5); + __ orpd(xmm3, xmm2); + __ orpd(xmm1, xmm0); + __ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/); + __ movl(rax, 5462); + __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); + __ mull(rdx); + __ movq(rdx, r9); + __ andq(r9, 2047); + __ shrl(rax, 14); + __ andl(rdx, 2048); + __ subq(r9, rax); + __ subq(r9, rax); + __ subq(r9, rax); + __ shlq(r9, 8); + __ addl(rax, 682); + __ orl(rax, rdx); + __ movdl(xmm7, rax); + __ addq(rcx, r9); + __ psllq(xmm7, 52); + + __ bind(L_2TAG_PACKET_2_0_1); + __ movapd(xmm2, ExternalAddress(coeff_table + 32), r11 /*rscratch*/); + __ movapd(xmm0, ExternalAddress(coeff_table + 48), r11 /*rscratch*/); + __ subsd(xmm1, xmm3); + __ movq(xmm3, xmm7); + __ lea(r8, ExternalAddress(cbrt_table)); + __ mulsd(xmm7, Address(rcx, r8, Address::times_1)); + __ mulsd(xmm1, xmm4); + __ lea(r8, ExternalAddress(D_table)); + __ mulsd(xmm3, Address(rcx, r8, Address::times_1)); + __ movapd(xmm4, xmm1); + __ unpcklpd(xmm1, xmm1); + __ mulpd(xmm5, xmm1); + __ mulpd(xmm6, xmm1); + __ mulpd(xmm1, xmm1); + __ addpd(xmm2, xmm5); + __ addpd(xmm0, xmm6); + __ mulpd(xmm2, xmm1); + __ mulpd(xmm1, xmm1); + __ mulsd(xmm4, xmm7); + __ addpd(xmm0, xmm2); + __ mulsd(xmm1, 
xmm0); + __ unpckhpd(xmm0, xmm0); + __ addsd(xmm0, xmm1); + __ mulsd(xmm0, xmm4); + __ addsd(xmm0, xmm3); + __ addsd(xmm0, xmm7); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_0_0_1); + __ mulsd(xmm0, ExternalAddress(SCALE63), r11 /*rscratch*/); + __ movq(xmm7, xmm0); + __ movl(rdx, 524032); + __ psrlq(xmm7, 44); + __ pextrw(rcx, xmm7, 0); + __ movdl(rax, xmm7); + __ andl(rcx, 248); + __ lea(r8, ExternalAddress(rcp_table)); + __ movsd(xmm4, Address(rcx, r8, Address::times_1)); + __ movq(r9, rax); + __ andl(rdx, rax); + __ shrl(rdx, 8); + __ shrq(r9, 8); + __ cmpl(rdx, 0); + __ jcc(Assembler::equal, L_2TAG_PACKET_3_0_1); // Branch only if |x| is zero + __ andpd(xmm2, xmm0); + __ andpd(xmm0, xmm5); + __ orpd(xmm3, xmm2); + __ orpd(xmm1, xmm0); + __ movapd(xmm5, ExternalAddress(coeff_table), r11 /*rscratch*/); + __ movl(rax, 5462); + __ movapd(xmm6, ExternalAddress(coeff_table + 16), r11 /*rscratch*/); + __ mull(rdx); + __ movq(rdx, r9); + __ andq(r9, 2047); + __ shrl(rax, 14); + __ andl(rdx, 2048); + __ subq(r9, rax); + __ subq(r9, rax); + __ subq(r9, rax); + __ shlq(r9, 8); + __ addl(rax, 661); + __ orl(rax, rdx); + __ movdl(xmm7, rax); + __ addq(rcx, r9); + __ psllq(xmm7, 52); + __ jmp(L_2TAG_PACKET_2_0_1); + + __ bind(L_2TAG_PACKET_3_0_1); + __ cmpq(r9, 0); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1); // Branch only if x is negative zero + __ xorpd(xmm0, xmm0); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_4_0_1); + __ movsd(xmm0, ExternalAddress(ZERON), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_1_0_1); + __ movl(rax, Address(rsp, 4)); + __ movl(rdx, Address(rsp)); + __ movl(rcx, rax); + __ andl(rcx, 2147483647); + __ cmpl(rcx, 2146435072); + __ jcc(Assembler::above, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN + __ cmpl(rdx, 0); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_5_0_1); // Branch only if |x| is NaN + __ cmpl(rax, 2146435072); + __ jcc(Assembler::notEqual, L_2TAG_PACKET_6_0_1); // Branch only if x is negative INF + __ movsd(xmm0, 
ExternalAddress(INF), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_6_0_1); + __ movsd(xmm0, ExternalAddress(NEG_INF), r11 /*rscratch*/); + __ jmp(B1_4); + + __ bind(L_2TAG_PACKET_5_0_1); + __ movsd(xmm0, Address(rsp)); + __ addsd(xmm0, xmm0); + __ movq(Address(rsp, 8), xmm0); + + __ bind(B1_4); + __ addq(rsp, 24); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(0); + + return start; +} + +#undef __ diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp index 6be702f2699a6..9ea4aeeccfa3b 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp @@ -468,6 +468,10 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M assert(StubRoutines::dtanh() != nullptr, "not initialized"); __ movdbl(xmm0, Address(rsp, wordSize)); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtanh()))); + } else if (kind == Interpreter::java_lang_math_cbrt) { + assert(StubRoutines::dcbrt() != nullptr, "not initialized"); + __ movdbl(xmm0, Address(rsp, wordSize)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcbrt()))); } else if (kind == Interpreter::java_lang_math_abs) { assert(StubRoutines::x86::double_sign_mask() != nullptr, "not initialized"); __ movdbl(xmm0, Address(rsp, wordSize)); diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp index dd13b84edf51e..b5fa7dcf247e3 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp @@ -168,6 +168,7 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_dtan: #if defined(AMD64) case vmIntrinsics::_dtanh: + case vmIntrinsics::_dcbrt: #endif case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp 
b/src/hotspot/share/c1/c1_GraphBuilder.cpp index 201ee695f6941..8658bebdaeea3 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.cpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp @@ -3298,6 +3298,7 @@ GraphBuilder::GraphBuilder(Compilation* compilation, IRScope* scope) case vmIntrinsics::_dcos : // fall through case vmIntrinsics::_dtan : // fall through case vmIntrinsics::_dtanh : // fall through + case vmIntrinsics::_dcbrt : // fall through case vmIntrinsics::_dlog : // fall through case vmIntrinsics::_dlog10 : // fall through case vmIntrinsics::_dexp : // fall through diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 214da53799367..341de0ac0c2b3 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -2870,6 +2870,7 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { case vmIntrinsics::_dtanh: // fall through case vmIntrinsics::_dsin : // fall through case vmIntrinsics::_dcos : // fall through + case vmIntrinsics::_dcbrt : // fall through case vmIntrinsics::_dexp : // fall through case vmIntrinsics::_dpow : do_MathIntrinsic(x); break; case vmIntrinsics::_arraycopy: do_ArrayCopy(x); break; diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index 0f87a90a417a7..3e268884a5db1 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -348,6 +348,7 @@ const char* Runtime1::name_for_address(address entry) { FUNCTION_CASE(entry, StubRoutines::dcos()); FUNCTION_CASE(entry, StubRoutines::dtan()); FUNCTION_CASE(entry, StubRoutines::dtanh()); + FUNCTION_CASE(entry, StubRoutines::dcbrt()); #undef FUNCTION_CASE diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index e23720f3ae049..aec00f501d0d2 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -92,6 +92,7 @@ bool 
vmIntrinsics::preserves_state(vmIntrinsics::ID id) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dtanh: + case vmIntrinsics::_dcbrt: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dexp: @@ -144,6 +145,7 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dtanh: + case vmIntrinsics::_dcbrt: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dexp: @@ -288,6 +290,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dtanh: + case vmIntrinsics::_dcbrt: case vmIntrinsics::_dlog: case vmIntrinsics::_dexp: case vmIntrinsics::_dpow: diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 2959f35ef2c3e..eeefddfedfc44 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -135,7 +135,7 @@ class methodHandle; do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \ do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \ do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \ - do_name(round_name, "round") do_name(tanh_name,"tanh") \ + do_name(round_name, "round") do_name(tanh_name,"tanh") do_name(cbrt_name,"cbrt") \ \ do_name(addExact_name,"addExact") \ do_name(decrementExact_name,"decrementExact") \ @@ -161,7 +161,8 @@ class methodHandle; do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \ do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \ do_intrinsic(_datan2, java_lang_Math, atan2_name, double2_double_signature, F_S) \ - do_intrinsic(_dtanh, java_lang_Math, tanh_name, double_double_signature, F_S) \ + do_intrinsic(_dtanh, java_lang_Math, tanh_name, double_double_signature, F_S) \ + do_intrinsic(_dcbrt, 
java_lang_Math, cbrt_name, double_double_signature, F_S) \ do_intrinsic(_dsqrt, java_lang_Math, sqrt_name, double_double_signature, F_S) \ do_intrinsic(_dlog, java_lang_Math, log_name, double_double_signature, F_S) \ do_intrinsic(_dlog10, java_lang_Math, log10_name, double_double_signature, F_S) \ diff --git a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp index 25e23a15a588a..1de7dd824f8bc 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.cpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp @@ -138,6 +138,7 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan case vmIntrinsics::_dcos: return java_lang_math_cos; case vmIntrinsics::_dtan: return java_lang_math_tan; case vmIntrinsics::_dtanh: return java_lang_math_tanh; + case vmIntrinsics::_dcbrt: return java_lang_math_cbrt; case vmIntrinsics::_dabs: return java_lang_math_abs; case vmIntrinsics::_dlog: return java_lang_math_log; case vmIntrinsics::_dlog10: return java_lang_math_log10; @@ -199,6 +200,7 @@ vmIntrinsics::ID AbstractInterpreter::method_intrinsic(MethodKind kind) { case java_lang_math_cos : return vmIntrinsics::_dcos; case java_lang_math_tan : return vmIntrinsics::_dtan; case java_lang_math_tanh : return vmIntrinsics::_dtanh; + case java_lang_math_cbrt : return vmIntrinsics::_dcbrt; case java_lang_math_abs : return vmIntrinsics::_dabs; case java_lang_math_log : return vmIntrinsics::_dlog; case java_lang_math_log10 : return vmIntrinsics::_dlog10; @@ -303,6 +305,7 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { case java_lang_math_cos : tty->print("java_lang_math_cos" ); break; case java_lang_math_tan : tty->print("java_lang_math_tan" ); break; case java_lang_math_tanh : tty->print("java_lang_math_tanh" ); break; + case java_lang_math_cbrt : tty->print("java_lang_math_cbrt" ); break; case java_lang_math_abs : tty->print("java_lang_math_abs" ); break; case 
java_lang_math_log : tty->print("java_lang_math_log" ); break; case java_lang_math_log10 : tty->print("java_lang_math_log10" ); break; diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp index cc1a66f442d4d..b6876b3a2da01 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.hpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ // Organization of the interpreter(s). There exists two different interpreters in hotpot // an assembly language version (aka template interpreter) and a high level language version -// (aka c++ interpreter). Th division of labor is as follows: +// (aka c++ interpreter). 
The division of labor is as follows: // Template Interpreter Zero Interpreter Functionality // @@ -73,6 +73,7 @@ class AbstractInterpreter: AllStatic { java_lang_math_cos, // implementation of java.lang.Math.cos (x) java_lang_math_tan, // implementation of java.lang.Math.tan (x) java_lang_math_tanh, // implementation of java.lang.Math.tanh (x) + java_lang_math_cbrt, // implementation of java.lang.Math.cbrt (x) java_lang_math_abs, // implementation of java.lang.Math.abs (x) java_lang_math_sqrt, // implementation of java.lang.Math.sqrt (x) java_lang_math_sqrt_strict, // implementation of java.lang.StrictMath.sqrt(x) @@ -152,6 +153,7 @@ class AbstractInterpreter: AllStatic { case vmIntrinsics::_dcos : // fall thru case vmIntrinsics::_dtan : // fall thru case vmIntrinsics::_dtanh : // fall thru + case vmIntrinsics::_dcbrt : // fall thru case vmIntrinsics::_dabs : // fall thru case vmIntrinsics::_dsqrt : // fall thru case vmIntrinsics::_dsqrt_strict : // fall thru diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp index 1d3be066941a8..533c88cce9ed8 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp @@ -194,6 +194,7 @@ void TemplateInterpreterGenerator::generate_all() { method_entry(java_lang_math_cos ) method_entry(java_lang_math_tan ) method_entry(java_lang_math_tanh ) + method_entry(java_lang_math_cbrt ) method_entry(java_lang_math_abs ) method_entry(java_lang_math_sqrt ) method_entry(java_lang_math_sqrt_strict) @@ -453,6 +454,7 @@ address TemplateInterpreterGenerator::generate_intrinsic_entry(AbstractInterpret case Interpreter::java_lang_math_cos : // fall thru case Interpreter::java_lang_math_tan : // fall thru case Interpreter::java_lang_math_tanh : // fall thru + case Interpreter::java_lang_math_cbrt : // fall thru case Interpreter::java_lang_math_abs : // fall thru case 
Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru @@ -487,4 +489,3 @@ address TemplateInterpreterGenerator::generate_intrinsic_entry(AbstractInterpret } return entry_point; } - diff --git a/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp b/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp index faa36fc1ab1f5..c4eeb3fa84002 100644 --- a/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/zero/zeroInterpreterGenerator.cpp @@ -54,6 +54,7 @@ void ZeroInterpreterGenerator::generate_all() { method_entry(java_lang_math_cos ); method_entry(java_lang_math_tan ); method_entry(java_lang_math_tanh ); + method_entry(java_lang_math_cbrt ); method_entry(java_lang_math_abs ); method_entry(java_lang_math_sqrt ); method_entry(java_lang_math_sqrt_strict); @@ -96,6 +97,7 @@ address ZeroInterpreterGenerator::generate_method_entry( case Interpreter::java_lang_math_cos : // fall thru case Interpreter::java_lang_math_tan : // fall thru case Interpreter::java_lang_math_tanh : // fall thru + case Interpreter::java_lang_math_cbrt : // fall thru case Interpreter::java_lang_math_abs : // fall thru case Interpreter::java_lang_math_log : // fall thru case Interpreter::java_lang_math_log10 : // fall thru diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp index b44b4bb911625..e85888d510725 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -120,6 +120,7 @@ class CompilerToVM { static address dcos; static address dtan; static address dtanh; + static address dcbrt; static address dexp; static address dlog; static address dlog10; diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp index a729e34a2796b..a22d432584cca 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp @@ -135,6 +135,7 @@ address CompilerToVM::Data::dsin; address CompilerToVM::Data::dcos; address CompilerToVM::Data::dtan; address CompilerToVM::Data::dtanh; +address CompilerToVM::Data::dcbrt; address CompilerToVM::Data::dexp; address CompilerToVM::Data::dlog; address CompilerToVM::Data::dlog10; @@ -264,6 +265,7 @@ void CompilerToVM::Data::initialize(JVMCI_TRAPS) { } SET_TRIGFUNC_OR_NULL(dtanh); + SET_TRIGFUNC_OR_NULL(dcbrt); #undef SET_TRIGFUNC_OR_NULL diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 8ca13a3a6c376..6a68086d6c599 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -134,6 +134,7 @@ static_field(CompilerToVM::Data, dcos, address) \ static_field(CompilerToVM::Data, dtan, address) \ static_field(CompilerToVM::Data, dtanh, address) \ + static_field(CompilerToVM::Data, dcbrt, address) \ static_field(CompilerToVM::Data, dexp, address) \ static_field(CompilerToVM::Data, dlog, address) \ static_field(CompilerToVM::Data, dlog10, address) \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 272692446ae61..0c642211e1fec 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -616,6 +616,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dtanh: + 
case vmIntrinsics::_dcbrt: case vmIntrinsics::_dabs: case vmIntrinsics::_fabs: case vmIntrinsics::_iabs: diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 65635001e131d..38b7fc50200c8 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -245,6 +245,7 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_dcos: case vmIntrinsics::_dtan: case vmIntrinsics::_dtanh: + case vmIntrinsics::_dcbrt: case vmIntrinsics::_dabs: case vmIntrinsics::_fabs: case vmIntrinsics::_iabs: @@ -1886,6 +1887,9 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { case vmIntrinsics::_dtanh: return StubRoutines::dtanh() != nullptr ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtanh(), "dtanh") : false; + case vmIntrinsics::_dcbrt: + return StubRoutines::dcbrt() != nullptr ? + runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcbrt(), "dcbrt") : false; case vmIntrinsics::_dexp: return StubRoutines::dexp() != nullptr ? 
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(), "dexp") : diff --git a/src/hotspot/share/runtime/stubDeclarations.hpp b/src/hotspot/share/runtime/stubDeclarations.hpp index 46feaaf017586..5dc7d2936621f 100644 --- a/src/hotspot/share/runtime/stubDeclarations.hpp +++ b/src/hotspot/share/runtime/stubDeclarations.hpp @@ -591,6 +591,8 @@ do_entry(initial, dtan, dtan, dtan) \ do_stub(initial, dtanh) \ do_entry(initial, dtanh, dtanh, dtanh) \ + do_stub(initial, dcbrt) \ + do_entry(initial, dcbrt, dcbrt, dcbrt) \ do_stub(initial, fmod) \ do_entry(initial, fmod, fmod, fmod) \ /* following generic entries should really be x86_32 only */ \ diff --git a/src/java.base/share/classes/java/lang/Math.java b/src/java.base/share/classes/java/lang/Math.java index 5f664b54cdf5b..da718ba04f2a3 100644 --- a/src/java.base/share/classes/java/lang/Math.java +++ b/src/java.base/share/classes/java/lang/Math.java @@ -445,6 +445,7 @@ public static double sqrt(double a) { * @return the cube root of {@code a}. * @since 1.5 */ + @IntrinsicCandidate public static double cbrt(double a) { return StrictMath.cbrt(a); } diff --git a/test/micro/org/openjdk/bench/java/lang/CbrtPerf.java b/test/micro/org/openjdk/bench/java/lang/CbrtPerf.java new file mode 100644 index 0000000000000..0143b76fe6c19 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/CbrtPerf.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.Random;
+
+/**
+ * JMH throughput benchmarks for {@link Math#cbrt(double)}.
+ *
+ * Three groups of benchmarks are provided as nested state classes:
+ * random inputs drawn from a selectable range, compile-time constant
+ * inputs, and IEEE 754 special values.
+ */
+public class CbrtPerf {
+
+    /**
+     * Sums {@code Math.cbrt} over arrays of pseudo-random inputs.
+     *
+     * The {@code cbrtRangeIndex} parameter selects one row of
+     * {@code cbrtRangeInputs}: row 0 covers magnitudes below
+     * {@code 0x1.0P-1022} (zero and subnormals), row 1 covers
+     * {@code 0x1.0P-1022} up to {@code 1.7976931348623157E308}.
+     */
+    // Iteration times are in SECONDS, matching the other benchmark groups in this file.
+    @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Fork(2)
+    @BenchmarkMode(Mode.Throughput)
+    @State(Scope.Thread)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public static class CbrtPerfRanges {
+        // Constant so that @OperationsPerInvocation below always matches the loop trip count.
+        public static final int cbrtInputCount = 2048;
+
+        @Param({"0", "1"})
+        public int cbrtRangeIndex;
+
+        public double [] cbrtPosRandInputs;
+        public double [] cbrtNegRandInputs;
+        public int cbrtInputIndex = 0;
+        // Each row is {lower bound, upper bound} of one input range.
+        public double cbrtRangeInputs[][] = { {0.0, 0x1.0P-1022}, {0x1.0P-1022, 1.7976931348623157E308} };
+
+        /**
+         * Fills the positive and negative input arrays with values drawn
+         * from the range selected by {@code cbrtRangeIndex}, using a fixed
+         * seed so every run sees the same inputs.
+         */
+        @Setup
+        public void setupValues() {
+            Random random = new Random(1023);
+
+            // The selected range does not change inside the loop, so read it once.
+            final double cbrtLowerBound = cbrtRangeInputs[cbrtRangeIndex][0];
+            final double cbrtUpperBound = cbrtRangeInputs[cbrtRangeIndex][1];
+
+            cbrtPosRandInputs = new double[cbrtInputCount];
+            cbrtNegRandInputs = new double[cbrtInputCount];
+
+            for (int i = 0; i < cbrtInputCount; i++) {
+                // Positive draw first, then negative: keeps the seeded sequence stable.
+                cbrtPosRandInputs[i] = random.nextDouble(cbrtLowerBound, cbrtUpperBound);
+                cbrtNegRandInputs[i] = random.nextDouble(-cbrtUpperBound, -cbrtLowerBound);
+            }
+        }
+
+        /** @return the sum of {@code Math.cbrt} over all positive random inputs. */
+        @Benchmark
+        @OperationsPerInvocation(cbrtInputCount)
+        public double cbrtPosRangeDouble() {
+            double res = 0.0;
+            for (int i = 0; i < cbrtInputCount; i++) {
+                res += Math.cbrt(cbrtPosRandInputs[i]);
+            }
+            return res;
+        }
+
+        /** @return the sum of {@code Math.cbrt} over all negative random inputs. */
+        @Benchmark
+        @OperationsPerInvocation(cbrtInputCount)
+        public double cbrtNegRangeDouble() {
+            double res = 0.0;
+            for (int i = 0; i < cbrtInputCount; i++) {
+                res += Math.cbrt(cbrtNegRandInputs[i]);
+            }
+            return res;
+        }
+    }
+
+    /**
+     * Calls {@code Math.cbrt} on {@code static final} (compile-time constant)
+     * arguments: 0, 1, and the perfect cubes 27 and 512.
+     */
+    @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Fork(2)
+    @BenchmarkMode(Mode.Throughput)
+    @State(Scope.Thread)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public static class CbrtPerfConstant {
+        public static final double constDouble0 = 0.0;
+        public static final double constDouble1 = 1.0;
+        public static final double constDouble27 = 27.0;
+        public static final double constDouble512 = 512.0;
+
+        @Benchmark
+        public double cbrtConstDouble0() {
+            return Math.cbrt(constDouble0);
+        }
+
+        @Benchmark
+        public double cbrtConstDouble1() {
+            return Math.cbrt(constDouble1);
+        }
+
+        @Benchmark
+        public double cbrtConstDouble27() {
+            return Math.cbrt(constDouble27);
+        }
+
+        @Benchmark
+        public double cbrtConstDouble512() {
+            return Math.cbrt(constDouble512);
+        }
+    }
+
+    /**
+     * Calls {@code Math.cbrt} on special values held in instance fields:
+     * positive and negative zero, positive and negative infinity, and NaN.
+     */
+    @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
+    @Fork(2)
+    @BenchmarkMode(Mode.Throughput)
+    @State(Scope.Thread)
+    @OutputTimeUnit(TimeUnit.MILLISECONDS)
+    public static class CbrtPerfSpecialValues {
+        public double double0 = 0.0;
+        public double doubleNegative0 = -0.0;
+        public double doubleInf = Double.POSITIVE_INFINITY;
+        public double doubleNegativeInf = Double.NEGATIVE_INFINITY;
+        public double doubleNaN = Double.NaN;
+
+        @Benchmark
+        public double cbrtDouble0() {
+            return Math.cbrt(double0);
+        }
+
+        @Benchmark
+        public double cbrtDoubleNegative0() {
+            return Math.cbrt(doubleNegative0);
+        }
+
+        @Benchmark
+        public double cbrtDoubleInf() {
+            return Math.cbrt(doubleInf);
+        }
+
+        @Benchmark
+        public double cbrtDoubleNegativeInf() {
+            return Math.cbrt(doubleNegativeInf);
+        }
+
+        @Benchmark
+        public double cbrtDoubleNaN() {
+            return Math.cbrt(doubleNaN);
+        }
+    }
+
+    /**
+     * Runs every benchmark group in this file, one JMH run per group.
+     *
+     * @param args unused
+     * @throws RunnerException if a JMH run fails
+     */
+    public static void main(String[] args) throws RunnerException {
+        // CbrtPerfSpecialValues is included so that no group is silently skipped.
+        Class<?>[] benchmarkGroups = {
+            CbrtPerfRanges.class,
+            CbrtPerfConstant.class,
+            CbrtPerfSpecialValues.class
+        };
+
+        for (Class<?> group : benchmarkGroups) {
+            Options opt = new OptionsBuilder()
+                    .include(group.getSimpleName())
+                    .build();
+
+            new Runner(opt).run();
+        }
+    }
+}